function RESULT = dlmreadbar(filename,msg)
%DLMREADBAR Read ASCII delimited file.
% RESULT = DLMREADBAR(FILENAME,MESSAGE) reads numeric data from the ASCII
% delimited file FILENAME, showing the progress through a progress bar. The
% delimiter in the ASCII file is guessed by DLMREADBAR. The MESSAGE will
% be shown in the progress bar, together with the estimated time left (in
% seconds). The result is returned in RESULT.
%
% RESULT = DLMREADBAR(FILENAME) shows a standard MESSAGE field where the
% FILENAME and the estimated time left will appear.
%
% The function is suitable for huge ASCII files. A CANCEL button is
% available to interrupt the loading process. In that case an empty
% RESULT is returned.
%
% Example:
% X = rand(390625,32); % 100M variable !!!
% save example.txt X -ASCII
% Y = dlmreadbar('example.txt');
%
% The function DLMREADBAR is (estimated) 15% slower than load, but
% provides a method to interrupt the execution and to follow the progress
% of loading. It is useless for small files.

% Copyright 2004 Stefano Gianoli, ETH Zurich
% gianoli@chem.ethz.ch
% $Revision: 5.0 $ $Date: 2004/06/15 12:06:46 $
%
% v4: improved compatibility: read files generated with
% save('filename','var1',...,'-ASCII')
% v5: bugfix for NxM data where N<<M.

% with no arguments, display the help
if nargin == 0
    help(mfilename)
    return
end
% one or two arguments are allowed
error(nargchk(1,2,nargin))
% the file name must be a char array
if ~ischar(filename), error('Filename must be a string.'); end
% check that the file exists
if ~isequal(exist(filename,'file'), 2), error(['Unable to find file: ' filename]), end
% if only one argument is provided, show a standard message
if nargin == 1, msg = GetStdMsg(filename); end
% create the waitbar used while the file is scanned
h = waitbar(0,'Calculating the size of the file','CreateCancelBtn',@dlmreadbar_cancel_Callback,'Interruptible','on','BusyAction','cancel');
% open the file
fid = fopen(filename,'r');
% make sure it can be opened, otherwise close the waitbar and stop
if fid == -1, close(h), error('Unable to open file'), end
% compute the number of read iterations: WARNING this call must come before
% the determination of the size of RESULT, which needs that number
SetIterNum(filename);
% determine the size of the RESULT matrix; whatever happens during the scan,
% the file is closed before leaving this function
try
    SizeRESULT = GetSizeResult(fid,h);
catch
    errmsg = lasterr;
    fclose(fid);
    if ishandle(h)
        % genuine failure: remove the waitbar and report it
        delete(h);
        error(errmsg)
    end
    % the waitbar is gone, i.e. the user cancelled during the scan
    RESULT = [];
    return
end
% the user cancelled during the scan: return an empty RESULT as documented
if ~ishandle(h)
    fclose(fid);
    RESULT = [];
    return
end
if length(SizeRESULT) == 2
    % preallocate RESULT
    RESULT = zeros(SizeRESULT);
    nIter = GetIterNum;
else
    fclose(fid);
    close(h)
    error('Invalid size for RESULT: check BUFFERSIZE or End Of Line delimiters')
end
% replace the scanning waitbar with the loading waitbar
close(h)
h = waitbar(0,msg,'CreateCancelBtn',@dlmreadbar_cancel_Callback,'Interruptible','on','BusyAction','cancel');
hAxes = findobj(h,'type','axes');
hxLabel = get(hAxes,'xlabel');
% save the cpu time at the beginning of the reading iterations
a = cputime;
% tmp holds values of a row that spans more than one buffer
tmp = [];
waitbar(0,h,msg);
for iIter = 1:nIter
    % update the waitbar and the estimated time left
    try
        waitbar(iIter/nIter,h);
        set(hxLabel,'string',[int2str((nIter/iIter-1)*(cputime-a)) ' seconds left']);
    catch
        % if the waitbar has been cancelled, interrupt the iterations
        if ~ishandle(h)
            fclose(fid);
            RESULT = [];
            return
        end
    end
    % retrieve the buffer size for the current iteration
    s = iGetBufSize(iIter);
    % read that many bytes from the file
    B = fread(fid,s,'*uint8');
    % cumulative row counts before and after this iteration, i.e. the range
    % of rows of RESULT filled by the current buffer
    r = iGetIndexRow([iIter iIter+1]);
    % normalize the whitespace (local strtrim subfunction)
    B = strtrim(B);
    % convert the buffer to numbers and store them in RESULT
    try
        if any(r) && r(1) ~= r(end)
            % the buffer completes at least one row
            if ~isempty(tmp)
                % prepend the values carried over from the previous buffers
                tmp = [tmp dataread('string',char(B))];
                RESULT(1+r(1):r(2),:) = tmp;
                tmp = [];
            else
                RESULT(1+r(1):r(2),:) = dataread('string',char(B));
            end
        else
            % no complete row in this buffer: carry the values over
            tmp = [tmp dataread('string',char(B))];
        end
    catch
        errmsg = lasterr;
        fclose(fid);
        if ishandle(h), delete(h); end
        disp(['On file' mfilename ' ==> using dataread'])
        error(errmsg)
    end
end
% release the file and remove the waitbar
fclose(fid);
if ishandle(h), delete(h); end
%//////////////////////////////////////////////////////////////////////////
% end of main
%//////////////////////////////////////////////////////////////////////////
function SetIterNum (f)
%SETITERNUM Choose the read buffer size and the iteration count for file F.
% Stores the results in the globals GBUFFERSIZE (bytes read per iteration)
% and GITERATIONNUM (number of iterations needed to cover the file).
global GBUFFERSIZE GITERATIONNUM % protected variable
% protected max buffer size -> GBUFFERSIZE
% protected number of iteration -> GITERATIONNUM
% get the file size
info = dir(f);
if isempty(info)
    % the file may have been located through the MATLAB path
    info = dir(which(f));
end
if isempty(info), error(['Unable to find file: ' f]), end
fSize = info(1).bytes;
% one fiftieth of the file size, clamped to the range 2^12 to 2^18
GBUFFERSIZE = min([max([4096 floor(fSize/50)]) 2^18]);
% round down to the largest power of two not exceeding that value:
% [m,e] = log2(x) gives x = m*2^e with 0.5 <= m < 1, hence 2^(e-1) <= x
[mantissa,expo] = log2(GBUFFERSIZE);
GBUFFERSIZE = pow2(expo-1);
% get the number of iterations, rounded up to the next integer
GITERATIONNUM = ceil(fSize/GBUFFERSIZE);
function nIterations = GetIterNum
%GETITERNUM Return the current value of the global GITERATIONNUM.
global GITERATIONNUM % protected variable
nIterations = GITERATIONNUM;
%//////////////////////////////////////////////////////////////////////////
function S = GetSizeResult(f,h)
%GETSIZERESULT Scan file F and return the size [rows columns] of the data.
% F is an open file identifier and H the handle of the waitbar to update.
% As a side effect the globals ITEMROWS and ITEMBUFFER are filled in.
% S is returned empty when the waitbar H is deleted (cancel) during the scan.
global GBUFFERSIZE GITERATIONNUM ITEMROWS ITEMBUFFER % protected variable
% max buffer size -> GBUFFERSIZE
% number of iteration -> GITERATIONNUM
% cumulative number of rows completed before each iteration -> ITEMROWS
% number of bytes to read at the i-th iteration -> ITEMBUFFER
% default output, returned if the scan is cancelled
S = [];
% save the current position
currpos = ftell(f);
% go to the beginning of the file
fseek(f,0,'bof');
c = 0;
while ~feof(f)
    %----------------------------
    % AIM: >>find the number of columns c<<
    % read a chunk of the file into the buffer A
    A = fread(f,4096,'*uint8');
    A = strtrim(A);
    % convert the buffer A to the double matrix B
    try
        B = dataread('string',char(A));
    catch
        disp(['on file: ' mfilename ' using: dataread'])
        error(lasterr)
    end
    % accumulate the number of columns of B in the variable c
    c = c + size(B,2);
    % stop at the first chunk that holds a line feed or more than one row
    if size(B,1)>1 || any(A==10), break, end
end
%----------------------------
% AIM: >>find the number of rows (ITEMROWS) and the respective
% buffer size (ITEMBUFFER) for the i-th iteration<<
% initialize ITEMROWS and ITEMBUFFER
[ITEMROWS,ITEMBUFFER] = deal(zeros(GITERATIONNUM,1));
% go to the beginning of the file
fseek(f,0,'bof');
hAxes = findobj(h,'type','axes');
hxLabel = get(hAxes,'xlabel');
% for each iteration determine ITEMROWS and ITEMBUFFER
a = cputime;
for i = 1:GITERATIONNUM
    try
        waitbar(i/GITERATIONNUM,h);
        set(hxLabel,'string',[int2str((GITERATIONNUM/i-1)*(cputime-a)) ' seconds left']);
    catch
        % if the waitbar has been cancelled, interrupt the scan; S is
        % still empty, which tells the caller that no size is available
        if ~ishandle(h)
            fseek(f,currpos,'bof');
            return
        end
    end
    % read the next buffer A from the file f
    [A,count] = fread(f,GBUFFERSIZE,'*uint8');
    % flag the line feed characters (code 10) in the buffer A
    tempCR = (A == 10);
    % the number of rows ended in this buffer is the number of line feeds
    ITEMROWS(i) = sum(tempCR);
    % offset (zero or negative) from the end of the buffer back to the
    % position just after the last line feed
    ITEMBUFFER(i) = FindLastOneNeg(tempCR)+1;
    % move the file position back to just after the last line feed, so the
    % next buffer starts at the beginning of a row
    fseek(f,ITEMBUFFER(i),'cof');
    % number of bytes to read at the i-th iteration in order to stop just
    % after the last line feed: bytes actually read plus the offset above
    ITEMBUFFER(i) = ITEMBUFFER(i) + count;
end
% turn the per-iteration row counts into cumulative row indexes
ITEMROWS = [0; cumsum(ITEMROWS)];
% restore the original file position
fseek(f,currpos,'bof');
% return the size of the matrix RESULT in S
S = [ITEMROWS(end) c];
%//////////////////////////////////////////////////////////////////////////
function rowIdx = iGetIndexRow(i)
%IGETINDEXROW Return the entries of the global ITEMROWS selected by index I.
global ITEMROWS
rowIdx = ITEMROWS(i);
%//////////////////////////////////////////////////////////////////////////
function nBytes = iGetBufSize(i)
%IGETBUFSIZE Return the entries of the global ITEMBUFFER selected by index I.
global ITEMBUFFER
nBytes = ITEMBUFFER(i);
%//////////////////////////////////////////////////////////////////////////
function p = FindLastOneNeg(A)
%FINDLASTONENEG Negative offset of the last nonzero element of vector A.
% Returns p = -(number of elements after the last nonzero element) - 1, so
% that p+1 is the (zero or negative) offset from the end of A back to the
% position just after that element. When A has no nonzero element (or is
% empty) p is -1, i.e. p+1 is 0 and the whole buffer is kept.
% The same convention is applied whatever the length of A, so a short
% buffer (e.g. the last one of a file) is handled like a full one.
lastOne = max(find(A));
if isempty(lastOne)
    p = -1;
else
    p = lastOne - length(A) - 1;
end
%//////////////////////////////////////////////////////////////////////////
function m = GetStdMsg(f)
%GETSTDMSG Build the standard waitbar message (containing the file name).
% The directory part of F is dropped; backslashes and underscores in the
% base name are prefixed with a backslash (presumably so the waitbar text
% shows them literally -- confirm against the text interpreter in use).
[unusedDir,baseName,extension] = fileparts(f);
% backslashes are escaped first, since escaping underscores adds new ones
escapedName = strrep(strrep(baseName,'\','\\'),'_','\_');
m = ['Loading ''', escapedName, extension, ''''];
%//////////////////////////////////////////////////////////////////////////
function X = strtrim(X)
%STRTRIM Normalize the whitespace of the uint8 column buffer X.
% NOTE: this local subfunction takes precedence, inside this file, over any
% function of the same name found on the MATLAB path.
% First the sequence tab, line feed, space is replaced by a single line
% feed; then every run of spaces is collapsed to a single space.
X = uint8(strrep(char(X'),sprintf('\t\n '),sprintf('\n')))';
% char codes are used so the patterns cannot be altered by reformatting
twoSpaces = char([32 32]);
oneSpace = char(32);
% replace pairs of spaces until the length stops changing
prevLength = -1;
while length(X) ~= prevLength
    prevLength = length(X);
    X = uint8(strrep(char(X'),twoSpaces,oneSpace))';
end
%//////////////////////////////////////////////////////////////////////////
function dlmreadbar_cancel_Callback(obj, eventdata)
%DLMREADBAR_CANCEL_CALLBACK Callback of the waitbar cancel button.
% Deletes the object stored in the first field of the handles structure
% returned by GUIHANDLES for OBJ (presumably the waitbar figure itself --
% confirm against the tag order of the waitbar). EVENTDATA is not used.
handles = guihandles(obj);
names = fieldnames(handles);
% nothing to delete when no tagged object is found
if isempty(names), return, end
delete(handles.(names{1}))