Code covered by the BSD License  

Highlights from
DLMREADBAR

from DLMREADBAR by Stefano Gianoli
Reads an ASCII delimited file while showing a waitbar.

dlmreadbar(filename,msg)
function RESULT = dlmreadbar(filename,msg)
%DLMREADBAR Read ASCII delimited file.
%   RESULT = DLMREADBAR(FILENAME,MESSAGE) reads numeric data from the ASCII
%   delimited file FILENAME showing the progress through a progress bar. The
%   delimiter in the ASCII file is guessed by DLMREADBAR. The MESSAGE will
%   be shown in the progress bar, together with the estimated time left (in
%   seconds). The result is returned in RESULT.
%
%   RESULT = DLMREADBAR(FILENAME) shows a standard MESSAGE field where the
%   FILENAME and the estimated time left will appear.
%
%   The function is suitable for huge ASCII files. A CANCEL button is
%   available to interrupt the loading process. In that case an empty
%   RESULT is returned.
%
%   Errors are raised when FILENAME is not a string, when the file cannot
%   be found or opened, when the size of RESULT cannot be determined, or
%   when the text cannot be converted to numbers. The file is closed and
%   the progress bar removed on every exit path.
%
%   Example:
%   X = rand(390625,32); % 100M variable !!!
%   save example.txt X -ASCII
%   Y = dlmreadbar('example.txt');
%
%   The function DLMREADBAR is (estimated) 15% slower than load, but
%   provides a method to interrupt the execution and to note the progress
%   of loading. It is useless for small files.

%   Copyright 2004 Stefano Gianoli, ETH Zurich
%   gianoli@chem.ethz.ch
%   $Revision: 5.0 $  $Date: 2004/06/15 12:06:46 $
%
%   v4: improved compatibility: read files generated with
%   save('filename','var1',...,'-ASCII')
%   v5: bugfix for NxM data where N<<M.

% if no arguments display the help
if nargin == 0
    help(mfilename)
    return
end
% No explicit upper bound check on nargin: MATLAB itself rejects a call
% with more inputs than the two declared in the signature.

% Get Filename
if ~ischar(filename), error('Filename must be a string.'); end

% check that the file exists
if ~isequal(exist(filename,'file'), 2), error(['Unable to find file: ' filename]), end

% if one argument is provided, show a standard message
if nargin == 1, msg = GetStdMsg(filename); end

% open the file before creating any window, so that a failure here leaves
% nothing behind to clean up
fid = fopen(filename,'r');
if fid == -1, error('Unable to open file'), end

% create the waitbar used while the file is being scanned
h = waitbar(0,'Calculating the size of the file','CreateCancelBtn',@dlmreadbar_cancel_Callback,'Interruptible','on','BusyAction','cancel');

% WARNING: SetIterNum must run before GetSizeResult, because the scan
% needs the buffer size and the estimated number of iterations.
try
    SetIterNum(filename);
    SizeRESULT = GetSizeResult(fid,h);
catch
    errMsg = lasterr;
    fclose(fid);
    if ishandle(h)
        % genuine failure: remove the waitbar and report it
        delete(h)
        error(errMsg)
    end
    % the waitbar is gone, i.e. the user pressed CANCEL during the scan
    RESULT = [];
    return
end

% the user cancelled, but the scan returned normally
if ~ishandle(h)
    fclose(fid);
    RESULT = [];
    return
end

% determine the size of the RESULT matrix and initialize it to zero
if length(SizeRESULT) ~= 2
    fclose(fid);
    delete(h)
    error('Invalid size for RESULT: check BUFFERSIZE or End Of Line delimiters')
end
RESULT = zeros(SizeRESULT);
% read the iteration count only now: it is stored by the helpers above
nIter = GetIterNum;
delete(h)

% waitbar used while the data are being converted
h = waitbar(0,msg,'CreateCancelBtn',@dlmreadbar_cancel_Callback,'Interruptible','on','BusyAction','cancel');
hAxes = findobj(h,'type','axes');
hxLabel = get(hAxes,'xlabel');

% save the cpu time at the beginning of the reading iterations
t0 = cputime;
% values converted from chunks that did not complete any row yet
pending = [];
for iIter = 1:nIter
    % update the waitbar; if it has been cancelled, stop and return empty
    try
        waitbar(iIter/nIter,h);
        set(hxLabel,'string',[int2str((nIter/iIter-1)*(cputime-t0)) ' seconds left']);
    catch
        if ~ishandle(h)
            fclose(fid);
            RESULT = [];
            return
        end
    end
    % retrieve the buffer size for the current iteration
    s = iGetBufSize(iIter);
    % read the file into a byte vector
    B = fread(fid,s,'*uint8');
    % cumulative row counts before and after this iteration: they give the
    % range of rows of RESULT that this chunk completes
    r = iGetIndexRow([iIter iIter+1]);
    % local strtrim subfunction (collapses repeated blanks)
    B = strtrim(B);
    try
        chunk = dataread('string',char(B));
        if r(2) > r(1)
            % this chunk completes rows: store them, prefixed with whatever
            % was accumulated from previous row-less chunks
            RESULT(1+r(1):r(2),:) = [pending chunk];
            pending = [];
        else
            % no complete row yet (line longer than the buffer): accumulate
            pending = [pending chunk];
        end
    catch
        errMsg = lasterr;
        fclose(fid);
        if ishandle(h), delete(h); end
        disp(['On file ' mfilename ' ==> using dataread'])
        error(errMsg)
    end
end
if ishandle(h), delete(h); end
fclose(fid);
%//////////////////////////////////////////////////////////////////////////
% end of main

%//////////////////////////////////////////////////////////////////////////
function SetIterNum (f)
%SETITERNUM Choose the read buffer size and the iteration estimate for file F.
%   Stores two values in global variables used by the other subfunctions:
%     GBUFFERSIZE   - buffer size in bytes: about 1/50 of the file size,
%                     limited to the range 2^12 .. 2^18 and rounded down
%                     to a power of two
%     GITERATIONNUM - ceil(file size / GBUFFERSIZE)
%   An error is raised when the size of F cannot be determined.
global GBUFFERSIZE GITERATIONNUM % protected variable
% get the file size
info = dir(f);
if isempty(info)
    % DIR does not search the MATLAB path, whereas EXIST and FOPEN do:
    % resolve the name through the path before giving up
    info = dir(which(f));
end
if isempty(info)
    error(['Unable to determine the size of file: ' f])
end
fSize = info(1).bytes;
% keep the buffer size in the range 2^12 to 2^18
target = min([max([4096 floor(fSize/50)]) 2^18]);
% round down to a power of two: target = mant*2^expo with 0.5 <= mant < 1,
% hence the largest power of two not above target is 2^(expo-1)
[mant,expo] = log2(target);
GBUFFERSIZE = 2^(expo-1);
% get the number of iterations, rounded to the next integer
GITERATIONNUM = ceil(fSize/GBUFFERSIZE);

function n = GetIterNum
%GETITERNUM Return the iteration count stored in the global GITERATIONNUM.
%   The value is returned through an ordinary local output instead of
%   declaring the output argument itself as global.
global GITERATIONNUM % protected variable
n = GITERATIONNUM;

%//////////////////////////////////////////////////////////////////////////
function S = GetSizeResult(f,h)
%GETSIZERESULT Scan the open file F and plan how it will be read in chunks.
%   S = GETSIZERESULT(F,H) returns S = [rows columns] for the RESULT matrix
%   and updates the waitbar H while scanning. If the waitbar is deleted
%   during the scan (CANCEL), S is returned empty.
%
%   Globals read:
%     GBUFFERSIZE   - maximum number of bytes read per iteration
%     GITERATIONNUM - estimated number of iterations (used to preallocate)
%   Globals written:
%     ITEMROWS      - cumulative number of line feeds, with a leading 0:
%                     iteration i completes rows ITEMROWS(i)+1:ITEMROWS(i+1)
%     ITEMBUFFER    - number of bytes to read at the i-th iteration
%     GITERATIONNUM - replaced by the number of iterations actually needed
%
%   Rows are counted as line feed characters (10), so text after the last
%   line feed of the file is not counted as a row.
%   The file position of F is restored before returning normally.
global GBUFFERSIZE GITERATIONNUM ITEMROWS ITEMBUFFER % protected variable

% save the current position
startPos = ftell(f);

%----------------------------
% AIM: >>find the number of columns c<<
fseek(f,0,'bof');
c = 0;
while ~feof(f)
    % read a block of the file and collapse repeated blanks (local strtrim)
    A = fread(f,4096,'*uint8');
    A = strtrim(A);
    % convert the text to a numeric matrix
    try
        B = dataread('string',char(A));
    catch
        disp(['on file: ' mfilename ' using: dataread'])
        error(lasterr)
    end
    % accumulate the columns; stop once a line feed has been seen
    c = c + size(B,2);
    if size(B,1)>1 || any(A==10), break, end
end

%----------------------------
% AIM: >>find, for each iteration, the number of rows it completes and the
%      number of bytes it has to read<<
% total size, used for the progress estimate
fseek(f,0,'eof');
fSize = ftell(f);
fseek(f,0,'bof');

% preallocate with the estimate; the vectors grow if more passes are needed
rowsPerIter  = zeros(GITERATIONNUM,1);
bytesPerIter = zeros(GITERATIONNUM,1);

hAxes = findobj(h,'type','axes');
hxLabel = get(hAxes,'xlabel');

t0 = cputime;
nIter = 0;
% Scan up to the end of the file. A fixed number of passes is not enough:
% each pass consumes less than GBUFFERSIZE bytes whenever the buffer does
% not end exactly on a line feed, so the end of the file would be skipped.
while 1
    [A,count] = fread(f,GBUFFERSIZE,'*uint8');
    if count == 0, break, end
    nIter = nIter + 1;

    % rows completed by this buffer = number of line feeds in it
    isLF = (A == 10);
    rowsPerIter(nIter,1) = sum(isLF);
    % offset (normally <= 0) that brings the file position back to just
    % after the last line feed of the buffer
    backStep = FindLastOneNeg(isLF)+1;
    fseek(f,backStep,'cof');
    % bytes to read at this iteration in order to stop after that line feed
    bytesPerIter(nIter,1) = backStep + count;

    try
        done = ftell(f)/fSize;
        waitbar(done,h);
        set(hxLabel,'string',[int2str((1/done-1)*(cputime-t0)) ' seconds left']);
    catch
        % if the waitbar has been cancelled, stop and report it with an
        % empty size
        if ~ishandle(h), S = []; return, end
    end
end

% publish the plan
GITERATIONNUM = nIter;
ITEMROWS = [0; cumsum(rowsPerIter(1:nIter))];
ITEMBUFFER = bytesPerIter(1:nIter);

% restore the current position
fseek(f,startPos,'bof');

% return the size of the matrix RESULT in S
S = [ITEMROWS(end) c];

%//////////////////////////////////////////////////////////////////////////
function rowIdx = iGetIndexRow(i)
%IGETINDEXROW Return the entries of the global ITEMROWS selected by index I.
global ITEMROWS
rowIdx = ITEMROWS(i);

%//////////////////////////////////////////////////////////////////////////
function nBytes = iGetBufSize(i)
%IGETBUFSIZE Return the entries of the global ITEMBUFFER selected by index I.
global ITEMBUFFER
nBytes = ITEMBUFFER(i);

%//////////////////////////////////////////////////////////////////////////
function p = FindLastOneNeg(A)
%FINDLASTONENEG Position of the last nonzero element of A, counted from the end.
%   P = FINDLASTONENEG(A) returns IDX - LENGTH(A) - 1, where IDX is the
%   index of the last nonzero element of A. P+1 is therefore minus the
%   number of elements that follow that element (0 when it is the last
%   one). When A is empty or has no nonzero element, P = -1, so that P+1
%   is 0.
%
%   The offset is computed from the actual length of A, so the result is
%   also correct for a buffer shorter than the nominal buffer size (the
%   last chunk of a file).
idx = max(find(A));
if isempty(idx)
    p = -1;
else
    p = idx - length(A) - 1;
end

%//////////////////////////////////////////////////////////////////////////
function m = GetStdMsg(f)
%GETSTDMSG Build the standard waitbar message for file F.
%   M = GETSTDMSG(F) returns the text  Loading 'name.ext'  where name and
%   extension are taken from F (the folder part is dropped). The characters
%   \ _ ^ { } are prefixed with a backslash, in both the name and the
%   extension, so that a text object using the TeX interpreter shows them
%   literally instead of treating them as markup.
[pathstr,name,ext] = fileparts(f);
shown = [name ext];
% the backslash must be escaped first, otherwise the backslashes added by
% the following replacements would be doubled
shown = strrep(shown,'\','\\');
shown = strrep(shown,'_','\_');
shown = strrep(shown,'^','\^');
shown = strrep(shown,'{','\{');
shown = strrep(shown,'}','\}');
m = ['Loading ''' shown ''''];

%//////////////////////////////////////////////////////////////////////////
function X = strtrim(X)
%STRTRIM Collapse repeated blanks in a uint8 column buffer (local helper).
%   Each sequence tab, line feed, blank, blank is replaced by a single line
%   feed; then pairs of blanks are replaced by one blank until the length
%   of the text stops changing. The result is returned as a uint8 column.
txt = strrep(char(X'),sprintf('\t\n  '),sprintf('\n'));
lenBefore = 0;
while length(txt) ~= lenBefore
    lenBefore = length(txt);
    txt = strrep(txt,'  ',' ');
end
X = uint8(txt)';

%//////////////////////////////////////////////////////////////////////////
function dlmreadbar_cancel_Callback(obj, eventdata)
%DLMREADBAR_CANCEL_CALLBACK Delete the waitbar when the user cancels.
%   OBJ is the object whose callback is running (the cancel button, or the
%   figure itself when the window is being closed); EVENTDATA is unused.
%   The figure that contains OBJ is located by walking up the Parent chain
%   and is then deleted, so the result does not depend on the tags of the
%   objects in the window or on the order in which they are listed.
fig = obj;
while ~isempty(fig) && ishandle(fig) && ~strcmp(get(fig,'Type'),'figure')
    fig = get(fig,'Parent');
end
if ~isempty(fig) && ishandle(fig)
    delete(fig)
end

Contact us at files@mathworks.com