No BSD License  

Highlights from
squeezefile

from squeezefile by Giuseppe Ridinò
Squeeze text files.

squeezefile(varargin)
function varargout = squeezefile(varargin)
% SQUEEZEFILE Squeeze text files.
%    SQUEEZEFILE removes useless blanks at the ends of the lines of a text
%    file.
%
%    SQUEEZEFILE       applies to the current directory (see below),
%                       i.e. it is the same as SQUEEZEFILE(CD).
%
%    SQUEEZEFILE(PATH) applies to all M-files present in the PATH directory.
%
%    SQUEEZEFILE(FILE) applies to FILE; if FILE has no path specification,
%                       it is searched for in the current directory.
%
%    SQUEEZEFILE(...,OPT) is the same as above, but OPT options are used.
%
%    COUNTER = SQUEEZEFILE(...) also returns a struct with the fields
%    folders, red, writen, errors, bytesold and bytessaved.
%
%    OPT is a struct with fields:
%
%        region,           {'left','right','extremes'}
%                          specifies which part of each line must be considered:
%                          beginning (left), ending (right), or both (extremes)
%
%        leftaction,       {'none','space','tab','remove'}
%                          what to do with the leading blanks of a line:
%                          leave them, rewrite them as spaces, rewrite them
%                          as tabs (plus residual spaces), or remove them
%
%        tabsize,          (=N)
%                          tab size (>0)
%
%        carriagereturn,   (0,1)
%                          specifies whether the carriage return character
%                          must be used (=1) to write the text file or not
%
%        subfolders,       (0,1)
%                          specifies whether the function should be applied
%                          to subfolders (=1) or not
%
%        logfile,          (0,1)
%                          specifies whether log info about files should be
%                          displayed or not
%
%        report,           (0,1)
%                          specifies whether the final report should be
%                          displayed or not
%
%    Use of wildcards is allowed. For example:
%
%        squeezefile('*.txt') applies to all .txt files in the current directory
%
%    Some option preset profiles are available.
%    Enter SQUEEZEFILE(...,PROFILE) to use them (PROFILE instead of OPT),
%    where PROFILE may be one of the following:
%
%       'default',
%                 region: 'extremes'
%             leftaction: 'tab'
%                tabsize: 4
%         carriagereturn: 0
%             subfolders: 0
%                logfile: 1
%                 report: 1
%
%       'right',
%                 region: 'right'
%             leftaction: 'none'    ('not used')
%                tabsize: 4         ('not used')
%         carriagereturn: 0
%             subfolders: 0
%                logfile: 1
%                 report: 1
%
%       'space',
%                 region: 'extremes'
%             leftaction: 'space'
%                tabsize: 4
%         carriagereturn: 0
%             subfolders: 0
%                logfile: 1
%                 report: 1
%
%       'compact',
%                 region: 'extremes'
%             leftaction: 'remove'
%                tabsize: 4         ('not used')
%         carriagereturn: 0
%             subfolders: 0
%                logfile: 1
%                 report: 1
%
%    If no blanks are removed, the file remains unchanged.


%   $Author: Giuseppe Ridino' $
%   $Revision: 3.0 $  $Date: 13-May-2004 15:46:31 $


% check the number of input/output arguments explicitly
% (nargchk and the three-argument nargoutchk form are deprecated)
if nargin>2
	error('Too many input arguments.')
end
if nargout>1
	error('Too many output arguments.')
end

% defaults: current directory, 'default' profile
fileinfo = cd;
opt = 'default';
if nargin>=1
	fileinfo = varargin{1};
end
if nargin==2
	opt = varargin{2};
end

% check opt: a profile name is expanded to the corresponding struct
if ischar(opt)
	opt = getopt(opt);
end
if ~isoptok(opt)
	error('wrong option format')
end

% check fileinfo before handing it to EXIST/FILEPARTS.
% A non-char value would make EXIST fail with an unrelated message, and an
% empty string would end up as DIR([cd filesep]) which lists (and would
% squeeze) every file of the current directory, not only M-files.
if ~ischar(fileinfo) || isempty(fileinfo)
	error('Argument must be a directory name, a fullpath file name, or a file name! Wildcards can be used')
end

% handle fileinfo
if exist(fileinfo,'dir')
	% a directory: work on all its M-files
	filepath = fileinfo;
	filespec = '*.m';
else
	% a file name, possibly with wildcards: split folder and file spec
	[filepath,files,ext] = fileparts(fileinfo);
	if isempty(filepath)
		filepath = cd;
	end
	filespec = [files,ext];
	if exist(fileinfo,'file')
		% a single existing file: no subfolder scan
		opt.subfolders = 0;
	end
end

% call local squeeze function
counter = squeezefolder(filepath,filespec,opt);

% final report (only if requested through opt.report)
if opt.report
	fprintf('\n');
	fprintf('%5g  folders scanned\n',counter.folders);
	fprintf('%5g  file red\n',counter.red);
	fprintf('%5g  file written\n',counter.writen);
	fprintf('%5g  errors\n',counter.errors);
	fprintf('\n');
	if counter.bytesold>0
		fprintf('saved %g/%g bytes (%g%%)\n',counter.bytessaved,counter.bytesold,100*counter.bytessaved/counter.bytesold);
		fprintf('\n');
	end
end

% output result
if nargout==1
	varargout = {counter};
end


% ##################################################################
function counter = squeezefolder(filepath,filespec,opt)
% SQUEEZEFOLDER Squeeze the files of one folder matching a file spec.
%    COUNTER = SQUEEZEFOLDER(FILEPATH,FILESPEC,OPT) filters every line of
%    every file returned by DIR([FILEPATH FILESEP FILESPEC]) through
%    STRFILTER and rewrites the files whose content changed.
%    Lines are terminated with CR+LF when OPT.carriagereturn is set and
%    with LF otherwise. When OPT.subfolders is set the subfolders of
%    FILEPATH are processed as well and their counters are added.
%
%    COUNTER is a struct (see COUNTERINIT) holding the number of folders
%    scanned, files read, files written, errors, and the number of
%    characters (line terminators excluded) before squeezing and saved.

% preset counter
counter = counterinit;

% increment folder counter
counter.folders = counter.folders+1;

% extract the names of the matching entries that are not folders
content = dir([filepath filesep filespec]);
if isempty(content)
	files = {};
else
	folderflag = [content.isdir]; % do not shadow the ISDIR function
	files = {content(~folderflag).name};
end
Nfiles = length(files);

% per-file character counters
bytes_old = zeros(Nfiles,1);
bytes_new = bytes_old;

% line terminator used when a file is rewritten
if opt.carriagereturn
	eol = sprintf('\r\n');
else
	eol = sprintf('\n');
end

% log "working on folder..."
if opt.logfile
	fprintf('Folder %s ... \n',filepath);
end

% loop each file
for index = 1:Nfiles
	filename = files{index};
	fullname = fullfile(filepath,filename);
	% log filename (passed as data: it may contain '%' or '\')
	if opt.logfile
		fprintf('File %s ... ',filename);
	end
	try
		% log "read file"
		if opt.logfile
			fprintf('reading ... ');
		end

		% 1. read the full file, one cell per line
		str = textread(fullname,'%s','delimiter','\n','whitespace','');
		str_new = str;

		% 2. filter all lines and count the characters before/after
		nold = 0;
		nnew = 0;
		for nline = 1:numel(str)
			str_new{nline} = strfilter(str{nline},opt);
			nold = nold + numel(str{nline});
			nnew = nnew + numel(str_new{nline});
		end
		% until the file is really rewritten nothing has been saved
		bytes_old(index) = nold;
		bytes_new(index) = nold;
		counter.red = counter.red + 1;

		% str and str_new always have the same size, so a plain
		% comparison tells whether any line changed
		if ~isequal(str,str_new)
			% log "write file"
			if opt.logfile
				fprintf('writing ... ');
			end

			% 3. build the whole text. Each line is concatenated with its
			% terminator so that empty lines can never be dropped by the
			% argument handling of FPRINTF.
			outlines = str_new(:)';
			if opt.carriagereturn
				% avoid a doubled CR if a line still ends with one
				for nline = 1:numel(outlines)
					if ~isempty(outlines{nline}) && outlines{nline}(end)==13
						outlines{nline}(end) = [];
					end
				end
			end
			parts = [outlines; repmat({eol},1,numel(outlines))];
			txt = [parts{:}];

			% 4. overwrite the file; any failure is reported through the
			% outer catch (one error counted, "error" logged)
			fid = fopen(fullname,'w');
			if fid<0
				error('cannot open file for writing')
			end
			try
				fprintf(fid,'%s',txt);
			catch
				fclose(fid); % do not leak the handle
				error('cannot write file')
			end
			fclose(fid);
			counter.writen = counter.writen + 1;
			bytes_new(index) = nnew;

			% log "bytes"
			if opt.logfile
				fprintf('saved %g/%g bytes ... ',bytes_old(index)-bytes_new(index),bytes_old(index));
			end
		else
			% log "unchanged"
			if opt.logfile
				fprintf('unchanged ... ');
			end
		end

		% log "done"
		if opt.logfile
			fprintf('done\n');
		end

	catch
		% log "error"
		if opt.logfile
			fprintf('\nerror\n');
		end
		counter.errors = counter.errors + 1;
	end
end

% saving statistics data
counter.bytesold = sum(bytes_old);
counter.bytessaved = sum(bytes_old-bytes_new);

% apply the function to subfolders if required
if opt.subfolders
	countersubs = squeezesubfolders(filepath,filespec,opt);
	counter = countersum(counter,countersubs);
end


% ##################################################################
%        region,        {'left','right','extremes'}
%                       specify which part of each line must be considered,
%                       beginning (left), ending (right), or both (extremes)
%
%        leftaction,    {'none','space','tab','remove'}
%
%        tabsize,       (=N)
%                       tab size (>0)
%        carriagereturn {0,1}
function result = isoptok(opt)
% ISOPTOK Check an option struct.
%    RESULT = ISOPTOK(OPT) returns a scalar logical which is true only if
%    OPT is a 1x1 struct having exactly the fields of GETOPT('default')
%    and every field has the class declared by GETOPT('type') and a value
%    allowed by GETOPT('value'):
%       'char'   fields must exactly match one of the allowed strings
%       'double' fields must be a scalar double within the range encoded
%                by the value table (-inf: x<0, -1: x<=0, 1: x>=0, inf: x>0)
%       'set'    fields must be equal (ISEQUAL) to one element of the set
result = false;

% only a single, non-empty struct can be a valid option set
if isempty(opt) || ~isstruct(opt) || numel(opt)~=1
	return
end

% the field names must be exactly the expected ones (in any order)
fields = sort(fieldnames(opt));
if ~isequal(fields,sort(fieldnames(getopt('default'))))
	return
end

% tables describing class and allowed values of each field
type = getopt('type');
value = getopt('value');

result = true;
for k = 1:numel(fields)
	thisfield = fields{k};
	thisvalue = opt.(thisfield);
	allowed = value.(thisfield);
	switch type.(thisfield)
		case 'char'
			% exact match against the list of allowed strings
			thisresult = ischar(thisvalue) && any(strcmp(thisvalue,allowed));
		case 'double'
			% a scalar is required, otherwise the comparison below would
			% return an array and the final result would not be a scalar
			thisresult = isa(thisvalue,'double') && numel(thisvalue)==1;
			if thisresult
				switch allowed
					case -inf
						thisresult = (thisvalue<0);
					case -1
						thisresult = (thisvalue<=0);
					case 1
						thisresult = (thisvalue>=0);
					case inf
						thisresult = (thisvalue>0);
					otherwise
						error(sprintf('wrong %s option value',thisfield))
				end
			end
		case 'set'
			% any class is accepted, only the content is checked
			thisresult = false;
			for index = 1:numel(allowed)
				thisresult = thisresult | isequal(thisvalue,allowed{index});
			end
		otherwise
			thisresult = false;
	end
	result = result && thisresult;
end


% ##################################################################
%        region,         {'left','right','extremes'}
%                        specify which part of each line must be considered,
%                        beginning (left), ending (right), or both (extremes)
%
%        leftaction,     {'none','space','tab','remove'}
%
%        tabsize,        (=N)
%                        tab size (>0)
%        carriagereturn, {0,1}
%        subfolders,     {0,1}
function opt = getopt(id)
% GETOPT Return an option structure.
%    OPT = GETOPT(ID) returns, depending on the string ID:
%       'type'     the class expected for each option field
%                  ('char', 'double', or 'set' for "one of a set of values")
%       'value'    the allowed values of each option field
%       'default', 'right', 'space', 'compact'
%                  the corresponding preset option profile
%    Any other string raises an error, as does a non-char ID.
if ~ischar(id)
	error('Option profile must be a char string')
end
switch id
	case 'type'
		% class of each field
		opt = struct(...
			'region',        'char',...
			'leftaction',    'char',...
			'tabsize',       'double',...
			'subfolders',    'set',...
			'logfile',       'set',...
			'report',        'set',...
			'carriagereturn','set');

	case 'value'
		% allowed values: cell arrays are sets, inf means "x>0"
		% (cell values are wrapped in a cell to get a 1x1 struct)
		opt = struct(...
			'region',         {{'left','right','extremes'}},...
			'leftaction',     {{'none','space','tab','remove'}},...
			'tabsize',        inf,...
			'carriagereturn', {{0,1}},...
			'subfolders',     {{0,1}},...
			'logfile',        {{0,1}},...
			'report',         {{0,1}});

	case {'default','right','space','compact'}
		% all the presets share the default profile ...
		opt = struct(...
			'region',         'extremes',...
			'leftaction',     'tab',...
			'tabsize',        4,...
			'subfolders',     0,...
			'logfile',        1,...
			'report',         1,...
			'carriagereturn', 0);
		% ... and differ from it only in the fields below
		switch id
			case 'right'
				opt.region     = 'right';
				opt.leftaction = 'none';
			case 'space'
				opt.leftaction = 'space';
			case 'compact'
				opt.leftaction = 'remove';
		end

	otherwise
		error(['Unknown option profile ' id])
end


% ##################################################################
function str = strfilter(str,opt)
% STRFILTER Remove/normalize the blanks at the extremes of one line.
%    STR = STRFILTER(STR,OPT) processes the char row vector STR according
%    to the fields of OPT:
%       OPT.region      'left', 'right' or 'extremes': side(s) to process
%       OPT.leftaction  how the leading run of spaces/tabs is rewritten:
%                       'space'  as the equivalent number of spaces
%                       'tab'    as tabs plus the residual spaces
%                       'remove' removed
%                       'none'   (or anything else) left untouched
%       OPT.tabsize     width of a tab stop used to measure the indentation
%    On the right side every trailing whitespace character (ISSPACE) is
%    removed. A line made of whitespace only becomes empty when the right
%    side is processed; when only the left side is processed it is
%    replaced by its rewritten indentation. An empty STR is returned as is.
if isempty(str)
	return
end

leftstr = '';
minpos = 1;
maxpos = length(str);
doleft = any(strcmp(opt.region,{'left','extremes'}));
doright = any(strcmp(opt.region,{'right','extremes'}));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 1. take care of left side (if required)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if doleft
	tabsize = opt.tabsize;
	len = length(str);
	% measure the indentation width; on exit index is the position of the
	% first character that is neither a space nor a tab, or len+1 when the
	% whole line is made of spaces/tabs (so that none of them is kept)
	numblanks = 0;
	index = 1;
	while index<=len
		c = str(index);
		if c==32
			numblanks = numblanks+1;
		elseif c==9
			% a tab moves to the next tab stop
			numblanks = (floor(numblanks/tabsize)+1)*tabsize;
		else
			break
		end
		index = index+1;
	end
	% apply leftaction
	switch opt.leftaction
		case 'space'
			leftstr = char(zeros(1,numblanks)+32);
		case 'tab'
			Nt = floor(numblanks/tabsize);
			Nb = numblanks-Nt*tabsize;
			leftstr = [char(zeros(1,Nt)+9) char(zeros(1,Nb)+32)];
		case 'remove'
			leftstr = '';
		otherwise % 'none': keep the line from its first character
			leftstr = '';
			index = 1;
	end
	minpos = index;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 2. take care of right side (if required)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if doright
	maxpos = max(find(~isspace(str)));
	if isempty(maxpos)
		% whitespace only: everything is a trailing blank, and no
		% rewritten indentation must be put back
		str = '';
		return
	end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% 3. return the filtered string
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
str = [leftstr str(minpos:maxpos)];


% ##################################################################
function counter = squeezesubfolders(filepath,filespec,opt)
% SQUEEZESUBFOLDERS Apply SQUEEZEFOLDER to each subfolder of a folder.
%    COUNTER = SQUEEZESUBFOLDERS(FILEPATH,FILESPEC,OPT) calls SQUEEZEFOLDER
%    with FILESPEC and OPT on every folder listed by DIR(FILEPATH), except
%    '.' and '..', and returns the sum of the counters obtained.
counter = counterinit;

% names of the folder entries of FILEPATH
listing = dir(filepath);
folderflag = [listing.isdir];
names = {listing(folderflag).name};

% keep everything but the current/parent folder entries
keep = ~strcmp(names,'.') & ~strcmp(names,'..');
names = names(keep);

% accumulate the counters of all the subfolders
for k = 1:length(names)
	subpath = [filepath filesep names{k}];
	counter = countersum(counter,squeezefolder(subpath,filespec,opt));
end


% ##################################################################
function counter = countersum(counter,counter_A)
% COUNTERSUM Add the statistics of COUNTER_A to COUNTER, field by field.
%    Only the fields folders, red, writen, errors, bytesold and bytessaved
%    are summed; both inputs must have them.
names = {'folders','red','writen','errors','bytesold','bytessaved'};
for k = 1:numel(names)
	name = names{k};
	counter.(name) = counter.(name) + counter_A.(name);
end


% ##################################################################
function counter = counterinit
% COUNTERINIT Return a statistics counter with all its fields set to zero.
counter = struct(...
	'folders',    0,...
	'red',        0,...
	'writen',     0,...
	'errors',     0,...
	'bytesold',   0,...
	'bytessaved', 0);

Contact us at files@mathworks.com