Code covered by the BSD License  

Highlights from
% MATLAB Comment Stripping Toolbox

image thumbnail
from % MATLAB Comment Stripping Toolbox by Peter J. Acklam
A small collection of utilities for stripping MATLAB comments from strings and files.

mlstripcommentsstr(istr)
function ostr = mlstripcommentsstr(istr)
%MLSTRIPCOMMENTSSTR Strip comments from a string with MATLAB code.
%
%   OSTR = MLSTRIPCOMMENTSSTR(ISTR) takes the input string ISTR, assuming it
%   contains MATLAB code, strips all MATLAB comments, and returns the result as
%   the output string OSTR.  The input string must be a row vector or a cell
%   array of row vectors.  Any input string may be a multi-line string, that is
%   a string containing newline characters, for example a string containing the
%   entire contents of an m-file.
%
%   If ISTR is a character array, OSTR is a character array.  If ISTR is a
%   cell array, OSTR is a cell array of the same size with each element
%   processed independently.
%
%   Processing performed on each string, in order:
%     1) lines consisting only of a comment are emptied (the line break is
%        kept, so line numbers are preserved);
%     2) a trailing comment on a line that also contains code is removed;
%     3) trailing blanks/tabs are removed from every line.
%
%   A percent sign inside a single-quoted character literal ('...') or inside
%   a double-quoted string literal ("...") does not start a comment.
%
%   Limitations:  Block comments (%{ ... %}) are not recognized as a unit;
%   only the "%{" and "%}" lines themselves are treated as comment lines.
%   Text following a continuation ("...") is not treated as a comment unless
%   it contains a percent sign.
%
%   An error is raised if an element is not a row vector (empty arrays are
%   accepted).
%
%   Example:  Read all MATLAB code from an m-file opened for reading on file
%   identifier "fid" and strip all comments:
%
%       str = fread(fid);               % data as numerical column vector
%       str = char(str)';               % data as character row vector
%       str = mlstripcommentsstr(str);  % strip all MATLAB comments
%
%   See also MLSTRIPCOMMENTSFILE, MLSTRIPCOMMENTSFID.

%   Author:      Peter J. Acklam
%   Time-stamp:  2004-03-18 08:23:19 +0100
%   E-mail:      pjacklam@online.no
%   URL:         http://home.online.no/~pjacklam

   % Regex for removing a comment on a line possibly containing MATLAB code.
   %
   % Use a persistent variable so that the variable is set up only once.
   %
   % Find the part of the line which is not a comment and then replace the
   % whole line with the part that was not a comment.
   %
   % A comment is started by a percent sign, but not all percent signs start a
   % comment.  A percent sign might be embedded in a string, in which case the
   % percent sign obviously does not start a comment.  So we must look for
   % percent signs which are not embedded in strings.  To do this we must find
   % all strings.  Character literals are delimited by single quotes, but not
   % all single quotes start strings.  A single quote might also be a
   % transpose operator.  So we must look for single quotes which are not
   % transpose operators.  String literals delimited by double quotes are
   % unambiguous, since a double quote has no other meaning in MATLAB code.
   %
   % The MATLAB grammar (syntax and semantics) is not available to the public,
   % but from my knowledge, a single quote is a transpose operator if and only
   % if one of the following criteria is satisfied
   %
   %   1) it follows a right closing delimiter
   %   2) it follows right after a variable name or function name
   %   3) it follows right after a period (as part of the ".'" operator)
   %   4) it follows right after another transpose operator
   %
   % This can be immediately after a right closing bracket ("]"), right closing
   % parenthesis (")"), right closing brace ("}"), an English letter ("a" to
   % "z" and "A" to "Z"), a digit ("0" to "9"), an underline ("_"), a period
   % ("."), or another single quote ("'").  In addition, single quotes that
   % immediately follow the closing double quote of a string literal are
   % treated as transpose operators.

   persistent mainregex

   if isempty(mainregex)

      % The pattern is written with blanks for readability; all whitespace is
      % removed below before the pattern is used.  Only the outermost group
      % ($1) is referenced in the replacement; the inner groups are used
      % purely for grouping.
      mainregex = [ ...
         ' (                   ' ... % Grouping parenthesis (content goes to $1).
         '   ( ^ | \n )        ' ... % Beginning of string or beginning of line.
         '   (                 ' ... % Grouping parenthesis (token is unused).
         '                     ' ...
         '' ... % Match anything that is neither a comment nor a string...
         '       (             ' ... % Grouping parenthesis (token is unused).
         '           [\]\)}\w.]' ... % Either a character followed by
         '           ''+       ' ... %    one or more transpose operators
         '         |           ' ... % or else
         '           [^''"%]   ' ... %   any character except a single quote or
         '                     ' ... %   a double quote (which start a string)
         '                     ' ... %   or a percent sign (which starts a
         '                     ' ... %   comment).
         '       )+            ' ... % Match one or more times.
         '                     ' ...
         '' ...  % ...or...
         '     |               ' ...
         '                     ' ...
         '' ...  % ...match a single-quoted character literal...
         '       ''            ' ... % Opening single quote that starts the string.
         '         [^''\n]*    ' ... % Zero or more chars that are neither single
         '                     ' ... %   quotes (special) nor newlines (illegal).
         '         (           ' ... % Grouping parenthesis (token is unused).
         '           ''''      ' ... % An embedded (literal) single quote character.
         '           [^''\n]*  ' ... % Again, zero or more chars that are neither
         '                     ' ... %   single quotes nor newlines.
         '         )*          ' ... % Match zero or more times.
         '       ''            ' ... % Closing single quote that ends the string.
         '                     ' ...
         '' ...  % ...or...
         '     |               ' ...
         '                     ' ...
         '' ...  % ...match a double-quoted string literal.
         '       "             ' ... % Opening double quote that starts the string.
         '         [^"\n]*     ' ... % Zero or more chars that are neither double
         '                     ' ... %   quotes (special) nor newlines (illegal).
         '         (           ' ... % Grouping parenthesis (token is unused).
         '           ""        ' ... % An embedded (literal) double quote character.
         '           [^"\n]*   ' ... % Again, zero or more chars that are neither
         '                     ' ... %   double quotes nor newlines.
         '         )*          ' ... % Match zero or more times.
         '       "             ' ... % Closing double quote that ends the string.
         '       ''*           ' ... % Transpose operators right after the string.
         '                     ' ...
         '   )*                ' ... % Match zero or more times.
         ' )                   ' ...
         ' [^\n]*              ' ... % What remains must be a comment.
                  ];

      % Remove all the blanks from the regex.
      mainregex = mainregex(~isspace(mainregex));

   end

   % Initialize output.
   ostr = istr;

   % If input is a character array, put it into a cell array.  We'll later make
   % sure output is a character array if input is a character array.
   if ischar(ostr)
      ostr = {ostr};
   end

   % Note:  With MATLAB 6.5.1 it seems that the '^' and '$' anchors in the
   % regexes '(^|\n)' and '($|\n)' are not working (bug?), so use a
   % workaround:  prepend and append a newline to every string before the
   % regexes are applied, and remove them again afterwards.
   lf = sprintf('\n');             % LF = Line feed character

   % Iterate over each element in the cell array.
   for i = 1 : numel(ostr)

      % Get the ith input string.
      str = ostr{i};

      % All character arrays must be row vectors (empty arrays are allowed).
      if (ndims(str) > 2) || (any(size(str) > 0) && (size(str, 1) ~= 1))
         error('All character arrays must be row vectors.');
      end

      % Prepend and append an LF (anchor workaround, see note above).
      str = [lf, str, lf];

      % Remove comment lines.  The leading line break ($1) is kept, so the
      % comment line becomes an empty line.
      str = regexprep(str, '(^|\n)[^\S\n]*%[^\n]*', '$1', 'tokenize');

      % Remove a comment on a line possibly containing MATLAB code.
      str = regexprep(str, mainregex, '$1', 'tokenize');

      % Remove trailing whitespace from each line.
      str = regexprep(str, '[^\S\n]+($|\n)', '$1', 'tokenize');

      % Compress multiple blank lines (intentionally disabled).
      %str = regexprep(str, '\n\n\n+', sprintf('\n\n'));

      % Remove the leading and trailing LF added above.
      str = str(2:end-1);

      % Insert ith string into output cell array.
      ostr{i} = str;

   end

   % If the input was a character array, make sure output is so too.
   if ischar(istr)
      ostr = ostr{1};
   end

Contact us at files@mathworks.com