Code covered by the BSD License  

Highlights from
Data Manipulation Toolbox

from Data Manipulation Toolbox by Dahua Lin
Manipulation of data such as string, array, cell array and struct, and a unit testing framework

strsubs(str, pattern, rule, varargin)
function strnew = strsubs(str, pattern, rule, varargin)
%STRTOKENSUBS Substitute the tokens in a string by converted strings
%
% [ Syntax ]
%   - strnew = strsubs(str, pattern, rule, ...)
%
% [ Arguments ]
%   - str:          the source string
%   - pattern:      the regular expression 
%   - rule:         the substitution rule
%                   \{:
%                       - fixstr: replace each matched sub string with 
%                         a fixed string                       
%                       - conv: replace each matched sub string by
%                         applying a conversion function on the matched
%                       - t-str: replace each token with given strings
%                       - t-conv: replace each token by applying a 
%                         conversion functions on it                                          
%                   \:}
%   - strnew:       the resulting string
%
% [ Description ]
%   - strnew = strsubs(str, pattern, 'fixstr', rstr) replaces all the
%     matched sub strings with a fixed string rstr.
%
%   - strnew = strsubs(str, pattern, 'conv', f, inputform) replaces each
%     matched sub string by applying a conversion function f. Here, f can
%     should be a function handle.
%
%     The inputform specifies what inputs are given to f. It can be
%     \{:
%         - str:    input the whole matched sub string
%                    $ rstr = f(str) $
%         - tokens: input the extracted tokens specified by the pattern
%                    $ rstr = f(token1, token2, ...) $
%         - tkcell: input the cell array of extracted tokens
%                    $ rstr = f({token1, token2, ...}) $
%         - tkstruct: input the struct of named tokens
%                       $ rstr = f(s) $
%                     s is a struct with field names being the token name
%                     and field values being the tokens.         
%     \:}
%
%   - strnew = strsubs(str, pattern, 't-str', rstr) replaces all tokens
%     in each matched sub string with a fixed string rstr.
%
%   - strnew = strsubs(str, pattern, 't-str', rstrs) replaces each token
%     in every matched sub string with the corresponding string in rstrs.
%     If there are nt tokens specified by pattern ,then rstrs should be a
%     cell array with nt strings.
%
%   - strnew = strsubs(str, pattern, 't-conv', f) replaces each token in 
%     every matched sub string by applying the conversion function f as
%        $ rstr = f(token) $
%
%   - strnew = strsubs(str, pattern, 't-conv', fs) replaces each token in
%     every matched sub string by applying the corresponding function. fs
%     is a cell array of nt functions (in function name of function
%     handle).
%
% [ Examples ]
%   - Convert every word 'good' to 'great'
%     \{
%         strsubs('It is a good thing. He is a good man', 'good', 'fixstr', 'great')
%         => 'It is a great thing. He is a great man'  
%     \}
%
%   - Capitalize every word in a title
%     \{
%         strsubs('MATLAB plays an important role', '\w+',  ...
%                 'conv', @(x) [upper(x(1)), x(2:end)], 'str')
%         => 'MATLAB Plays An Important Role'
%     \}
%
%   - Replace each pair of numbers in braces to an addition formula
%     \{
%         strsubs('Look at (1, 2) and (3, 4)', '\(\s*(\d+)\s*,\s*(\d+)\s*\)', ...
%                 'conv', @(x, y) [x, '+', y, '=', num2str(str2double(x) + str2double(y))], ...
%                 'tokens')
%         => 'Look at 1+2=3 and 3+4=7'
%     \}
%
%   - Replace convert a simple for-loop in Matlab to C-syntax with named
%     tokens
%     \{
%         strsubs('for i = 1 : n', 
%                 'for\s+(?<var>\w+)\s*=\s*(?<init>\w+)\s*\:\s*(?<last>\w+)', ...
%                 'conv', @(x) sprintf('for(int %s = %s; %s <= %s; ++%s)', ...
%                                      x.var, x.init, x.var, x.last, x.var), ...
%                 'tkstruct')
%         => 'for(int i = 1; i <= n; ++i)'
%     \}
%
%   - Replace every ip address in the string with xxx.xxx.xxx.xxx
%     \{
%         strsubs('My IP is 123.456.789.012', '(\d+).(\d+).(\d+).(\d+)', ...
%                 't-str', 'xxx')
%         => 'My IP is xxx.xxx.xxx.xxx'
%     \}
%
%   - Suppose in some text, it defines a section 
%     like 'start ... completed', 
%     and we like to convert it to 'begin ... end'
%     \{
%         strsubs('start abc completed  start xyz completed', ...
%                 '(start)\s*\w+\s*(completed)', ...
%                 't-str', {'begin', 'end'})
%         => 'begin abc end  begin xyz end'
%     \}
%
%   - Append each operand of an addition formula with a char a
%     \{
%         strsubs('Here are 1+2 and 4+5', '(\d+)\+(\d+)', ...
%                 't-conv', @(x) ['a', x])
%         => 'Here are a1+a2 and a4+a5'
%     \}
%
%   - Convert all forms like xxx.yyy to [xxx].(yyy)
%     \{
%         strsubs('abc.name1, xyz.field2', '(\w+)\.(\w+)', ...
%                 't-conv', {@(x)['[', x, ']'], @(x)['(', x, ')']})
%         => '[abc].(name1), [xyz].(field2)'
%     \}
%
% [ History]
%   - Created by Dahua Lin, on Jun 26, 2007
%

%% parse and verify input arguments

error(nargchk(4, 5, nargin));

typecheck(str, 'str should be a string or empty', 'string', 'empty');
typecheck(pattern, 'pattern should be a string', 'string');
typecheck(rule, 'rule should be a string', 'string');

%% main delegate

switch rule
    case 'fixstr'
        strnew = strsubs_fixstr(str, pattern, varargin{1});
    case 'conv'
        strnew = strsubs_conv(str, pattern, varargin{1}, varargin{2});
    case 't-str'
        strnew = strsubs_tstr(str, pattern, varargin{1});
    case 't-conv'
        strnew = strsubs_tconv(str, pattern, varargin{1});
    otherwise
        error('string:strsubs:invalidarg', 'Invalid rule %s for strsubs', rule);
end


%% Core functions

function strnew = strsubs_fixstr(str, pattern, rstr)

typecheck(rstr, 'rstr should be a string', 'string');

[sp, ep] = regexp(str, pattern, 'start', 'end');
ranges = [sp; ep];
strnew = strrangesubs(str, ranges, rstr);


function strnew = strsubs_conv(str, pattern, f, inputform)

typecheck(f, 'f should be a function handle', 'function_handle');    
typecheck(inputform, 'inputform should be a string', 'string');

switch inputform
    case 'str'
        [args, sp, ep] = regexp(str, pattern, 'match', 'start', 'end');
        rstrs = cellfun(f, args, 'UniformOutput', false);
    case 'tokens'
        [args, sp, ep] = regexp(str, pattern , 'tokens', 'start', 'end');
        rstrs = cellfun(@(c) f(c{:}), args, 'UniformOutput', false);
    case 'tkcell'
        [args, sp, ep] = regexp(str, pattern, 'tokens', 'start', 'end');
        rstrs = cellfun(f, args, 'UniformOutput', false);
    case 'tkstruct'
        [args, sp, ep] = regexp(str, pattern, 'names', 'start', 'end');
        rstrs = arrayfun(f, args, 'UniformOutput', false);
    otherwise
        error('dmtoolbox:strsubs', ...
            'Invalid inputform %s for the conv rule', inputform);
end

ranges = [sp; ep];
strnew = strrangesubs(str, ranges, rstrs);


function strnew = strsubs_tstr(str, pattern, rstr)

typecheck(rstr, 'rstr should be a either a string or a cell array', ...
        'string', 'cell');

r = regexp(str, pattern, 'tokenExtents');
nm = length(r);

ranges = cellfun(@(x) x', r, 'UniformOutput', false);
ranges = horzcat(ranges{:});

if iscell(rstr)
    rstr = repmat(rstr, 1, nm);
end

strnew = strrangesubs(str, ranges, rstr);



function strnew = strsubs_tconv(str, pattern, conv)

typecheck(conv, 'conv should be either a function handle or a cell of function handles', ...
        'function_handle', 'cell');
    
[tks, r] = regexp(str, pattern, 'tokens', 'tokenExtents');

ranges = cellfun(@(x) x', r, 'UniformOutput', false);
ranges = horzcat(ranges{:});

if iscell(conv)
    rstrs = cellfun(@(c) cellfun(@(tk, f) f(tk), c, conv, 'UniformOutput', false), ...
        tks, 'UniformOutput', false);        
else
    rstrs = cellfun(@(c) cellfun(conv, c, 'UniformOutput', false), ...
        tks, 'UniformOutput', false);
end

rstrs = [rstrs{:}];

strnew = strrangesubs(str, ranges, rstrs);


Contact us at files@mathworks.com