image thumbnail
from toy compiler by Bill McKeeman
A toy compiler

parser(tc)
% FILE:     parser.m
% PURPOSE:  Example recursive parser in M
% METHOD:   Recursive descent
% SIG:     [[uint8]] = parser([uint16])
% USAGE:    sr = parser(tc) where
%           sr is a shift/reduce sequence and
%           tc is a vector of token codes (see lexer)
%
% GRAMMAR:  M subset
%                             rule numbers
% program
%    stmts                         1
% stmts
%    stmt                          1
%    stmts stmt                    2
% stmt
%    assign ;                      1
% assign
%    var = expr                    1
% var
%    id                            1
% expr
%    term                          1
%    expr + term                   2
%    expr - term                   3
% term
%    factor                        1
%    term * factor                 2
%    term / factor                 3
%    term \ factor                 4
% factor
%    ( expr )                      1
%    var                           2
%    int                           3

function sr = parser(tc)
% sr: shift-reduce sequence
% tc: token code sequence
  
  % Establish nested variables
  TOK  = enum(tokens);                  % enumerations
  RULE = enum(rules);
  toki = uint16(1);                     % token index
  sr   = ones(100,1,'uint8');           % preallocate sr sequence
  sri  = uint16(0);                     % sr index
  
  program();                            % parse input  (this is IT)
 
  sr = sr(1:sri);                       % trim exactly
  
  
% ----------- nested recursive function definitions ----------------

function program()                      % program
  stmts();
  reduce(RULE.program1); 
end

function stmts()                        % stmts
  stmt();
  reduce(RULE.stmts1);
  while tc(toki) ~= TOK.eof
    stmt();
    reduce(RULE.stmts2);
  end
end

function stmt()                         % stmt
  assign();
  reduce(RULE.stmt1);
end

function assign()                       % assign
  var();
  if tc(toki) == TOK.eq
    shift();                            % discard =
    expr();
  else
    parseerror('expected =, ', tc(toki));
  end
  if tc(toki) == TOK.semi
    shift();                            % discard ;
  else
    parseerror('expected ;, ', tc(toki));
  end
  reduce(RULE.assign1);
end

function var()                          % var
  if tc(toki) == TOK.id
    shift();                            % record id
    reduce(RULE.var1);
  else
    parseerror('expected var, ', tc(toki));
  end
end

function expr()                         % expr
  term();
  reduce(RULE.expr1);
  u = tc(toki);
  while u == TOK.add || u == TOK.sub
    shift();                            % discard + or -
    r = RULE.expr2;                     % usual case
    if u == TOK.sub, r = RULE.expr3; end 
    term();
    reduce(r);
    u = tc(toki);  
  end
end

function term()                         % term
  factor();
  reduce(RULE.term1);
  t = tc(toki);
  while t == TOK.mul || t == TOK.div || t == TOK.vid
    shift();                            % discard * / \
    r = RULE.term2;                     % usual case
    if t == TOK.div, r = RULE.term3; end 
    if t == TOK.vid, r = RULE.term4; end
    factor();
    reduce(r);
    t = tc(toki);  
  end
end

function factor()                       % factor
  w = tc(toki);
  if w == TOK.lp
    shift();                            % discard (
    expr();
    if tc(toki) == TOK.rp
      shift();                            % discard )
    else
      parseerror('expected ), ', tc(toki));
    end
    reduce(RULE.factor1);
  elseif w == TOK.id
    var();
    reduce(RULE.factor2);
  elseif w == TOK.int
    shift();                              % record int
    reduce(RULE.factor3);
  else
    parseerror('unexpected token, ', w);
  end
end

function shift                          % shift
  appd(RULE.shift);                     % special shift marker
  appd(toki);                           % index of token
  toki = toki+1;                        % discard token
end

function reduce(rule)                   % reduce
  appd(rule);
end

function appd(data)                     % append to sr sequence
  if sri >= length(sr)
     sr(end+end) = 0;                   % double allocation
  end
  sri     = sri+1;                      % make room
  sr(sri) = data;                       % place data
end  
  
function parseerror(msg, found)
  disp('***Parse error, sr sequence so far:');
  rulenames = rules;
  tokennames = tokens;
  i = 1;
  while true                            % dump sr sequence so far
    if i >= sri, break; end
    t = sr(i);
    if t == RULE.shift                  % special shift marker
      i = i+1;                          % move to token index
      disp(['   shift ' tokennames{tc(sr(i))}]);
    else
      disp(['  reduce ' rulenames{t}]);
    end
    i = i+1;
  end
  
  error([msg 'found ' tokennames{found}]);  
end
  
end   % of parse

Contact us at files@mathworks.com