|
|
| lexer(src) |
% FILE: lexer.m
% PURPOSE: Simple lexer
% CALL: [tcode, tstart, tend] = lexer(src)
% SIG: [[uint8], [uint16], [uint16]] = lexer([char])
% Accepts M-like source text and produces three arrays:
% tcode(i) is the code identifying the kind of the i-th token.
% src(tstart(i):tend(i)) is the text of the i-th token.
function [tcode, tstart, tend] = lexer(src)
% LEXER  Split M-like source text into a sequence of tokens.
%   [tcode, tstart, tend] = lexer(src)
%
%   Input:
%     src    - source text; it is converted to a char row vector first.
%   Outputs (row vectors, one element per token):
%     tcode  - uint8 token codes taken from the TOK table.
%     tstart - uint16 index in src of the first character of each token.
%     tend   - uint16 index in src of the last character of each token.
%   The last token is always TOK.eof. Its tend is one less than its tstart,
%   so src(tstart(end):tend(end)) is empty.
%
%   Errors are raised when:
%     - src has more than 65534 characters (positions are kept in uint16);
%     - a character code is outside 1..128;
%     - a character does not start any known token.
%
%   Note: char(128) is the end-of-input sentinel, so a char(128) embedded
%   in src ends lexing at that position.

    % Initialize character and token tables.
    TOK = enum(tokens);            % token codes; enum/tokens are defined outside this file
    EOF = uint8(128);              % sentinel character code, also the table size
    nch = double(EOF);

    isdigit = false(1, nch);
    isdigit(double('0':'9')) = true;
    isident = isdigit;             % characters allowed inside an identifier
    isident(double(['a':'z' 'A':'Z' '_'])) = true;

    ch2code = zeros(1, nch);       % 0 means "not a single-character token"
    ch2code('=') = TOK.eq;         % never index with the literal ':' here:
    ch2code('+') = TOK.add;        % MATLAB treats it as the colon subscript
    ch2code('-') = TOK.sub;
    ch2code('*') = TOK.mul;
    ch2code('/') = TOK.div;
    ch2code('\') = TOK.vid;
    ch2code('(') = TOK.lp;
    ch2code(')') = TOK.rp;
    ch2code(';') = TOK.semi;

    % Normalize the input so isspace/isletter see character data and the
    % sentinel can be appended with a plain horizontal concatenation.
    src = char(src(:).');
    n = numel(src);

    % The character index must be able to reach n+1 (the sentinel) without
    % saturating in uint16; otherwise the scan below would never terminate.
    maxLen = double(intmax('uint16')) - 1;
    if n > maxLen
        error(['source text too long: ' num2str(n) ...
               ' characters (limit ' num2str(maxLen) ')']);
    end

    % Allocate storage for the token sequences. Every token except eof
    % consumes at least one character, so n+1 slots always suffice and no
    % growth is needed (this also covers empty input).
    tcode  = zeros(1, n+1, 'uint8');
    tstart = zeros(1, n+1, 'uint16');
    tend   = zeros(1, n+1, 'uint16');
    ti = uint16(0);                % token index
    ci = uint16(1);                % character index
    src = [src char(EOF)];         % append eof sentinel

    % Lex the input.
    while true
        st = ci;
        ch = src(ci);
        if ch < 1 || ch > EOF
            % Report the numeric code: the character itself may be
            % unprintable. Single-argument error() leaves the text as is.
            error(['unexpected input character ' num2str(double(ch))]);
        elseif isspace(ch)                       % blank
            ci = ci + 1;
            continue;                            % ignore whitespace
        elseif isletter(ch)                      % identifier
            % The range test keeps out-of-table characters from indexing
            % isident; they end the identifier and are reported next pass.
            while ch >= 1 && ch <= EOF && isident(ch)
                ci = ci + 1;
                ch = src(ci);
            end
            tok = TOK.id;
        elseif isdigit(ch)                       % number
            while ch >= 1 && ch <= EOF && isdigit(ch)
                ci = ci + 1;
                ch = src(ci);
            end
            tok = TOK.int;
        elseif ch2code(ch) ~= 0                  % single char tokens
            ci = ci + 1;
            tok = ch2code(ch);
        elseif ch == EOF                         % eof
            tok = TOK.eof;
        else
            error(['unexpected input character ''' ch '''']);
        end

        ti = ti + 1;                             % record token info
        tcode(ti)  = tok;
        tstart(ti) = st;
        tend(ti)   = ci - 1;                     % ci starts next token
        if tok == TOK.eof, break; end            % quit
    end

    % Clean up: trim results to the tokens actually produced.
    tcode  = tcode(1:ti);
    tstart = tstart(1:ti);
    tend   = tend(1:ti);
|
|
Contact us at files@mathworks.com