%GEM6 Machine to execute self-describing grammars
% G = gem6(); builds a GEM object.
% G.self0 is a self-describing grammar
% G.self as above, preprocessed for use
% G.self1 as above, using long phrase names
% G.postfix0 describes infix to postfix transformation
% G.postfix as above, preprocessed for use
% G.nostar1 transforms x* to X, <x>* to <x*>
% G.nostar2 adds rules X=xX;x=;
% G.nostar3 adds rules <x*>=<x><x*>;<x*>=;
% G.noplus replaces x+ and <x>+ with xx* and <x><x>*
% G.pretty0 a grammar pretty printer
% G.pretty as above, preprocessed for use
% G.invert grammar inverter
% G.nowhite deblanker
% G.scan function to call deblanker
% G.exe function to run X86 code
% G.run function to execute o=GEM(i,g,flags)
% EXAMPLES:
% G = gem6(); % constructor
% G.scan('x y z') % deblank a string
% G.self1 % a version of self
% G.run(G.scan(G.self), G.self, 'dlua')% self on self yields ''
% G.run(G.self0, G.pretty, 'DLUA') % pretty self
% G.run('', 'g="1";') % prints '1'
% G.run('', 'g="1";', 'T') % example of trace
% G.exe('5589E5606133C0C9C3') % execute X86 code
% COPYRIGHT: 1988 W. M. McKeeman. See license.txt.
% MODS: 1988.06.02 -- mckeeman -- original
% 2008.06.11 -- mckeeman@dartmouth -- moved to MEX
% 2009.05.04 -- lu.he@dartmouth -- major extension
function obj = gem6()
EOL = char(10); % ascii end-of-line
% ------- input grammars -----------------
% de-white-ed de-whiter-er
% removes blank and EOL, may fail on bad input
nowhite = [ % requires flag A
'<g>=<s><g>;' ...
'<g>=;' ...
'<s>='' '';' ... discard blank
'<s>=''' EOL ''';' ... discard EOL
'<s>=<I><I><I>;' ... keep '''
'<s>=<I><i>;' ... keep 'input stuff'
'<s>=<O><O><O>;' ... keep """
'<s>=<O><o>;' ... keep "output stuff"
'<s>=A;' ... keep everything else
'<I>=''''''"''";' ... see ', send '
'<i>=<I>;' ... comes after input stuff
'<i>=A<i>;' ... keep an input char
'<O>=''"''""";' ... see ", send "
'<o>=<O>;' ... comes after output stuff
'<o>=A<o>;' ... expand one output char
];
% Grammar-grammar for extended IOG;
% must be preprocessed by nostar1, nostar2, nostar3 and nodup;
self0 = [ % requires flag luad
'<g> = <r>*;' EOL ...
'<r> = <n> ''='' <s>* '';'';' EOL ...
'<n> = l;' EOL ...
'<n> = ''<'' l n* ''>'';' EOL ...
'<n> = ''<'' l ''*>'';' EOL ...
'n = l;' EOL ...
'n = d;' EOL ...
'n = ''_'';' EOL ...
'<s> = i i i;' EOL ...
'<s> = i <i>;' EOL ...
'<s> = o o o;' EOL ...
'<s> = o <o>;' EOL ...
'<s> = l ''*'';' EOL ...
'<s> = l ''+'';' EOL ...
'<s> = ''<'' l ''>'' ''*'';' EOL ...
'<s> = ''<'' l ''>'' ''+'';' EOL ...
'<s> = <n>;' EOL ...
'i = '''''';' EOL ...
'<i> = i;' EOL ...
'<i> = a <i>;' EOL ...
'o = ''"'';' EOL ...
'<o> = o;' EOL ...
'<o> = a <o>;' EOL ...
];
% Same as self0, except uses multi-character phrase names
self1 = [ % requires flag luad
'<self> = <rules>;' EOL ...
'<rules> = <rule> <rules>;' EOL ...
'<rule> = ;' EOL ...
'<rule> = <name> ''='' <symbols> '';'';' EOL ...
'<name> = l;' EOL ...
'<name> = ''<'' l n* ''>'';' EOL ...
'<name> = ''<'' l ''*>'';' EOL ...
'n = l;' EOL ...
'n = d;' EOL ...
'n = ''_'';' EOL ...
'<symbols> = <symbols> <symbol>;' EOL ...
'<symbols> = ;' EOL ...
'<symbol> = i i i;' EOL ...
'<symbol> = i <any_in>;' EOL ...
'<symbol> = o o o;' EOL ...
'<symbol> = o <any_out>;' EOL ...
'<symbol> = ''<'' l ''>'' ''*'';' EOL ...
'<symbol> = ''<'' l ''>'' ''+'';' EOL ...
'<symbol> = l ''*'';' EOL ...
'<symbol> = l ''+'';' EOL ...
'<symbol> = <name>;' EOL ...
'<any_in> = i;' EOL ...
'<any_in> = a <any_in>;' EOL ...
'i = '''''';' EOL ...
'<any_out> = o;' EOL ...
'<any_out> = a <any_out>;' EOL ...
'o = ''"'';' EOL ...
];
% an IOG pretty printer
% Handles extra and missing whitespace, * and +
% Leaves multicharacter inputs and outputs unchanged
% Requires preprocessing by nostar1, nostar2 and nostar3
% <g> is a grammar, <r> is a rule,
% <s> is a symbol, <p> is a phrase name,
% b is whitespace
pretty0 = [ % needs flag LUDA
'<g> = b* <r>*;' EOL ... rules
'<r> = <p> b* ''='' " =" <s>* b* '';'' b* ";' EOL '";' EOL ...
'<p> = L;' EOL ... phrase name
'<p> = ''<'' "<" L n* ''>'' ">";' EOL ... <stuff>
'<p> = ''<'' "<" L ''*>'' "*>";' EOL ... <s*>
'n = L;' EOL ... letter
'n = D;' EOL ... digit
'n = ''_'' "_";' EOL ... _
'<s> = b* " " <t>;' EOL ... symbol
'<t> = I I I;' EOL ... '''
'<t> = I <i>;' EOL ... 'stuff'
'<t> = O O O;' EOL ... """
'<t> = O <o>;' EOL ... "stuff"
'<t> = ''<'' "<" L ''>'' ">" b* ''*'' "*";' EOL ... <s>*
'<t> = ''<'' "<" L ''>'' ">" b* ''+'' "+";' EOL ... <s>+
'<t> = L b* ''*'' "*";' EOL ... s*
'<t> = L b* ''+'' "+";' EOL ... s+
'<t> = <p>;' EOL ... phrase name
'I = '''''' "''";' EOL ... see ', send '
'<i> = I;' EOL ... end of 'stuff'
'<i> = A <i>;' EOL ... an in char
'O = ''"'' """;' EOL ... see ", send "
'<o> = O;' EOL ... end of "stuff"
'<o> = A <o>;' EOL ... an out char
'b = '' '';' EOL ... see blank
'b = ''' EOL ''';' EOL ... see EOL
];
% invert the input/output of a grammar
% no need to deblank input
invert = [ % requires flag A
'g = s g;' EOL ...
'g = ;' EOL ...
's = I I I;' EOL ...
's = I <i>;' EOL ...
's = O O O;' EOL ...
's = O <o>;' EOL ...
's = A;' EOL ...
'I = '''''' """;' EOL ... see ', send "
'<i> = I;' EOL ...
'<i> = A <i>;' EOL ...
'O = ''"'' "''";' EOL ... see ", send '
'<o> = O;' EOL ...
'<o> = A <o>;' EOL ...
];
% expr with * to operator-late PFN
% input must be deblanked
postfix0 = [ % requires flag LD
'g = e;' EOL ...
'e = t r*;' EOL ...
'r = ''+'' t "+";' EOL ...
'r = ''-'' t "-";' EOL ...
't = f s*;' EOL ...
's = ''*'' f "*";' EOL ...
's = ''/'' f "/";' EOL ...
'f = L;' EOL ...
'f = D;' EOL ...
'f = ''('' e '')'';' EOL ...
];
% replace x* with X in a grammar (only lower)
% replace <x>* with <x*> in a grammar
nostar1 = [ % requires flag ALUD
'<g> = <r*>;' EOL ...
'<r> = <p> ''='' "=" <s*> '';'' ";' EOL '";' EOL ...
'<p> = L;' EOL ...
'<p> = ''<'' "<" L N ''>'' ">";' EOL ...
'<p> = ''<'' "<" L ''*>'' "*>";' EOL ...
'n = L;' EOL ...
'n = D;' EOL ...
'n = ''_'' "_";' EOL ...
'<s> = I I I;' EOL ...
'<s> = I <i>;' EOL ...
'<s> = O O O;' EOL ...
'<s> = O <o>;' EOL ...
'<s> = <p>;' EOL ...
sprintf('<s>=''%c*''"%c";', ...
shuffle('a':'z','A':'Z')) EOL ...
sprintf('<s>=''<%c>*''"<%c*>";', ...
repeat(['a':'z','A':'Z'], 2)) EOL ...
'I = '''''' "''";' EOL ...
'<i> = I;' EOL ...
'<i> = A <i>;' EOL ...
'O = ''"'' """;' EOL ...
'<o> = O;' EOL ...
'<o> = A <o>;' EOL ...
'b = '' '';' EOL ...
'b = ''' EOL ''';' EOL ...
'B = b B;' EOL ...
'B =;' EOL ...
'N = n N;' EOL ...
'N =;' EOL ...
'<r*>=<r><r*>;' EOL ...
'<r*>=;' EOL ...
'<s*>=<s><s*>;' EOL ...
'<s*>=;' EOL ...
];
% make new rules to define a*
% 'x*' goes to "X=xX;X=;";
% input must be deblanked
nostar2 = [ % requires flag alud
'<g> = <r> <g>;' EOL ...
'<g> =;' EOL ...
'<r> = l ''='' <f> '';'';' EOL ...
'<r> = <n> ''='' <f> '';'';' EOL ...
'<n> = ''<'' l <m> ''>'';' EOL ...
'<n> = ''<'' l ''*>'';' EOL ...
'<m> = n <m>;' EOL ...
'<m> =;' EOL ...
'n = l;' EOL ...
'n = d;' EOL ...
'n = ''_'';' EOL ...
'<f> = <p> <f>;' EOL ...
'<f> =;' EOL ...
'<p> = i i i;' EOL ...
'<p> = i <i>;' EOL ...
'<p> = o o o;' EOL ...
'<p> = o <o>;' EOL ...
'<p> = s;' EOL ...
'<p> = ''<'' l ''>'' ''*'';' EOL ...
'<p> = l;' EOL ...
'<p> = <n>;' EOL ...
'i = '''''';' EOL ...
'<i> = i;' EOL ...
'<i> = a <i>;' EOL ...
'o = ''"'';' EOL ...
'<o> = o;' EOL ...
'<o> = a <o>;' EOL ...
sprintf(sprintf('s=''%c*''"%%c=%c%%c;%%c=;";', ...
repeat('a':'z',2)),repeat('A':'Z',3)) EOL ...
];
% make new rules to define <x>*
% '<x>*' goes to "<x*>=<x><x*>;<x*>=;"
% requires deblanked input
nostar3 = [ % requires flag alud
'<g> = <r> <g>;' EOL ...
'<g> =;' EOL ...
'<r> = l ''='' <f> '';'';' EOL ...
'<r> = <n> ''='' <f> '';'';' EOL ...
'<n> = ''<'' l <m> ''>'';' EOL ...
'<n> = ''<'' l ''*>'';' EOL ...
'<m> = n <m>;' EOL ...
'<m> =;' EOL ...
'n = l;' EOL ...
'n = d;' EOL ...
'n = ''_'';' EOL ...
'<f> = <p> <f>;' EOL ...
'<f> =;' EOL ...
'<p> = i i i;' EOL ...
'<p> = i <i>;' EOL ...
'<p> = o o o;' EOL ...
'<p> = o <o>;' EOL ...
'<p> = l ''*'';' EOL ...
'<p> = l;' EOL ...
'<p> = s;' EOL ...
'<p> = <n>;' EOL ...
'i = '''''';' EOL ...
'<i> = i;' EOL ...
'<i> = a <i>;' EOL ...
'o = ''"'';' EOL ...
'<o> = o;' EOL ...
'<o> = a <o>;' EOL ...
sprintf('s=''<%c>*''"<%c*>=<%c><%c*>;<%c*>=;";', ...
repeat(['a':'z' 'A':'Z'],5)) EOL ...
];
% replace x+ in a grammar with xx* (only lower)
% replaces <x>+ in a grammar with <x><x>* (upper or lower)
noplus = [ % requires flag ALUD
'<g> = <r*>;' EOL ...
'<r> = <p> ''='' "=" <s*> '';'' ";' EOL '";' EOL ...
'<p> = L;' EOL ...
'<p> = ''<'' "<" L <m> ''>'' ">";' EOL ...
'<p> = ''<'' "<" L ''*>'' "*>";' EOL ...
'<m> = n <m>;' EOL ...
'<m> =;' EOL ...
'n = L;' EOL ...
'n = D;' EOL ...
'n = ''_'' "_";' EOL ...
'<s> = I I I;' EOL ...
'<s> = I <i>;' EOL ...
'<s> = O O O;' EOL ...
'<s> = O <o>;' EOL ...
'<s> = L ''*'' "*";' EOL ...
'<s> = ''<'' "<" L ''>'' ">" ''*'' "*";' EOL ...
sprintf('<s>=''%c+''"%c%c*";', ...
repeat('a':'z',3)) EOL ...
sprintf('<s>=''<%c>+''"<%c><%c>*";', ...
repeat(['a':'z','A':'Z'],3)) EOL ...
'<s> = <p>;' EOL ...
'I = '''''' "''";' EOL ...
'O = ''"'' """;' EOL ...
'<i> = I;' EOL ...
'<i> = A <i>;' EOL ...
'<o> = O;' EOL ...
'<o> = A <o>;' EOL ...
'<s*>=<s><s*>;' EOL ...
'<s*>=;' EOL ...
'<r*>=<r><r*>;' EOL ...
'<r*>=;' EOL ...
];
% convert ascii HEX characters into binary
% Note: fails in MEX interface because of imbedded nulls
% mxArrayToString truncates the input at the null (MATLAB bug)
char2hex = ['g=hg;' EOL 'g=;' EOL];
v = 0;
for i=['0':'9' 'A':'F'];
for j=['0':'9' 'A':'F']
char2hex = [char2hex 'h=''' i j ''' "' v '";' EOL]; %#ok<AGROW>
v = v + 1;
end;
end;
% Apply the * and + removing transformations.
% The following integers are needed for nodup
letlen = numel('X=xX;x=;');
bnflen = numel('<x*>=<x><x*>;<x*>=;');
% transform away the *
s = scan(self0);
selfgrammar = run(s, nostar1, 'LUA');
selfrules2 = run(s, nostar2, 'lua');
selfrules3 = run(s, nostar3, 'lua');
self = [selfgrammar selfrules2 selfrules3];
% transform away the *
s = scan(pretty0);
prettygrammar = run(s, nostar1, 'LUA');
prettyrules2 = nodup(run(s, nostar2, 'lua'), letlen);
prettyrules3 = nodup(run(s, nostar3, 'lua'), bnflen);
pretty = [prettygrammar prettyrules2 prettyrules3];
% transform away the *
s = scan(postfix0);
post1 = run(s, nostar1, 'LUA');
post2 = run(s, nostar2, 'lua');
postfix = [post1 post2];
obj = public();
%return;
function otxt = run(itxt, gtxt, flags)
if nargin == 2, flags = ''; end;
otxt = iog6(itxt, gtxt, flags); % call mex
end
function otxt = scan(itxt)
otxt = iog6(itxt, nowhite, 'A');
end
% Remove duplicates of length sz from the input.
% In a loop,
% 1. capture and remove the first n chars of input
% 2. construct an IOG ignoring the capture and passing the rest
% 3. apply the IOG, then repeat the process on the result.
% 4. the concatenation of the "captures" is the answer.
function final = nodup(dups, sz)
assert(mod(numel(dups),sz)==0);
final = '';
subs = ['p=' char(zeros(1,sz)+'A') ';'];
while numel(dups) > 0
next = dups(1:sz);
dups(1:sz) = [];
gr = ['g=pg;g=;p=''' next ''';' subs];
final = [final next]; %#ok<AGROW>
dups = run(dups, gr, 'A'); % gr needs 'A'
end
end
% Pack MATLAB chars into hexadecimal.
% This is a case of using MATLAB to do something essential, which
% apparently violates the stated challenge of bootstrapping everything.
% GEM is actually capable of this transformation (see char2hex)
% but MATLAB/C interface cannot transmit it.
% The problem is that machine language has lots of zeros (ascii NULL)
% which truncate the output.
% The MEX limitation could be got around, but it is not worth the effort.
function res = val(hexdigit)
if '0'<=hexdigit && hexdigit<='9', res = hexdigit-'0';
else res = hexdigit - 'A' + 10;
end
end
% Run hex string on underlying hardware.
% Squeeze 2 MATLAB chars into each code byte.
function rc = exe(hex)
switch computer
case {'PCWIN', 'GLNX86', 'MACI'}
otherwise
error('Intel x86 hardware required');
end
nbytes = numel(hex)/2;
code = zeros(1,nbytes,'uint8');
for pc = 1:nbytes % squeeze out leading zeros
b1 = val(hex(2*pc-1));
b2 = val(hex(2*pc));
code(pc) = bitor(bitshift(b1,4),b2);
end
rc = nonox(code);
rc = runX86(code);
end
function o = public()
o = struct;
o.self0 = self0; % lookable
o.self1 = self1; % long names
o.self = self; % usable
o.postfix0 = postfix0; % lookable
o.postfix = postfix; % usable
o.nostar1 = nostar1; % x* to X, <x>* to <x*>
o.nostar2 = nostar2; % add rules for X
o.nostar3 = nostar3; % add rules for <x*>
o.noplus = noplus; % remove x+ and <x>+
o.pretty0 = pretty0; % lookable
o.pretty = pretty; % usable
o.invert = invert; % decompiler builder
o.nowhite = nowhite; % deblanker
o.scan = @scan; % deblank
o.exe = @exe; % run X86 code
o.run = @run;
end
end