Code covered by the BSD License  

Highlights from
Extended Brookshear Machine emulator and assembler

image thumbnail

Extended Brookshear Machine emulator and assembler

by

 

05 Jan 2009 (Updated )

Emulator and assembler for a simple computer, a teaching aid for computer science courses.

assembler(file)
function machinecode = assembler(file)
%ASSEMBLER is an assembler for the extended Brookshear machine
%   MACHINECODE = ASSEMBLER(FILE) reads assembly language from the file
%   whose name is the argument. If this compiles successfully, a string
%   containing machine code instructions is returned.
%
%   The language for the input file is specified in ASSEMBHELP.M. The
%   format of the output is the same as that of a memory file as specified
%   in BMHELP.m.
%
%   The output starts with two comment lines naming the source file and
%   the assembly time. Every source line then yields one output line: for
%   an instruction or DATA directive this is the two-digit hex address, a
%   colon, the hex code and the source text as a trailing // comment.
%
%   Errors are reported by throwing an MException whose identifier starts
%   with 'bmachine:assembler:'.
%
%   See also ASSEMBHELP, BMHELP

%   Copyright 2008 University of Sussex and David Young

assembly = readtext(file);

% Split input into lines - retain empty lines. Both LF and CRLF endings
% are accepted so that a carriage return is never echoed into the output.
lines = regexp(assembly, '\r?\n', 'split');

nl = char(10);
address = 0;                % zero-based address of the next byte to emit
machinecode = ['// Brookshear Machine, assembled from ' file nl '// at ' datestr(now) nl];
occupied = zeros(1, 256);   % one flag per memory byte (not a 256x256 matrix)
labels = newlabels;

for i = 1:length(lines)
    line = lines{i};
    % initial split of line into fields
    [label, op, args] = parseline(line);
    
    % a label either defines a symbol at the current address or, if it is
    % two hex digits, moves the current address
    if ~isempty(label)
        [address, labels] = notelabel(address, label, labels);
    end
    
    if isequal(upper(op), 'DATA')
        datastr = parsedata(args);
        nbytes = length(datastr)/2;     % two hex digits per byte
        occupied = checkmem(occupied, address, nbytes);
        machinecode = [machinecode dec2hex(address, 2) ': ' datastr];
        address = address + nbytes;
        
    elseif ~isempty(op)
        % every instruction occupies two bytes
        occupied = checkmem(occupied, address, 2);
        
        % get machine code for command and extend output
        [code, target_label] = parsecmd(op, args);
        % The 'AA: ' prefix takes 4 characters, so the last two digits of
        % the 4-digit code - where a label address goes - start 7
        % characters past the current end of the output.
        tgtpos = length(machinecode) + 7;  % possible label position
        machinecode = [machinecode dec2hex(address, 2) ': ' code];
        address = address + 2;
        
        if ~isempty(target_label)
            labels = notetarget(tgtpos, target_label, labels);
        end
    end
    
    % add source code as comments. Note regexp('','^$') returns []
    % Lines that are blank or already start with // are copied unchanged.
    if ~isempty(line) && isempty(regexp(line, '^\s*(?://|$)', 'once'))
        line = [' // ' line];
    end
    machinecode = [machinecode line nl];
end

% second pass: overwrite the '00' placeholders with the label addresses
machinecode = fixlabels(labels, machinecode);

end


function occupied = checkmem(occupied, address, nbytes)
%CHECKMEM reserves a range of memory, checking bounds and overlap
%   OCCUPIED = CHECKMEM(OCCUPIED, ADDRESS, NBYTES) marks the NBYTES bytes
%   starting at the zero-based ADDRESS as used in the map OCCUPIED and
%   returns the updated map.
%
%   Throws bmachine:assembler:outofmem if the range extends past byte 255
%   and bmachine:assembler:overwrite if any byte in the range is already
%   marked as used.

% The source line is not in scope here, so the messages identify the
% problem by address. (A bare reference to LINE in this function would
% call the graphics function of that name rather than yield the text.)
if address > 256-nbytes
    throw(MException('bmachine:assembler:outofmem', ...
        'Memory limit of 256 exceeded: %d byte(s) at address %02X', ...
        nbytes, address));
end

% the map is indexed from 1, addresses from 0
span = address + (1:nbytes);
if any(occupied(span))
    throw(MException('bmachine:assembler:overwrite', ...
        'Memory overwritten: %d byte(s) at address %02X', ...
        nbytes, address));
end
occupied(span) = true;
end


function [label, op, args] = parseline(line)
%PARSELINE splits a source line into label, operator and argument text
%   [LABEL, OP, ARGS] = PARSELINE(LINE) strips any // comment from LINE and
%   returns the label (without its colon), the operator name and the
%   argument text with surrounding white space removed. Parts that are
%   absent are returned as ''.
%
%   Throws bmachine:assembler:parseline if there is text after the label
%   that does not start with an alphabetic operator name.

label = '';
op = '';
args = '';

% remove the comment; the stripped text is also what error messages show
stripped = regexprep(line, '\s*//.*$', '');

% optional "label:" followed by an optional expression
linepatt = ['^\s*(?<label>\w*:)?' '\s*(?<expr>\S.*)?$'];
fields = regexp(stripped, linepatt, 'names', 'once');
if isempty(fields)
    return
end

% a lone colon is not a label
if length(fields.label) > 1
    label = fields.label(1:end-1);        % omit the colon
end

if isempty(fields.expr)
    return
end

% operator is a run of letters; arguments must start with a non-word char
cmdpatt = ['^(?<op>[a-zA-Z]*)' '(?<args>\W.*)?$'];
opargs = regexp(fields.expr, cmdpatt, 'names', 'once');
if isempty(opargs) || isempty(opargs.op)
    throw(MException('bmachine:assembler:parseline', ...
        ['Invalid operator in ' stripped]));
end

op = opargs.op;
% trim leading, then trailing, white space
args = regexprep(opargs.args, '^\s*', '');
args = regexprep(args, '\s*$', '');

end


function labels = newlabels
%NEWLABELS creates an empty label table
%   The table is a struct of three parallel cell arrays: LIST holds the
%   label names, ADDR the address of each label (empty until defined) and
%   REFS the output positions that refer to each label.

% double braces make STRUCT store an empty cell in each field
labels = struct('list', {{}}, 'addr', {{}}, 'refs', {{}});
end


function [address, labels] = notelabel(address, label, labels)
%NOTELABEL processes the label found at the start of a source line
%   [ADDRESS, LABELS] = NOTELABEL(ADDRESS, LABEL, LABELS) handles two kinds
%   of label. A label of exactly two hex digits sets ADDRESS to that value
%   and leaves LABELS unchanged. A text label (a letter followed by at
%   least three word characters) is recorded in LABELS at the current
%   ADDRESS, which is returned unchanged.
%
%   Throws bmachine:assembler:reuselabel if a text label already has an
%   address and bmachine:assembler:labelformat for any other label.

if ~isempty(regexp(label, '^[0-9a-fA-F]{2}$', 'once'))
    % have an address rather than a text label
    address = hex2dec(label);
    return
end

if isempty(regexp(label, '^[a-zA-Z]\w{3,}$', 'once'))
    throw(MException('bmachine:assembler:labelformat', ...
        ['Invalid label format: ' label]));
end

[known, loc] = ismember(label, labels.list);
if ~known
    % first mention of this label: add a new entry
    loc = length(labels.list) + 1;
    labels.list{loc} = label;
    labels.refs{loc} = [];
elseif ~isempty(labels.addr{loc})
    % already seen, and not merely as a forward reference
    throw(MException('bmachine:assembler:reuselabel', ...
        ['Label used more than once: ' label]));
end
labels.addr{loc} = address;

end


function labels = notetarget(tgtpos, label, labels)
%NOTETARGET records a reference to a label
%   LABELS = NOTETARGET(TGTPOS, LABEL, LABELS) appends the output position
%   TGTPOS to the list of references held for LABEL. If LABEL has not been
%   seen before, an entry with an empty address is created for it first.

[known, loc] = ismember(label, labels.list);
if ~known
    % forward reference: the address is filled in when the label is defined
    loc = numel(labels.list) + 1;
    labels.list{loc} = label;
    [labels.addr{loc}, labels.refs{loc}] = deal([]);
end
labels.refs{loc} = horzcat(labels.refs{loc}, tgtpos);
end


function machinecode = fixlabels(labels, machinecode)
%FIXLABELS writes label addresses into the assembled output
%   MACHINECODE = FIXLABELS(LABELS, MACHINECODE) overwrites, for every
%   label in LABELS, the two characters at each recorded reference
%   position with the label's address as two hex digits.
%
%   Throws bmachine:assembler:undef_label if a label has no address.

for k = 1:length(labels.list)
    labaddr = labels.addr{k};
    if isempty(labaddr)
        throw(MException('bmachine:assembler:undef_label', ...
            ['Undefined label: ' labels.list{k}]));
    end
    hexaddr = dec2hex(labaddr, 2);
    positions = labels.refs{k};
    for m = 1:length(positions)
        first = positions(m);
        machinecode(first:first+1) = hexaddr;
    end
end
end


function datastr = parsedata(args)
%PARSEDATA converts the arguments of a DATA directive to hex digits
%   DATASTR = PARSEDATA(ARGS) returns a string with two hex digits per
%   byte. If ARGS is enclosed in double quotes, the characters between the
%   quotes are encoded in order and followed by a zero byte. Otherwise
%   ARGS is treated as a comma-separated list of values, each of which
%   must be an immediate value that is not a label.
%
%   Throws bmachine:assembler:baddata for a list item that is not a plain
%   immediate value.

quoted = ~isempty(regexp(args, '^".*"$', 'once'));
if quoted
    % dec2hex gives one row per character; transpose so that reading down
    % the columns yields the digits in character order
    hexcols = dec2hex(args(2:end-1), 2)';
    datastr = [hexcols(:)' '00'];    % null-terminated string
    return
end

items = regexp(args, '\s*,\s*', 'split');
datastr = '';
for k = 1:length(items)
    [hexval, mode, label] = parsearg(items{k});
    isplain = isequal(mode, 'immediate') && isempty(label);
    if ~isplain
        throw(MException('bmachine:assembler:baddata', ...
            ['Invalid data: ' args]));
    end
    datastr = [datastr hexval];
end
end


function [code, target_label] = parsecmd(op, args)
%PARSECMD assembles a single instruction
%   [CODE, TARGET_LABEL] = PARSECMD(OP, ARGS) looks up the operator name
%   OP, ignoring case, and passes ARGS to the function that encodes that
%   instruction. The outputs are whatever that function returns.
%
%   Throws bmachine:assembler:parsecmd if OP is not a known operator.

% operator name paired with the function that encodes it
handlers = { ...
    'NOP',   @NOP;   'MOV',   @MOV;   'ADDI',  @ADDI;  'ADDF',  @ADDF; ...
    'OR',    @OR;    'AND',   @AND;   'XOR',   @XOR;   'ROT',   @ROT; ...
    'JMP',   @JMP;   'JMPEQ', @JMPEQ; 'JMPNE', @JMPNE; 'JMPGE', @JMPGE; ...
    'JMPLE', @JMPLE; 'JMPGT', @JMPGT; 'JMPLT', @JMPLT; 'HALT',  @HALT};

row = find(strcmp(upper(op), handlers(:, 1)), 1);
if isempty(row)
    throw(MException('bmachine:assembler:parsecmd', ...
        ['Unknown operator ' op]));
end

encode = handlers{row, 2};
[code, target_label] = encode(args);

end

function [code, target_label] = NOP(args)
%NOP returns the machine code for the NOP instruction
%   The instruction takes no arguments and refers to no label, so
%   TARGET_LABEL is always ''.
%
%   Throws bmachine:assembler:NOP if ARGS is not empty.

if isempty(args)
    code = '0FFF';
    target_label = '';
else
    throw(MException('bmachine:assembler:NOP', ...
        ['NOP should have no arguments, found: ' args]));
end
end

function [code, target_label] = MOV(args)
%MOV returns the machine code for a MOV instruction
%   ARGS has the form  SOURCE -> DESTINATION. When the source is a
%   register, the destination may be a register, a direct memory reference
%   or a register-indirect reference. Otherwise the source may be an
%   immediate value, a direct reference or a register-indirect reference,
%   and the destination must be a register.
%
%   TARGET_LABEL is the label, if any, used in the non-register operand.
%
%   Throws bmachine:assembler:MOV for malformed arguments or an invalid
%   combination of operands.

operands = regexp(args, '^(\S+)\s*(?:->)\s*(\S+)$', 'tokens', 'once');
if isempty(operands)
    throw(MException('bmachine:assembler:MOV', ...
        ['MOVE arguments not well formed: ' args]));
end

% both operands are parsed before any combination is rejected
[srcval, srcmode, srclabel] = parsearg(operands{1});
[dstval, dstmode, dstlabel] = parsearg(operands{2});
target_label = '';

if ~isequal(srcmode, 'register')
    % load into a register
    if ~isequal(dstmode, 'register')
        throw(MException('bmachine:assembler:MOV', ...
            ['Value can only be moved to register ' args]));
    end
    switch srcmode
        case 'immediate'
            code = ['2' dstval srcval];
        case 'direct'
            code = ['1' dstval srcval];
        case 'indirect'
            code = ['D0' dstval srcval];
    end
    target_label = srclabel;
    return
end

% source is a register: copy or store
switch dstmode
    case 'immediate'
        throw(MException('bmachine:assembler:MOV', ...
            ['Invalid destination for move ' args]));
    case 'register'
        code = ['40' srcval dstval];
    case 'direct'
        code = ['3' srcval dstval];
        target_label = dstlabel;
    case 'indirect'
        code = ['E0' srcval dstval];
end
end

function [code, target_label] = ADDI(args)
%ADDI returns the machine code for an ADDI instruction
%   The code is the digit 5 followed by the three register digits
%   returned by PARSEOPARGS. No label is involved.
target_label = '';
regdigits = parseopargs(args);
code = ['5' regdigits];
end

function [code, target_label] = ADDF(args)
%ADDF returns the machine code for an ADDF instruction
%   The code is the digit 6 followed by the three register digits
%   returned by PARSEOPARGS. No label is involved.
target_label = '';
regdigits = parseopargs(args);
code = ['6' regdigits];
end

function [code, target_label] = OR(args)
%OR returns the machine code for an OR instruction
%   The code is the digit 7 followed by the three register digits
%   returned by PARSEOPARGS. No label is involved.
target_label = '';
regdigits = parseopargs(args);
code = ['7' regdigits];
end

function [code, target_label] = AND(args)
%AND returns the machine code for an AND instruction
%   The code is the digit 8 followed by the three register digits
%   returned by PARSEOPARGS. No label is involved.
target_label = '';
regdigits = parseopargs(args);
code = ['8' regdigits];
end

function [code, target_label] = XOR(args)
%XOR returns the machine code for an XOR instruction
%   The code is the digit 9 followed by the three register digits
%   returned by PARSEOPARGS. No label is involved.
target_label = '';
regdigits = parseopargs(args);
code = ['9' regdigits];
end

function regstring = parseopargs(args)
%PARSEOPARGS parses the arguments of a three-register instruction
%   REGSTRING = PARSEOPARGS(ARGS) parses arguments of the form
%       R1, R2 -> R3
%   where each register is R followed by one hex digit. The result is the
%   three register digits in the order destination, first operand, second
%   operand, in upper case.
%
%   Throws bmachine:assembler:opargs if ARGS does not have this form.

toks = regexp(args, '^R([0-9a-fA-F])\s*,\s*R([0-9a-fA-F])\s*(?:->)\s*R([0-9a-fA-F])$', ...
    'tokens', 'once');
if isempty(toks)
    throw(MException('bmachine:assembler:opargs', ...
        ['Invalid arguments for register operation: ' args]));
end
% Normalise to upper case so that the digits match the rest of the
% output, which comes from dec2hex or is upper-cased by parsearg.
regstring = upper([toks{3} toks{1} toks{2}]);
end

function [code, target_label] = ROT(args)
%ROT returns the machine code for a ROT instruction
%   ARGS has the form  Rn, k  where Rn is R followed by one hex digit and
%   k is a single digit from 0 to 7. The code is 'A', the register digit
%   in upper case, '0' and the digit k. No label is involved.
%
%   Throws bmachine:assembler:rotargs if ARGS does not have this form.

toks = regexp(args, '^R([0-9a-fA-F])\s*,\s*([0-7])$', 'tokens', 'once');
if isempty(toks)
    throw(MException('bmachine:assembler:rotargs', ...
        ['Invalid arguments for ROT: ' args]));
end
% Upper-case the register digit so that the code matches the rest of the
% output, which comes from dec2hex or is upper-cased by parsearg.
code = ['A' upper(toks{1}) '0' toks{2}];
target_label = '';
end

function [code, target_label] = JMP(args)
%JMP returns the machine code for an unconditional JMP instruction
%   The target may be an immediate value (hex only) or label, giving
%   'B0' plus two digits, or a register, giving 'F00' plus one digit.
%   TARGET_LABEL is the label returned by PARSEARG, if any.
%
%   Throws bmachine:assembler:JMP for any other kind of target.

[tgtval, mode, label] = parsearg(args, true);    % set hex only flag
switch mode
    case 'immediate'
        code = ['B0' tgtval];
    case 'register'
        code = ['F00' tgtval];
    otherwise
        throw(MException('bmachine:assembler:JMP', ...
            ['Invalid target for JMP: ' args]));
end
target_label = label;
end

function [code, target_label] = JMPEQ(args)
%JMPEQ returns the machine code for a JMPEQ instruction
%   ARGS has the form  TARGET, Rn. The second argument must be a register.
%   An immediate (hex only) or label target gives 'B', the register digit
%   and two target digits; a register target gives 'F', the test register
%   digit, '0' and the target register digit. TARGET_LABEL is the label
%   used as the target, if any.
%
%   Throws bmachine:assembler:JMPEQ if the second argument is not a
%   register and bmachine:assembler:JMP for an invalid target.

[target, tested] = parsejmpargs(args);
[tgtval, tgtmode, tgtlabel] = parsearg(target, true);    % set hex only flag
[regval, regmode] = parsearg(tested, true);

if ~isequal(regmode, 'register')
    throw(MException('bmachine:assembler:JMPEQ', ...
        ['JMPEQ requires register for test: ', args]));
end

switch tgtmode
    case 'immediate'
        code = ['B' regval tgtval];
    case 'register'
        code = ['F' regval '0' tgtval];
    otherwise
        throw(MException('bmachine:assembler:JMP', ...
            ['Invalid target for JMPEQ: ' args]));
end
target_label = tgtlabel;
end

function [code, target_label] = JMPNE(args)
%JMPNE returns the machine code for a JMPNE instruction
%   The code is 'F', the test register digit, '1' and the target register
%   digit, the register digits coming from JMPTST. No label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = horzcat('F', testreg, '1', tgtreg);
end

function [code, target_label] = JMPGE(args)
%JMPGE returns the machine code for a JMPGE instruction
%   The code is 'F', the test register digit, '2' and the target register
%   digit, the register digits coming from JMPTST. No label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = horzcat('F', testreg, '2', tgtreg);
end

function [code, target_label] = JMPLE(args)
%JMPLE returns the machine code for a JMPLE instruction
%   The code is 'F', the test register digit, '3' and the target register
%   digit, the register digits coming from JMPTST. No label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = horzcat('F', testreg, '3', tgtreg);
end

function [code, target_label] = JMPGT(args)
%JMPGT returns the machine code for a JMPGT instruction
%   The code is 'F', the test register digit, '4' and the target register
%   digit, the register digits coming from JMPTST. No label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = horzcat('F', testreg, '4', tgtreg);
end

function [code, target_label] = JMPLT(args)
%JMPLT returns the machine code for a JMPLT instruction
%   The code is 'F', the test register digit, '5' and the target register
%   digit, the register digits coming from JMPTST. No label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = horzcat('F', testreg, '5', tgtreg);
end

function [a2, a1] = jmptst(args)
%JMPTST parses the arguments of a jump-with-test instruction
%   [A2, A1] = JMPTST(ARGS) splits ARGS of the form  Rx, Ry  and returns
%   the hex digit of the second register in A2 and of the first in A1.
%
%   Throws bmachine:assembler:jmptst unless both arguments are registers.

[first, second] = parsejmpargs(args);
[a1, firstmode] = parsearg(first, true);
[a2, secondmode] = parsearg(second, true);

bothregs = isequal(firstmode, 'register') && isequal(secondmode, 'register');
if ~bothregs
    throw(MException('bmachine:assembler:jmptst', ...
        ['Jump with test requires registers: ', args]));
end
end

function [a1, a2] = parsejmpargs(args)
%PARSEJMPARGS splits the two comma-separated arguments of a jump
%   [A1, A2] = PARSEJMPARGS(ARGS) returns the text before and after the
%   comma. Neither part may contain white space.
%
%   Throws bmachine:assembler:jmpargs if ARGS does not have this form.

pair = regexp(args, '^(\S+)\s*,\s*(\S+)$', 'tokens', 'once');
if isempty(pair)
    throw(MException('bmachine:assembler:jmpargs', ...
        ['Invalid format for jump arguments ' args]));
end
% a successful match always yields exactly two tokens
[a1, a2] = pair{:};
end


function [a, mode, label] = parsearg(arg, hexonly)
%PARSEARG parses a single instruction or data argument
%   [A, MODE, LABEL] = PARSEARG(ARG) or PARSEARG(ARG, HEXONLY)
%
% Parses a single argument. Result a is 1 or 2 hex digits, mode is
% 'immediate', 'direct', 'register' or 'indirect'. A label may be used in
% immediate or direct mode - it is then returned in the label result and a is '00'.
% If second argument is given and is true, only hex values may be given
% as numeric immediates: the binary, ascii, decimal and float forms are
% then not tried.
%
% The formats are tried in the order written below and the first match
% wins, so for example a two-character argument made of hex digits is
% always read as hex.
%
% Throws bmachine:assembler:parsearg if no format matches, or if an ascii
% argument has a character code above 127.
label = '';

% hex
% exactly two hex digits with an optional trailing 'h'; result is upper case
toks = regexp(arg, '^([0-9a-fA-F]{2})h?$', 'tokens', 'once');
if ~isempty(toks)
    a = upper(toks{1});
    mode = 'immediate';
    return
end

% the remaining numeric forms are skipped when the hexonly flag is set
if nargin < 2 || ~hexonly

    % binary
    % exactly eight binary digits with an optional trailing 'b'
    toks = regexp(arg, '^([01]{8})b?$', 'tokens', 'once');
    if ~isempty(toks)
        a = dec2hex(bin2dec(toks{1}), 2);
        mode = 'immediate';
        return
    end

    % ascii
    % a single character in double quotes, encoded as its character code
    toks = regexp(arg, '^"(.)"$', 'tokens', 'once');
    if ~isempty(toks)
        c = toks{1};
        if c > 127
            throw(MException('bmachine:assembler:parsearg', ...
                ['Non-ascii character: ' arg]));
        end
        a = dec2hex(c, 2);
        mode = 'immediate';
        return
    end

    % decimal integer
    % either a signed digit string with an optional trailing 'd', or a
    % single unsigned digit. dble2int8 is defined outside this file -
    % presumably it maps the value to an 8-bit integer code; TODO confirm
    toks = regexp(arg, '^([+-][0-9]+)d?$|^([0-9])$', 'tokens', 'once');
    if ~isempty(toks)
        a = dec2hex(dble2int8(str2double(toks{1})), 2);
        mode = 'immediate';
        return
    end

    % float
    % optional sign, digits, a decimal point and optional further digits.
    % dble2f8 is defined outside this file - presumably it gives the
    % machine's 8-bit floating point code; TODO confirm
    toks = regexp(arg, '^([+-]?[0-9]+\.[0-9]*)$', 'tokens', 'once');
    if ~isempty(toks)
        a = dec2hex(dble2f8(str2double(toks{1})), 2);
        mode = 'immediate';
        return
    end

end

% label
% a letter followed by at least three word characters; '00' is returned
% as a placeholder value alongside the label name
toks = regexp(arg, '^([a-zA-Z]\w{3,})$', 'tokens', 'once');
if ~isempty(toks)
    label = toks{1};
    a = '00';
    mode = 'immediate';
    return
end

% register
% 'R' followed by one hex digit; a is that single digit in upper case
toks = regexp(arg, '^R([a-fA-F0-9])$', 'tokens', 'once');
if ~isempty(toks)
    a = upper(toks{1});
    mode = 'register';
    return
end

% direct memory reference
% two hex digits in square brackets
toks = regexp(arg, '^\[([a-fA-F0-9]{2})\]$', 'tokens', 'once');
if ~isempty(toks)
    a = upper(toks{1});
    mode = 'direct';
    return
end

% direct memory reference - label
% a label in square brackets; '00' is again a placeholder value
toks = regexp(arg, '^\[([a-zA-Z]\w{3,})\]$', 'tokens', 'once');
if ~isempty(toks)
    label = toks{1};
    a = '00';
    mode = 'direct';
    return
end

% register indirect
% a register in square brackets; a is the single register digit
toks = regexp(arg, '^\[R([a-fA-F0-9])\]$', 'tokens', 'once');
if ~isempty(toks)
    a = upper(toks{1});
    mode = 'indirect';
    return
end

% nothing matched
throw(MException('bmachine:assembler:parsearg', ...
    ['Unrecognised argument format: ' arg]));

end


function [code, target_label] = HALT(args)
%HALT returns the machine code for the HALT instruction
%   The instruction takes no arguments and refers to no label, so
%   TARGET_LABEL is always ''.
%
%   Throws bmachine:assembler:HALT if ARGS is not empty.

if isempty(args)
    code = 'C000';
    target_label = '';
else
    throw(MException('bmachine:assembler:HALT', ...
        ['HALT should have no arguments, found: ' args]));
end
end

Contact us