Code covered by the BSD License  

Highlights from
Extended Brookshear Machine emulator and assembler

image thumbnail

Extended Brookshear Machine emulator and assembler

by

David Young (view profile)

 

05 Jan 2009 (Updated )

Emulator and assembler for a simple computer, a teaching aid for computer science courses.

assembler(file)
function machinecode = assembler(file)
%ASSEMBLER is an assembler for the extended Brookshear machine
%   MACHINECODE = ASSEMBLER(FILE) reads assembly language from the file
%   whose name is the argument. If this compiles successfully, a string
%   containing machine code instructions is returned.
%
%   The language for the input file is specified in ASSEMBHELP.M. The
%   format of the output is the same as that of a memory file as specified
%   in BMHELP.m.
%
%   The output starts with two '//' header lines naming FILE and the time
%   of assembly. Every source line that produces code or data becomes
%   'AA: <hex digits> // <source line>', where AA is the two-digit hex
%   address; blank and comment-only source lines are copied through as
%   they are. Operands that refer to labels are first written as '00' and
%   patched with the real address after the whole file has been read.
%
%   Problems in the source are reported by throwing an MException whose
%   identifier starts with 'bmachine:assembler:'.
%
%   See also ASSEMBHELP, BMHELP

%   Copyright 2008 University of Sussex and David Young

assembly = readtext(file);

% split input into lines - retain empty lines
lines = regexp(assembly, '\n', 'split');

nl = char(10);
address = 0;
machinecode = ['// Brookshear Machine, assembled from ' file nl '// at ' datestr(now) nl];
% One flag per memory cell (256 cells). A logical row vector is all that
% is needed; zeros(256) would build a 256-by-256 matrix.
occupied = false(1, 256);
labels = newlabels;

for i = 1:length(lines)
    line = lines{i};
    % initial split of line into fields
    [label, op, args] = parseline(line);
    
    % a label either moves the current address (two hex digits) or names it
    if ~isempty(label)
        [address, labels] = notelabel(address, label, labels);
    end
    
    if isequal(upper(op), 'DATA')
        % DATA directive: raw bytes, two hex digits per byte
        datastr = parsedata(args);
        nbytes = length(datastr)/2;
        occupied = checkmem(occupied, address, nbytes);
        machinecode = [machinecode dec2hex(address, 2) ': ' datastr];
        address = address + nbytes;
        
    elseif ~isempty(op)
        % every instruction occupies two bytes
        occupied = checkmem(occupied, address, 2);
        
        % get machine code for command and extend output
        [code, target_label] = parsecmd(op, args);
        % The text appended below is 'AA: ' (4 chars) followed by the 4
        % code digits, so offset 7 is the first of the last two digits -
        % the operand byte that a label reference has to overwrite.
        tgtpos = length(machinecode) + 7;  % possible label position
        machinecode = [machinecode dec2hex(address, 2) ': ' code];
        address = address + 2;
        
        if ~isempty(target_label)
            labels = notetarget(tgtpos, target_label, labels);
        end
    end
    
    % add source code as comments. Note regexp('','^$') returns []
    if ~isempty(line) && isempty(regexp(line, '^\s*(?://|$)', 'once'))
        line = [' // ' line];
    end
    machinecode = [machinecode line nl];
end

% replace the '00' placeholders with the addresses of the labels
machinecode = fixlabels(labels, machinecode);

end


function occupied = checkmem(occupied, address, nbytes)
%CHECKMEM reserves a run of memory cells, refusing overflow and overlap.
%   OCCUPIED = CHECKMEM(OCCUPIED, ADDRESS, NBYTES) marks the NBYTES cells
%   starting at the zero-based ADDRESS as used and returns the updated
%   map. Cell ADDRESS is stored in element ADDRESS+1 of OCCUPIED.
%
%   Throws bmachine:assembler:outofmem if the run would extend beyond
%   cell 255, and bmachine:assembler:overwrite if any cell in the run has
%   already been reserved.
%
%   The messages describe the offending address range. The source line is
%   not available here: it is a variable of the caller, and the bare name
%   'line' in this function would refer to the graphics function instead.

if address > 256-nbytes
    throw(MException('bmachine:assembler:outofmem', ...
        'Memory limit of 256 exceeded: %d byte(s) at address %s', ...
        nbytes, dec2hex(address, 2)));
end
cells = address + (1:nbytes);
if any(occupied(cells))
    throw(MException('bmachine:assembler:overwrite', ...
        'Memory overwritten: %d byte(s) at address %s', ...
        nbytes, dec2hex(address, 2)));
end
occupied(cells) = true;
end


function [label, op, args] = parseline(line)
%PARSELINE splits one source line into label, operator and arguments.
%   [LABEL, OP, ARGS] = PARSELINE(LINE) removes any '//' comment and then
%   looks for an optional 'label:' prefix followed by an optional
%   expression. The expression must start with an alphabetic operator;
%   whatever follows it is returned, trimmed, in ARGS. Fields that are
%   absent are returned as ''. The colon is not part of LABEL.
%
%   Throws bmachine:assembler:parseline if an expression is present but
%   does not begin with an operator.

op = '';
args = '';
label = '';

% drop the comment first so that it cannot be mistaken for a field
code = regexprep(line, '\s*//.*$', '');

fields = regexp(code, '^\s*(?<label>\w*:)?\s*(?<expr>\S.*)?$', ...
    'names', 'once');
if isempty(fields)
    return
end

% a lone ':' has length 1 and is not treated as a label
if length(fields.label) > 1
    label = fields.label(1:end-1);
end

if isempty(fields.expr)
    return
end

parts = regexp(fields.expr, '^(?<op>[a-zA-Z]*)(?<args>\W.*)?$', ...
    'names', 'once');
if isempty(parts) || isempty(parts.op)
    throw(MException('bmachine:assembler:parseline', ...
        ['Invalid operator in ' code]));
end

op = parts.op;
% strip leading and trailing white space from the argument text
args = regexprep(parts.args, {'^\s*' '\s*$'}, '');

end


function labels = newlabels
%NEWLABELS creates an empty label table.
%   The table is a scalar struct with three parallel cell arrays:
%   list (label names), addr (address of each label, [] until defined)
%   and refs (output positions that refer to each label).
labels = struct('list', {{}}, 'addr', {{}}, 'refs', {{}});
end


function [address, labels] = notelabel(address, label, labels)
%NOTELABEL handles the label field found at the start of a source line.
%   A label made of exactly two hex digits is an address: ADDRESS is
%   replaced by its value and LABELS is returned unchanged.
%   A label made of a letter followed by at least three word characters
%   is a name: it is recorded in LABELS with the current ADDRESS.
%
%   Throws bmachine:assembler:reuselabel if the name already has an
%   address, and bmachine:assembler:labelformat for any other label text.

ishexaddr = ~isempty(regexp(label, '^[0-9a-fA-F]{2}$', 'once'));
if ishexaddr
    % have an address rather than a text label
    address = hex2dec(label);
    return
end

isname = ~isempty(regexp(label, '^[a-zA-Z]\w{3,}$', 'once'));
if ~isname
    throw(MException('bmachine:assembler:labelformat', ...
        ['Invalid label format: ' label]));
end

[known, loc] = ismember(label, labels.list);
if ~known
    % first sight of this name: append a new entry with no references
    loc = length(labels.list) + 1;
    labels.list{loc} = label;
    labels.refs{loc} = [];
elseif ~isempty(labels.addr{loc})
    % the name was seen before and already has an address
    throw(MException('bmachine:assembler:reuselabel', ...
        ['Label used more than once: ' label]));
end
labels.addr{loc} = address;

end


function labels = notetarget(tgtpos, label, labels)
%NOTETARGET records that the output text at TGTPOS refers to LABEL.
%   If LABEL is not yet in the table it is added with an empty address,
%   to be filled in when its definition is encountered.
[known, loc] = ismember(label, labels.list);
if known
    labels.refs{loc} = [labels.refs{loc} tgtpos];
else
    % forward reference: create the entry, address still unknown
    loc = length(labels.list) + 1;
    labels.list{loc} = label;
    labels.addr{loc} = [];
    labels.refs{loc} = tgtpos;
end
end


function machinecode = fixlabels(labels, machinecode)
%FIXLABELS writes label addresses into the assembled text.
%   For every label in the table, the two characters at each recorded
%   reference position are overwritten with the label's address as two
%   hex digits.
%
%   Throws bmachine:assembler:undef_label for a label that was referred
%   to but never given an address.
for k = 1:length(labels.list)
    target = labels.addr{k};
    if isempty(target)
        throw(MException('bmachine:assembler:undef_label', ...
            ['Undefined label: ' labels.list{k}]));
    end
    hexaddr = dec2hex(target, 2);
    % visit each reference position in turn (none if the list is empty)
    for pos = reshape(labels.refs{k}, 1, [])
        machinecode([pos pos+1]) = hexaddr;
    end
end
end


function datastr = parsedata(args)
%PARSEDATA converts the arguments of a DATA directive to hex digits.
%   DATASTR = PARSEDATA(ARGS) returns two hex digits per byte.
%
%   If ARGS is enclosed in double quotes it is a string: each character
%   between the quotes becomes one byte and a terminating 00 byte is
%   appended. Otherwise ARGS is a comma-separated list of values, each of
%   which must parse (see PARSEARG) as an immediate value without a label.
%
%   Throws bmachine:assembler:baddata if a string character has a code
%   above 255, or if a list item is not a plain immediate value.

if ~isempty(regexp(args, '^".*"$', 'once'))
    str = args(2:end-1);
    % A code above 255 needs more than two hex digits; dec2hex would then
    % widen every row and the output would no longer be one byte per
    % character.
    if any(str > 255)
        throw(MException('bmachine:assembler:baddata', ...
            'Character does not fit in one byte: %s', args));
    end
    % dec2hex gives one row per character; transpose so that (:) reads
    % the digits character by character
    s = (dec2hex(str, 2))';
    datastr = [(s(:))' '00'];    % null-terminated string
else
    data = regexp(args, '\s*,\s*', 'split');
    datastr = '';
    for i = 1:length(data)
        [a, mode, label] = parsearg(data{i});
        if ~isequal(mode, 'immediate') || ~isempty(label)
            throw(MException('bmachine:assembler:baddata', ...
                ['Invalid data: ' args]));
        end
        datastr = [datastr a];
    end
end
end


function [code, target_label] = parsecmd(op, args)
%PARSECMD assembles one instruction.
%   [CODE, TARGET_LABEL] = PARSECMD(OP, ARGS) looks up the operator name
%   OP, ignoring case, and passes ARGS to the encoder for that operator.
%   The encoder's two results are returned unchanged.
%
%   Throws bmachine:assembler:parsecmd if OP is not a known operator.

% operator name in column 1, its encoder in column 2
optable = { ...
    'NOP'   @NOP;   'MOV'   @MOV;   'ADDI'  @ADDI;  'ADDF'  @ADDF; ...
    'OR'    @OR;    'AND'   @AND;   'XOR'   @XOR;   'ROT'   @ROT; ...
    'JMP'   @JMP;   'JMPEQ' @JMPEQ; 'JMPNE' @JMPNE; 'JMPGE' @JMPGE; ...
    'JMPLE' @JMPLE; 'JMPGT' @JMPGT; 'JMPLT' @JMPLT; 'HALT'  @HALT};

[found, row] = ismember(upper(op), optable(:, 1));
if ~found
    throw(MException('bmachine:assembler:parsecmd', ...
        ['Unknown operator ' op]));
end
encoder = optable{row, 2};
[code, target_label] = encoder(args);

end

function [code, target_label] = NOP(args)
%NOP encodes the no-operation instruction as 0FFF.
%   ARGS must be empty, otherwise bmachine:assembler:NOP is thrown.
%   TARGET_LABEL is always ''.
if isempty(args)
    target_label = '';
    code = '0FFF';
    return
end
throw(MException('bmachine:assembler:NOP', ...
    ['NOP should have no arguments, found: ' args]));
end

function [code, target_label] = MOV(args)
%MOV encodes a move written as 'source -> destination'.
%   With a register as source the destination may be a register
%   (40sd), a memory cell (3sXY) or a register-indirect cell (E0sd).
%   With any other source the destination must be a register, giving
%   2dXY (immediate), 1dXY (direct) or D0ds (register indirect).
%   TARGET_LABEL is the label named by the memory or immediate operand,
%   or '' if there is none.
%
%   Throws bmachine:assembler:MOV if ARGS is not of the expected form or
%   the combination of operands is not allowed.

operands = regexp(args, '^(\S+)\s*(?:->)\s*(\S+)$', 'tokens', 'once');
if isempty(operands)
    throw(MException('bmachine:assembler:MOV', ...
        ['MOVE arguments not well formed: ' args]));
end
[src, srcmode, srclabel] = parsearg(operands{1});
[dst, dstmode, dstlabel] = parsearg(operands{2});
target_label = '';

if ~isequal(srcmode, 'register')
    % loading a value: only a register can receive it
    if ~isequal(dstmode, 'register')
        throw(MException('bmachine:assembler:MOV', ...
            ['Value can only be moved to register ' args]));
    end
    if isequal(srcmode, 'immediate')
        code = ['2' dst src];
    elseif isequal(srcmode, 'direct')
        code = ['1' dst src];
    elseif isequal(srcmode, 'indirect')
        code = ['D0' dst src];
    end
    target_label = srclabel;
    return
end

% source is a register: store or copy
if isequal(dstmode, 'immediate')
    throw(MException('bmachine:assembler:MOV', ...
        ['Invalid destination for move ' args]));
elseif isequal(dstmode, 'register')
    code = ['40' src dst];
elseif isequal(dstmode, 'direct')
    code = ['3' src dst];
    target_label = dstlabel;
elseif isequal(dstmode, 'indirect')
    code = ['E0' src dst];
end
end

function [code, target_label] = ADDI(args)
%ADDI encodes the ADDI instruction: opcode digit 5 followed by the three
%   register digits produced by PARSEOPARGS. No label is involved.
target_label = '';
regs = parseopargs(args);
code = ['5' regs];
end

function [code, target_label] = ADDF(args)
%ADDF encodes the ADDF instruction: opcode digit 6 followed by the three
%   register digits produced by PARSEOPARGS. No label is involved.
target_label = '';
regs = parseopargs(args);
code = ['6' regs];
end

function [code, target_label] = OR(args)
%OR encodes the OR instruction: opcode digit 7 followed by the three
%   register digits produced by PARSEOPARGS. No label is involved.
target_label = '';
regs = parseopargs(args);
code = ['7' regs];
end

function [code, target_label] = AND(args)
%AND encodes the AND instruction: opcode digit 8 followed by the three
%   register digits produced by PARSEOPARGS. No label is involved.
target_label = '';
regs = parseopargs(args);
code = ['8' regs];
end

function [code, target_label] = XOR(args)
%XOR encodes the XOR instruction: opcode digit 9 followed by the three
%   register digits produced by PARSEOPARGS. No label is involved.
target_label = '';
regs = parseopargs(args);
code = ['9' regs];
end

function regstring = parseopargs(args)
% Parse arguments to a register operation instruction, of the form
%       OP R1, R2 -> R3
% Returns the three register digits in the order destination, first
% source, second source (R3 R1 R2 in the example above). The digits are
% upper-cased so that the machine code uses the same hex case as the
% digits produced by parsearg and dec2hex elsewhere in the assembler.
% Throws bmachine:assembler:opargs if the arguments do not have this form.
toks = regexp(args, '^R([0-9a-fA-F])\s*,\s*R([0-9a-fA-F])\s*(?:->)\s*R([0-9a-fA-F])$', ...
    'tokens', 'once');
if isempty(toks)
    throw(MException('bmachine:assembler:opargs', ...
        ['Invalid arguments for register operation: ' args]));
end
regstring = upper([toks{3} toks{1} toks{2}]);
end

function [code, target_label] = ROT(args)
%ROT encodes a rotate instruction written as 'Rn, k'.
%   Rn is a register (one hex digit) and k a single digit from 0 to 7.
%   The code is A, the register digit, 0 and k. The register digit is
%   upper-cased so that the machine code uses the same hex case as the
%   digits produced by parsearg and dec2hex elsewhere in the assembler.
%   TARGET_LABEL is always ''.
%
%   Throws bmachine:assembler:rotargs if ARGS does not have this form.
toks = regexp(args, '^R([0-9a-fA-F])\s*,\s*([0-7])$', 'tokens', 'once');
if isempty(toks)
    throw(MException('bmachine:assembler:rotargs', ...
        ['Invalid arguments for ROT: ' args]));
end
code = ['A' upper(toks{1}) '0' toks{2}];
target_label = '';
end

function [code, target_label] = JMP(args)
%JMP encodes an unconditional jump.
%   The target is parsed with the hex-only flag set. An immediate target
%   (hex address or label) gives B0XY; a register target gives F00r.
%   TARGET_LABEL is the label named as target, or ''.
%
%   Throws bmachine:assembler:JMP for any other kind of target.
[a, mode, label] = parsearg(args, true);    % set hex only flag
switch mode
    case 'immediate'
        code = ['B0' a];
    case 'register'
        code = ['F00' a];
    otherwise
        throw(MException('bmachine:assembler:JMP', ...
            ['Invalid target for JMP: ' args]));
end
target_label = label;
end

function [code, target_label] = JMPEQ(args)
%JMPEQ encodes a jump-if-equal written as 'target, Rtest'.
%   Both operands are parsed with the hex-only flag set. The second must
%   be a register. An immediate target (hex address or label) gives
%   BtXY; a register target gives Ft0r. TARGET_LABEL is the label named
%   as target, or ''.
%
%   Throws bmachine:assembler:JMPEQ if the test operand is not a
%   register and bmachine:assembler:JMP if the target is neither
%   immediate nor a register.
[tgt, tst] = parsejmpargs(args);
[a1, tgtmode, tgtlabel] = parsearg(tgt, true);    % set hex only flag
[a2, tstmode] = parsearg(tst, true); %#ok<*NASGU>
if ~isequal(tstmode, 'register')
    throw(MException('bmachine:assembler:JMPEQ', ...
        ['JMPEQ requires register for test: ', args]));
end
switch tgtmode
    case 'immediate'
        code = ['B' a2 a1];
    case 'register'
        code = ['F' a2 '0' a1];
    otherwise
        throw(MException('bmachine:assembler:JMP', ...
            ['Invalid target for JMPEQ: ' args]));
end
target_label = tgtlabel;
end

function [code, target_label] = JMPNE(args)
%JMPNE encodes the JMPNE instruction as F, test register, 1, target
%   register. Both operands must be registers, so no label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = ['F' testreg '1' tgtreg];
end

function [code, target_label] = JMPGE(args)
%JMPGE encodes the JMPGE instruction as F, test register, 2, target
%   register. Both operands must be registers, so no label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = ['F' testreg '2' tgtreg];
end

function [code, target_label] = JMPLE(args)
%JMPLE encodes the JMPLE instruction as F, test register, 3, target
%   register. Both operands must be registers, so no label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = ['F' testreg '3' tgtreg];
end

function [code, target_label] = JMPGT(args)
%JMPGT encodes the JMPGT instruction as F, test register, 4, target
%   register. Both operands must be registers, so no label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = ['F' testreg '4' tgtreg];
end

function [code, target_label] = JMPLT(args)
%JMPLT encodes the JMPLT instruction as F, test register, 5, target
%   register. Both operands must be registers, so no label is involved.
target_label = '';
[testreg, tgtreg] = jmptst(args);
code = ['F' testreg '5' tgtreg];
end

function [a2, a1] = jmptst(args)
%JMPTST parses the 'Rtarget, Rtest' operands of a jump with a test.
%   Returns the test register digit first (A2) and the target register
%   digit second (A1). Both operands are parsed with the hex-only flag
%   set and both must be registers.
%
%   Throws bmachine:assembler:jmptst if either operand is not a register.
[tgt, tst] = parsejmpargs(args);
[a1, tgtmode] = parsearg(tgt, true);
[a2, tstmode] = parsearg(tst, true);
bothregisters = isequal(tgtmode, 'register') && isequal(tstmode, 'register');
if ~bothregisters
    throw(MException('bmachine:assembler:jmptst', ...
        ['Jump with test requires registers: ', args]));
end
end

function [a1, a2] = parsejmpargs(args)
%PARSEJMPARGS splits 'first, second' into its two operands.
%   Each operand is a run of non-blank characters; white space around
%   the comma is ignored.
%
%   Throws bmachine:assembler:jmpargs if ARGS does not have this form.
pair = regexp(args, '^(\S+)\s*,\s*(\S+)$', 'tokens', 'once');
if isempty(pair)
    throw(MException('bmachine:assembler:jmpargs', ...
        ['Invalid format for jump arguments ' args]));
end
[a1, a2] = pair{1:2};
end


function [a, mode, label] = parsearg(arg, hexonly)
% Classifies a single operand and converts it to hex digits.
%   A     - one hex digit (register forms) or two hex digits (values and
%           addresses); '00' when a label stands in for the value
%   MODE  - 'immediate', 'direct', 'register' or 'indirect'
%   LABEL - name of the label used in immediate or direct mode, else ''
% If HEXONLY is given and true, the binary, character, decimal and
% floating point notations are not accepted; hex values, labels,
% registers and memory references still are.
% The notations are tried in a fixed order and the first match wins.
% Throws bmachine:assembler:parsearg if nothing matches, or if a quoted
% character has a code above 127.
label = '';
allforms = (nargin < 2) || ~hexonly;

% two hex digits, optional 'h' suffix
hx = regexp(arg, '^([0-9a-fA-F]{2})h?$', 'tokens', 'once');
if ~isempty(hx)
    mode = 'immediate';
    a = upper(hx{1});
    return
end

if allforms

    % eight binary digits, optional 'b' suffix
    bn = regexp(arg, '^([01]{8})b?$', 'tokens', 'once');
    if ~isempty(bn)
        mode = 'immediate';
        a = dec2hex(bin2dec(bn{1}), 2);
        return
    end

    % one character in double quotes
    ch = regexp(arg, '^"(.)"$', 'tokens', 'once');
    if ~isempty(ch)
        c = ch{1};
        if c > 127
            throw(MException('bmachine:assembler:parsearg', ...
                ['Non-ascii character: ' arg]));
        end
        mode = 'immediate';
        a = dec2hex(c, 2);
        return
    end

    % signed decimal integer with optional 'd' suffix, or a lone digit
    dc = regexp(arg, '^([+-][0-9]+)d?$|^([0-9])$', 'tokens', 'once');
    if ~isempty(dc)
        mode = 'immediate';
        a = dec2hex(dble2int8(str2double(dc{1})), 2);
        return
    end

    % number with a decimal point
    fl = regexp(arg, '^([+-]?[0-9]+\.[0-9]*)$', 'tokens', 'once');
    if ~isempty(fl)
        mode = 'immediate';
        a = dec2hex(dble2f8(str2double(fl{1})), 2);
        return
    end

end

% label used as a value: placeholder digits, address patched in later
nm = regexp(arg, '^([a-zA-Z]\w{3,})$', 'tokens', 'once');
if ~isempty(nm)
    mode = 'immediate';
    a = '00';
    label = nm{1};
    return
end

% register Rn
rg = regexp(arg, '^R([a-fA-F0-9])$', 'tokens', 'once');
if ~isempty(rg)
    mode = 'register';
    a = upper(rg{1});
    return
end

% memory cell given by hex address: [XY]
mem = regexp(arg, '^\[([a-fA-F0-9]{2})\]$', 'tokens', 'once');
if ~isempty(mem)
    mode = 'direct';
    a = upper(mem{1});
    return
end

% memory cell given by label: [name]
memnm = regexp(arg, '^\[([a-zA-Z]\w{3,})\]$', 'tokens', 'once');
if ~isempty(memnm)
    mode = 'direct';
    a = '00';
    label = memnm{1};
    return
end

% memory cell addressed through a register: [Rn]
ind = regexp(arg, '^\[R([a-fA-F0-9])\]$', 'tokens', 'once');
if ~isempty(ind)
    mode = 'indirect';
    a = upper(ind{1});
    return
end

throw(MException('bmachine:assembler:parsearg', ...
    ['Unrecognised argument format: ' arg]));

end


function [code, target_label] = HALT(args)
%HALT encodes the halt instruction as C000.
%   ARGS must be empty, otherwise bmachine:assembler:HALT is thrown.
%   TARGET_LABEL is always ''.
if isempty(args)
    target_label = '';
    code = 'C000';
    return
end
throw(MException('bmachine:assembler:HALT', ...
    ['HALT should have no arguments, found: ' args]));
end

Contact us