BerkeleyMadonna model import function for SimBiology

by

 

Imports ODE models in the BerkeleyMadonna(TM) format into a SimBiology(R) model

sbioimportBerkeleyMadonna.m
function simbioModel = sbioimportBerkeleyMadonna(FileName,varargin)
%SBIOIMPORTBERKELEYMADONNA import BerkeleyMadonna(TM) model files into
% SimBiology(R)
%   
%   MODEL = SBIOIMPORTBERKELEYMADONNA('FILENAME') imports a
%   BerkeleyMadonna model file, 'FILENAME' into a SimBiology MODEL.
%   MODEL = SBIOIMPORTBERKELEYMADONNA('FILENAME',...,'ModelName','MODELNAME',...)
%   uses the parameter/value pair 'ModelName' and a string 'MODELNAME' to
%   name the SimBiology model. If no 'ModelName' is provided, the default
%   model name 'BMModel' will be used.
%
%   MODEL = SBIOIMPORTBERKELEYMADONNA('FILENAME',...,'CompartmentName','COMPARTMENTNAME',...)
%   uses the parameter/value pair 'CompartmentName' and a string 'COMPARTMENTNAME' to
%   name the compartment of the SimBiology model. If no 'CompartmentName' is provided, the default
%   model name 'System' will be used.
%
%   MODEL = SBIOIMPORTBERKELEYMADONNA('FILENAME',...,'TranslateStepFunctions',LOGICAL,...)
%   uses the parameter/value pair 'TranslateStepFunctions' and a logical TRUE/FALSE to
%   indicate whether step functions in the BerkeleyMadonna model should be
%   translated into equivalent SimBiology constructs. The default is TRUE.
%
%   Some notes: 
%   * SimBiology does not support simulation start times !=0.
%     Therefore, stop times in the output model are set at (stoptime -
%     starttime) and all step functions are shifted accordingly
%   * SimBiology does not directly support step functions. Pulses can be
%     implemented using doses, but squarepulses and step functions require
%     a combination of event functions and repeated assignments. To include these 
%     step function replacements in the model, the helper species 'pseudoDoseSpecies_x' 
%     and helper parameter 'pseudoEventParam_x' (where x is a running index) are
%     added to the model. Though unlikely, it is best to avoid these identifiers in your model.
%   * If you set the parameter/value argument 'TranslateStepFunctions' to false
%     (default: true), then this function will error if it encounters a
%     BerkeleyMadonna step function. 
%   * This function does not handle notation used in BerkeleyMadonna for
%     STELLA(TM) compatibility. 

% Sven Mesecke
% January 4th, 2013
% Copyright 2013 The MathWorks, Inc.

numberRegexp = '^[-+]?\d*\.?\d+(e[-+]?\d+)?$';

%% parse inputs
p = inputParser();
p.addRequired('FileName',@isstr);
p.addParamValue('ModelName','BMModel',@isstr);
p.addParamValue('CompartmentName','System',@isstr);
p.addParamValue('TranslateStepFunctions',true,@islogical);
p.parse(FileName,varargin{:});
input = p.Results;

%% define internal representation of model
internalModel = struct();
% pseudo parameters and species needed for step function conversion
internalModel.pseudoDoseSpecies= 'pseudoDoseSpecies'; % should not be used in the BerkeleyMadonna model
internalModel.pseudoEventParam = 'pseudoEventParam';

internalModel.fileName = input.FileName;
internalModel.modelName = input.ModelName;
internalModel.compartmentName = input.CompartmentName;
internalModel.translateStepFunctions = input.TranslateStepFunctions;


%% key words and special symbols in Berkeley Madonna
bmModelDescriptor = struct();
% key words for declaring ODEs
bmModelDescriptor.ODEIndicators = {'d\/dt\s*\(_placeholder_\)','_placeholder_\''','flow _placeholder_'}; % _placeholder_ acting as a replace token
% key words for BerkeleyMadonna solver options
bmModelDescriptor.solverSymbols = {'starttime','stoptime','(?<!\/)dt','dtmin','dtmax','tolerance','dtout','roottol','time'};
% BerkeleyMadonna functions that are easily translated into MATLAB equivalents
bmModelDescriptor.supportedFunctions = {'abs', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctanh',...
                        'cos', 'cosh',   'exp', 'gammaln', 'int', 'log10', ...
                        'logn', 'mod', 'pi', 'round', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', ''};

% MATLAB functions equivalent to supported BerkeleyMadonna functions
sbioSupportedFunctions = containers.Map(bmModelDescriptor.supportedFunctions,...
                    {'abs', 'acos', 'acosh', 'asin', 'asinh', 'atan', 'atanh',...
                    'cos', 'cosh', 'exp', 'gammaln', 'fix','log10',...
                    'log', 'mod', 'pi', 'round', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', '-'}); % the last is used to account for situations where dashes are used accidentally instead of hyphens   
                    
% unsupported BerkeleyMadonna functions - throw an error           
bmModelDescriptor.unsupportedElements = {'arraysum', 'arraymean', 'arraystddev','binomial', 'delay', 'erf', 'erfc', 'graph', ...
                        'maxinflow', 'mean', 'min', 'mod', 'netflow', 'normal', 'ostate', 'outflow', 'qpop',...
                        'qpops', 'poisson', 'random', 'stepsize', 'sum', 'switch','if', 'then', 'else', 'next', 'inf',...
                        'conveyor','oven', 'queue', 'rooti', 'guess', 'limit', 'rename', 'and', 'or'}; 
                    
% unsupported BerkeleyMadonna operators - throw an error           
bmModelDescriptor.unsupportedOpRegexp = {'\[[ij0-9+-.,]*\]', '''{2,}','\+ ?dt', '\(t ?- ?dt\)','#'};  
bmModelDescriptor.unsupportedOpErrorMsg = containers.Map(...
                                          bmModelDescriptor.unsupportedOpRegexp,... % keys
                                          {'Arrays (declared using, e.g.,  k[i+1] etc.)',...
                                          'ODEs with higher-order equations (i.e., non first-order)',...
                                          'STELLA notation (e.g. R + dt or R(t - dt))',...
                                          'STELLA notation (e.g. R + dt or R(t - dt))',...
                                          'Calls to imported datasets'});  % values
                    
% BerkeleyMadonna functions that can be translated using SimBiology dose objects            
bmModelDescriptor.stepFunctions = {'squarepulse','pulse', 'step'};

%% import and prepare BerkeleyMadonna model
bmModel = fileread(internalModel.fileName);
% remove comments and whitespace
bmModel = regexprep(bmModel,';.*','','dotexceptnewline'); % remove single-line comments
bmModel = regexprep(bmModel,'{.*?}',''); % remove multi-line comments

% check for elements that are not supported (functions and operators)
ndx = checkUnsupportedElements(bmModel,bmModelDescriptor);
if any(ndx),
    error('These elements are unsupported:\t%s\n',bmModelDescriptor.unsupportedElements{ndx})
end

ndx = checkUnsupportedOperators(bmModel,bmModelDescriptor);
if any(ndx),
    errMessages = values(bmModelDescriptor.unsupportedOpErrorMsg,bmModelDescriptor.unsupportedOpRegexp(ndx));
    error('This Berkeley Madonna functionality is unsupported:\t%s\n',errMessages{:})
end

%bmModel = regexprep(bmModel,' *',''); % remove white space, makes it easier to write regular expressions
bmModel = lower(bmModel); % convert all uppercase to lowercase (BM is not case-sensitive, MATLAB/SimBiology is)

% convert supported functions to MATLAB equivalents
bmModel = regexprep(bmModel, strcat('\<',bmModelDescriptor.supportedFunctions,'\>'), values(sbioSupportedFunctions,bmModelDescriptor.supportedFunctions));
% convert ** operator to ^
bmModel = regexprep(bmModel, '\*\*','^');

%% extract solver options, species, ODEs and parameters
% solver options
[bmModel, internalModel.setSolverOptions, internalModel.startTime] = getBMSolverOptions(bmModel, bmModelDescriptor);

% find bm step functions and replace with pseudo dose/event species/parameters
[bmModel, internalModel.stepFunctions] = getAndReplaceBMDoseFunctions(bmModel, bmModelDescriptor, internalModel);

% identify species and their initial conditions
[bmModel, internalModel.speciesNames, internalModel.speciesValues] = getBMSpecies(bmModel);

% extract ODE rhs
[bmModel, internalModel.ODEs] = getODEs(bmModel, internalModel.speciesNames, bmModelDescriptor);

% get parameter names and values
[bmModel, internalModel.parameterNames, internalModel.parameterValues] = getParameters(bmModel); 

% clean up model and warn if not everything has been identified
verifyModelParsing(bmModel);

%% build SimBiology model
% set up simbio model
simbioModel = sbiomodel(internalModel.modelName);
comp = addcompartment(simbioModel,internalModel.compartmentName);

% add species
addBMSpecies(simbioModel, comp, internalModel.speciesNames, internalModel.speciesValues, numberRegexp);

% add parameters to  model
addBMParameters(simbioModel, internalModel.parameterNames, internalModel.parameterValues, numberRegexp);

% add ODEs
addODEs(simbioModel,internalModel.ODEs);

% add supported step functions
if ~isempty(fields(internalModel.stepFunctions))
    addStepFunctions(simbioModel, comp, internalModel);
end
% add supported simulation options (cannot be empty, as stoptime presumably
% needs to be defined)
modifySimulationOptions(simbioModel, internalModel);

end

%% utility functions

% extract from BerkeleyMadonna file and write them into internal Model
% struct
    function ndx = checkUnsupportedElements(bmModel,bmModelDescriptor)
        unsupported = regexp(bmModel,strcat('\<',bmModelDescriptor.unsupportedElements, '\>'),'match');
        ndx = cellfun(@(x)~isempty(x),unsupported);
    end

    function ndx = checkUnsupportedOperators(bmModel,bmModelDescriptor)
        %unsupportedOperators = regexptranslate('escape',bmModelDescriptor.unsupportedOpRegexp);
        unsupported = regexp(bmModel,bmModelDescriptor.unsupportedOpRegexp,'match');
        ndx = cellfun(@(x)~isempty(x),unsupported);
    end
    
    function ndx = getNdxNonEmptyCells(cellarray)
        if ~iscell(cellarray), error('getNdxNonEmptyCells requires a cell array as input'); end
        ndx = cellfun(@(x) ~isempty(x), cellarray);
    end

    function [bmModel, setSolverOptions, startTime] = getBMSolverOptions(bmModel, bmModelDescriptor)
       
        setSolverOptVals = regexp(bmModel, strcat('(?<=\<',bmModelDescriptor.solverSymbols,'\s*=).*'),'match','dotexceptnewline'); % modify, nested cell array TODO
        setSolverOptNames = bmModelDescriptor.solverSymbols(~cellfun(@isempty,setSolverOptVals));
        setSolverOptions = containers.Map(setSolverOptNames, [setSolverOptVals{:}]); % reshape solver options array, then use these as keys in a hash table storing the option values
        % to support start times ~= 0
        if ~isKey(setSolverOptions,'starttime'), 
            startTime = 0; %does BerkeleyMadonna support not declaring the start time?
        else
            startTime = str2double(setSolverOptions('starttime'));
        end
        bmModel = regexprep(bmModel, strcat('(?<!\/)\<',bmModelDescriptor.solverSymbols,'\>.*'),'','dotexceptnewline');% remove terms from model string
    end

    function [bmModel, stepFunctions] = getAndReplaceBMDoseFunctions(bmModel, bmModelDescriptor, internalModel)
    stepFunctions=struct();    
    k = 1;
        for i=1:length(bmModelDescriptor.stepFunctions)
            bmDosestmp = regexp(bmModel,['(?<=\<',bmModelDescriptor.stepFunctions{i},')\([a-zA-Z,0-9]*\)'],'match'); % no spaces allowed between function and brackets
            bmDoses = regexprep(bmDosestmp,{'\(','\)'},'');
            for j=1:length(bmDoses)
                if ~isempty(bmDoses)
                    if ~internalModel.translateStepFunctions
                        error('Option ''TranslateStepFunction'' is set to FALSE, but step functions are declared in the BerkeleyMadonna model'); 
                    end
                    stepFunctions(k).stepType = bmModelDescriptor.stepFunctions{i};
                    stepFunctions(k).pseudoDoseSptmp = [internalModel.pseudoDoseSpecies '_' num2str(k)];
                    stepFunctions(k).arguments = bmDoses{:};

                    bmModel = strrep(bmModel, [stepFunctions(k).stepType bmDosestmp{j}],stepFunctions.pseudoDoseSptmp);
                    k=k+1;
                end
            end
        end 
    end

    function [bmModel, speciesNames, speciesValues] = getBMSpecies(bmModel) %%% fromhere
        speciesiTokens = regexp(bmModel,'\<init.(\w*).*=(.*)','tokens', 'dotexceptnewline'); % species names using INIT    
        %speciesiTokens = regexp(bmModel,'\<init\(+(\w*)\)+=(.*)','tokens', 'dotexceptnewline'); % species names using INIT
        speciesiName = cellfun(@(s)s{1}, speciesiTokens, 'UniformOutput', 0);
        speciesiVal = cellfun(@(s)s{2}, speciesiTokens, 'UniformOutput', 0);

        speciessTokens = regexp(bmModel,'(\w*)\(+starttime\)+=(.*)','tokens', 'dotexceptnewline'); % species names using starttime
        speciessName = cellfun(@(s)s{1}, speciessTokens, 'UniformOutput', 0);
        speciessVal = cellfun(@(s)s{2}, speciessTokens, 'UniformOutput', 0);
        % unite 
        speciesNames = [speciesiName(:); speciessName(:)];
        speciesValues = [speciesiVal(:); speciessVal(:)];

        bmModel = regexprep(bmModel,'.*\<(init|starttime)\>.*','','dotexceptnewline'); % remove lines that contain INIT or STARTTIME
    end

    function [bmModel, ODEs] = getODEs(bmModel, speciesNames, bmModelDescriptor)
        ODEs = cell(size(speciesNames));
        for i = 1:length(speciesNames)
            tags = regexprep(bmModelDescriptor.ODEIndicators,'_placeholder_',speciesNames{i});
            keys = strcat('(?<=', tags, '\s*\=\s*).*'); rmkeys = strcat(tags, '.*'); 
            rhs = regexp(bmModel,keys,'dotexceptnewline','match'); % extract key matches
            equation = rhs{getNdxNonEmptyCells(rhs)}; % extract 
            ODEs{i} = [speciesNames{i} '=' equation{:}];

            % clean up remaining model %%%TODO
            bmModel = regexprep(bmModel,rmkeys,'','dotexceptnewline'); % remove key matches from model string

        end
    end

    function [bmModel, parameterNames, parameterValues] = getParameters(bmModel)
        parameterNames = strtrim(regexp(bmModel,'\w*(?=\s*\=\s*\w*)','match','dotexceptnewline')); % identify parameter names which are on the left side of an equals symbol
        parameterValues = strtrim(regexp(bmModel,'(?<=\w*\s*\=\s*).*','match','dotexceptnewline'));

        % clean up remaining Berkeley Madonna model string
        bmModel = regexprep(bmModel,strcat('\<', parameterNames, '.*'),'','dotexceptnewline'); 
    end

    function verifyModelParsing(bmModel)
        remainder = regexpi(bmModel,'\w+','match');
        if any(cellfun(@(x) ~isempty(x),remainder))
            warning('The following elements have not been identified: \t%s\n',remainder{:})
        end

    end

% write Berkeley Madonna model to SimBiology model
    function addBMSpecies(simbioModel, comp, speciesNames, speciesValues, numberRegexp)  
        speciesValues = strtrim(speciesValues);
        numericInitVals = cellfun(@(x) ~isempty(x),regexp(speciesValues,numberRegexp,'match')); % identify all-numeric initial values
        for i=1:length(speciesNames)
            simbioSpecies = addspecies(comp,speciesNames{i});
            if numericInitVals(i)
                simbioSpecies.InitialAmount = str2double(speciesValues{i});
            else
                addrule(simbioModel, [speciesNames{i} '=' speciesValues{i}],'initialAssignment');
            end
        end
    end

    function addBMParameters(simbioModel, parameterNames, parameterValues, numberRegexp)
        % separate numerical and symbolic assignments
        numericalAssignments = cellfun(@(x) ~isempty(x),regexp(parameterValues,numberRegexp,'match'));
        symbolicAssignments = ~numericalAssignments;

        % add parameters, initialize with 1 (default)
        for i=1:length(parameterNames)
            parameter = addparameter(simbioModel,parameterNames{i});
            % set those defined to actual value
            if numericalAssignments(i)
                set(parameter,'Value', str2double(parameterValues{i}));
            else
                set(parameter,'ConstantValue', false); % will be controlled by repeated assignment rules
            end
        end

        ruledParamNames = parameterNames(symbolicAssignments);
        ruledParamValues = parameterValues(symbolicAssignments);
        for i=1:length(ruledParamNames)
            addrule(simbioModel,[ruledParamNames{i} '=' ruledParamValues{i}],'repeatedAssignment'); 
        end
    end

    function addODEs(simbioModel,odes)
        for i = 1:length(odes)
            addrule(simbioModel,odes{i},'rate');
        end
    end

    function modifySimulationOptions(simbioModel, internalModel)
        cs = getconfigset(simbioModel);
        solverOptions = keys(internalModel.setSolverOptions);
        for i = 1:length(solverOptions)
            switch solverOptions{i}
                case 'stoptime'
                    cs.StopTime = str2double(internalModel.setSolverOptions(solverOptions{i})) - internalModel.startTime;
                case 'tolerance'
                    cs.SolverOptions.AbsoluteTolerance = str2double(internalModel.setSolverOptions(solverOptions{i}));
            end
        end
     end

    function addStepFunctions(simbioModel, comp, internalModel)
        stepFunctions = internalModel.stepFunctions;
        stopTime = str2double(internalModel.setSolverOptions('stoptime')); 
        startTime = internalModel.startTime;
        
        for i = 1:length(stepFunctions)
            addspecies(comp, stepFunctions(i).pseudoDoseSptmp,'BoundaryCondition',true); % use a pseudo species to replace the step function
            switch stepFunctions(i).stepType
                case 'pulse'
                    doseProps = regexp(stepFunctions(i).arguments,',','split');
                    
                    [args,ok] = str2num([doseProps{1}; doseProps{2}; doseProps{3}]); %#ok str2num
                    symbParameterndx = ~ok;
                    % replace symbolic parameters in the step function with
                    % its numerical values
                    if any(symbParameterndx)
                        symbParameters = doseProps(symbParameterndx); % test - pulse with symbolic parameters
                        args(symbParameterndx) = internalModel.parameterValues(strcmp(internalModel.parameterNames,symbParameters));
                    end
                   
                    dObj = adddose(simbioModel,['pulse_' num2str(i)],'schedule');
                    dObj.TargetName = stepFunctions(i).pseudoDoseSptmp;
                    dObj.Time = (args(2)-startTime):args(3):(stopTime-startTime); % shifting the pulses along the time axis to move starttime to 0
                    dObj.Amount = args(1).*ones(size(dObj.Time));
                    
                case 'squarepulse'
                    % extract function arguments
                    doseProps = regexp(stepFunctions(i).arguments,',','split');
                    pulseStart = str2double(doseProps{1})-startTime; 
                    pulseDuration = doseProps{2}; %keep it as a string so that we can simply concatenate it for the event function
                    pulseStop = [num2str(pulseStart) '+' pulseDuration]; % enables symbolic pulseDuration arguments
                    
                    eventParamName = [internalModel.pseudoEventParam '_' num2str(i)];
                    ep = addparameter(simbioModel,eventParamName,'Value',0,'ConstantValue',false);
                    addrule(simbioModel,[stepFunctions(i).pseudoDoseSptmp '='  eventParamName],'repeatedAssignment');
                    
                    if pulseStart==0, % to cover cases where the squarepulse is triggered right at the start
                        ep.Value = 1;
                    else
                        addevent(simbioModel,['time >=' num2str(pulseStart)], [eventParamName '= 1']);
                    end
                    addevent(simbioModel,['time >=' pulseStop], [eventParamName '= 0']);
                    
                case 'step'
                    doseProps = regexp(stepFunctions(i).arguments,',','split');
                    stepHeight = doseProps{1}; 
                    [stTimeTmp, ok] = str2num(doseProps{2}); %#ok
                    if ok,
                        stepTime = stTimeTmp;
                    else
                        stepTime = internalModel.parameterValues(strcmp(internalModel.parameterNames,doseProps{2}));
                    end
                    
                    if startTime ~= 0,
                        [stTimeTmp, ok] = str2num(doseProps{2}); %#ok
                        if ok, % cover the case where the second function argument is a model parameter
                            stepTime = stTimeTmp - startTime;
                        else
                            stTimeTmp = internalModel.parameterValues(strcmp(internalModel.parameterNames,doseProps{2}));
                            stepTime = stTimeTmp - startTime;
                        end
                    end
                    
                    if stepTime == startTime, % to cover the rare case where the step coincides with the simulation start
                        addrule(simbioModel,[stepFunctions(i).pseudoDoseSptmp '='  stepHeight],'repeatedAssignment');
                    else
                        eventParamName = [internalModel.pseudoEventParam '_' num2str(i)];
                        addparameter(simbioModel,eventParamName,'Value',0,'ConstantValue',false);
                    
                        addrule(simbioModel,[stepFunctions(i).pseudoDoseSptmp '='  eventParamName],'repeatedAssignment');
                        addevent(simbioModel,['time >=' num2str(stepTime)], [eventParamName '= ' stepHeight]);
                    end
            end
                    
            
        end
    end














Contact us