Code covered by the BSD License  

Highlights from
Convert SimBiology(R) model to COBRA Toolbox model

Convert SimBiology(R) model to COBRA Toolbox model

by

 

18 Jan 2013 (Updated )

Use SimBiology for dynamic modeling and COBRA Toolbox algorithms for constraint-based modeling.

sbioexportCobra(simbioModel,varargin)
function cobraModel = sbioexportCobra(simbioModel,varargin)
% SBIOEXPORTCOBRA convert SimBiology model to a COBRA Toolbox-compatible structure
%
%   COBRAMODEL = SBIOEXPORTCOBRA(SIMBIOLOGY_MODEL) converts a SimBiology(R) model
%   object SIMBIOLOGY_MODEL into a COBRA-Toolbox (see <https://github.com/opencobra/opencobra>) 
%   compatible structure COBRAMODEL. COBRA can then be used to perform constraint- and structure-based
%   model analysis. 
%   Not all SimBiology models can be converted. The original model should
%   follow the guidelines laid out in the COBRA SBML specification 
%   (see <http://doi:10.1038/protex.2011.234>).
%
%   COBRAMODEL = SBIOEXPORTCOBRA(SIMBIOLOGY_MODEL, 'Bounds', BOUNDS) uses
%   the parameter/value pair 'Bounds' and a scalar value BOUNDS as the upper and
%   lower bound (-BOUNDS) for the flux value. When no value is provided, the 
%   values in the SIMBIOLOGY_MODEL are kept.
%
%   Some notes:
%       * Metabolite IDs are lost when importing a COBRA-compliant SBML model into SimBiology. 
%         Therefore, new IDs are generated in this function using a running index, which, 
%         however, breaks full equality with COBRA Toolbox generated structures
%       * SimBiology uses SBML species names as identifiers. Some characters are not allowed 
%         (e.g., >, [], etc.) and are replaced upon import. When the
%         COBRA-compliant SBML file uses "[compartment_name]" to denote compartment names in
%         the metName field, this is replaced with "(compartment_name)" in
%         SimBiology. This function reverses this replacement, but cannot
%         reverse some other replacements (such as (4->5), where the > is
%         simply deleted). Hence, the metNames fields will not be identical
%         with COBRA Toolbox generated structures when going through this function
%   
% Sven Mesecke
% January 18th, 2013
% Copyright 2013 The MathWorks, Inc.

%% parse inputs
% create input parser
p = inputParser();
p.addRequired('simbioModel',@(x) isa(x,'SimBiology.Model'));
p.addParamValue('Bounds', NaN, @isnumeric);

p.parse(simbioModel,varargin{:});
input = p.Results;

% initialize variables
simbioModel = input.simbioModel;
parameters.Bounds = input.Bounds;

% generally needed variables
allSpeciesNames = get(simbioModel.Species,{'Name'});

%% create Cobra structure
cobraModel.description = get(simbioModel,'Name');

% metabolites - new ids generated (different from the original SBML file, those are not saved in the simbiology model object
% - and also different from the recommended COBRA id)
mets = sbioselect(simbioModel.Species, 'Where','BoundaryCondition','==',0);
nmets = numel(mets); ids = 1:nmets;
magn = ceil(log10(nmets)); % defines the number of leading zeros for the metabolite ids

idstr = cellstr(num2str(ids(:),['%0' num2str(magn) 'd']));
cobraModel.mets = strcat('s_',idstr);
metNames = get(mets,{'Name'});
metNames = regexprep(metNames,'\)$',']');
metNames = regexprep(metNames,'\((?=[^\(]*])','['); % change the way compartment names are included in the metabolite name (using "species [compartment name"])
cobraModel.metNames = metNames;

% reaction ids - new ids generated (different from the original SBML file)
nrxns = numel(simbioModel.Reactions); ids = 1:nrxns;
magn = ceil(log10(nrxns)); % defines the number of leading zeros for the reaction ids

idstr = cellstr(num2str(ids(:),['%0' num2str(magn) 'd']));
cobraModel.rxns = strcat('r_',idstr);

% extract genes and gene rules by parsing the reaction notes
cobraModel = parseReactionNotes(simbioModel, cobraModel);

% stoichiometric matrix
[S, ~, reactionNames] = getstoichmatrix(simbioModel);
% remove constant or boundary value species
constantSpecies = sbioselect(simbioModel.Species, 'Where','BoundaryCondition','==',1);
constantSpeciesNames = get(constantSpecies,{'Name'});
constantndx = ismember(allSpeciesNames,constantSpeciesNames);
cobraModel.S = S(~constantndx,:); % use only non-constant species   

% reversability
rev = cell2mat(get(simbioModel.Reactions,{'Reversible'}));
cobraModel.rev = rev;

% upper and lower flux value bounds and objective coefficient
cobraModel = getBoundsAndObjectives(simbioModel, cobraModel, parameters);

% reaction names
cobraModel.rxnNames = reactionNames;
end

% utility functions
function cobraModel = parseReactionNotes(simbioModel, cobraModel)
    nrxns = numel(simbioModel.Reactions);
    geneRules = repmat({''},nrxns,1);
    reactionNotes = get(simbioModel.Reactions,{'Notes'});
    
    gRulestmp = regexpi(reactionNotes,'(?<=GENE[ _]ASSOCIATION:).*(?=</p>)','match'); % extract reaction notes
    emptyCells = cellfun(@(x) isempty(x),gRulestmp);
    if all(emptyCells),
        lastwarn('Reaction Notes are empty or do not contain Gene Association data\n Function will continue but might not return a usable COBRA model')
    else
        geneRules(~emptyCells) = [gRulestmp{~emptyCells}];
        cobraModel.grRules = geneRules; % human-readable gene rules 
        
        tmpRules = regexprep(geneRules,'\<and\>|\<or\>|\(|\)','','ignorecase'); % remove brackets and boolean operators ('and', 'or')
        reactionGeneAssoc = regexp(tmpRules,'\s+','split'); % split using spaces, creates association between genes and reactions

        genes = unique([reactionGeneAssoc{:}]); % genes are sorted
        genes = genes(~cellfun(@isempty,genes)); % remove empty cells
        genes = genes(:); % create column vector
        cobraModel.genes = genes;
        
        % convert human-readable gene rules to evaluatable rules
        cobraModel.rules = createExecutableGeneRules(geneRules, genes);
        
        % create the reaction - gene map
        tmpMap = cellfun(@(x) ismember(genes,x),reactionGeneAssoc,'UniformOutput',false); % creates a nested nreactions cell array with each cell containing an ngenes logical array
        reactionGeneMap = sparse(double([tmpMap{:}]')); % rearranges the nested cell array into a sparse double array of size nreactions x ngenes using element extraction and transposition
        cobraModel.rxnGeneMat = reactionGeneMap;
    end
    % reaction notes
    cobraModel.rxnNotes = reactionNotes;
end

function cobraModel = getBoundsAndObjectives(simbioModel, cobraModel, parameters)
    % lower flux value bounds
    fluxlbParam = sbioselect(simbioModel,'Where','Name','==','LOWER_BOUND');
    if numel(fluxlbParam) ~= numel(simbioModel.Reactions), error('Flux Lower Bounds need to be specified for all reactions'), end % consistency checking
    fluxlb = cell2mat(get(fluxlbParam,{'Value'}));
    if ~isnan(parameters.Bounds)
        fluxlb(fluxlb<-parameters.Bounds) = -parameters.Bounds; % replace bounds with specified new bounds
    end
    cobraModel.lb = fluxlb;

    % upper flux value bounds
    fluxubParam = sbioselect(simbioModel,'Where','Name','==','UPPER_BOUND');
    if numel(fluxubParam) ~= numel(simbioModel.Reactions), error('Flux Upper Bounds need to be specified for all reactions'), end % consistency checking
    fluxub = cell2mat(get(fluxubParam,{'Value'}));
    if ~isnan(parameters.Bounds)
        fluxub(fluxub>parameters.Bounds) = parameters.Bounds;
    end
    cobraModel.ub = fluxub;

    % objective coefficient
    objectiveParam = sbioselect(simbioModel,'Where','Name','==','OBJECTIVE_COEFFICIENT');
    if numel(objectiveParam) ~= numel(simbioModel.Reactions), error('Flux Objective Coefficients need to be specified for all reactions'), end % consistency checking
    objectiveCoefficient = cell2mat(get(objectiveParam,{'Value'}));
    cobraModel.c = objectiveCoefficient;
end

function rules = createExecutableGeneRules(geneRules, genes)
    runndx = (1:length(genes))';
    execGenes = strcat(repmat({'x('},runndx(end),1),num2str(runndx),')'); % create cell array of x(i)   
    rules = regexprep(geneRules, {'\<and\>','\<or\>'},{'&','|'},'ignorecase');
    rules = regexprep(rules, strcat('\<',genes,'(?!\-\w+)'), execGenes);    
end

Contact us