Code covered by the BSD License  

Highlights from
ClustalW Interface

from ClustalW Interface by Steve Simon
MATLAB tools for working with ClustalW.

call_clustalw(S)
function varargout = call_clustalw(S)
% CALL_CLUSTALW call the ClustalW sequence alignment program
% CALL_CLUSTALW(S) uses the values specified in the fields of a CLUSTALWCMD
%   object to construct a string of command line arguments, which will be
%   passed to the ClustalW application.  The ClustalW application should be
%   located in the same directory as the CALL_CLUSTALW file.  The necessary
%   files are available for download here:
%
%     ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalW/
%
%   The dos/windows version includes an already compiled executable.  The
%   unix versions will need to be compiled.
%
% Known Limitations:
%   Apparent limited number of characters for Windows/DOS command line
%   parameters

% command line parameter separator
if ispc, % windows
    optsep = ' /';
else % unix/linux
    optsep = ' -';
end

% initialize command string
cmd = 'clustalw ';



% convert clustalwcmd object to a structure
S = struct(S);

if ~isempty(S.infile) % call normally
    
    if S.align
        % default     
    elseif S.tree
        cmd = [cmd ' TREE '];
    elseif ~isempty(S.bootstrap) && isnumeric(S.bootstrap) % not empty, and it's a true value (> 0)       
        % BOOTSTRAP  overrides ALIGN and TREE
        cmd = [cmd optsep 'BOOTSTRAP=' num2str(S.bootstrap)];        
    end  
    
    if ~isempty(S.convert)
        cmd = [cmd optsep 'CONVERT=' S.convert];
    end
    
    
    % get the filename and directory
    [infiledir,infilename, infileext] = fileparts(S.infile);
    cmd = [cmd optsep 'INFILE=' relativepath(pwd,infiledir) infilename infileext]; 
    
    % see if the infile is the same name as the outfile
    if ~isempty(S.outfile) && strcmpi(S.outfile,S.infile)
        % *** insert a warning or dialog here?
        btn = questdlg('The OUTIFLE is the same as the INFILE.  Do you want to continue?',...
            'Continue Operation','Yes','No','No');
        if strcmp(btn,'No')
            return
        end
    elseif isempty(S.outfile)
        btn = questdlg('The OUTIFLE is empty.  Tthe default name used will be the same as the INFILE.  Do you want to continue?',...
            'Continue Operation','Yes','No','No');
        if strcmp(btn,'No')
            return            
        end
    end        
    
elseif ~isempty(S.profile1) & ~isempty(S.profile2) %call for profiles
    cmd = [cmd optsep 'PROFILE1=' S.profile1 optsep 'PROFILE2=' S.profile2];
else % both are empty
    error('No input file(s) specified')
end


% check for protein/dna
isprotein  = isempty(S.type) | strcmpi(S.type,'protein');



% PROTEIN or DNA sequences
if ~isprotein
    cmd = [cmd optsep 'TYPE=dna'];
end

% protein alignment with negative values in matrix
if S.negative
    cmd = [cmd optsep 'NEGATIVE'];
end

% sequence alignment file name
if S.outfile
    [outfiledir,outfilename, outfileext] = fileparts(S.outfile);
    cmd = [cmd optsep 'OUTFILE=' relativepath(pwd,outfiledir) outfilename outfileext]; 
else
    error('Output file not specified.')
end

% GCG, GDE, PHYLIP, PIR or NEXUS
% default to clustal format (empty field)
if S.output 
    cmd = [cmd optsep 'OUTPUT=' S.output];
end

 if ~isempty(S.outorder) && ~strcmp(S.outorder,'align')
    cmd = [cmd optsep 'OUTORDER=' S.outorder];
end

% LOWER or UPPER (for GDE output only)
if strcmpi(S.output,'gde') & S.case
    cmd = [cmd optsep 'CASE=' S.case];
end

% OFF or ON (for Clustal output only)
if isempty(S.output) && ~isempty(S.seqnos) && ~strcmp(S.seqnos,'off')
    cmd = [cmd optsep 'SEQNOS=' S.seqnos];
end


% Initial Alignment

if S.quicktree    
    %%%%%%%%%%%%%%%%%%%%%%%   Fast Pairwise Alignments   %%%%%%%%%%%%%%%%%%%%%%%
    % use FAST algorithm for the alignment guide tree
    cmd = [cmd optsep 'QUICKTREE=1'];
    
    % word size
    if S.ktuple
        cmd = [cmd optsep 'KTUPLE=' num2str(S.ktuple)];
    end
    
    % number of best diags.
    if S.topdiags
        cmd = [cmd optsep 'TOPDIAGS=' num2str(S.topdiags)];
    end
    
    % window around best diags.
    if S.window
        cmd = [cmd optsep 'WINDOW=' num2str(S.window)];
    end
    
    % gap penalty
    if S.pairgap
        cmd = [cmd optsep 'PAIRGAP=' num2str(S.pairgap)];
    end
    
    % PERCENT or ABSOLUTE
    if S.score
        cmd = [cmd optsep 'PAIRGAP=' S.score];
    end
    
else    
    %%%%%%%%%%%%%%%%%%%%%%%   Slow Pairwise Alignments   %%%%%%%%%%%%%%%%%%%%%%%
    
    % check the type
    if isprotein
        % -PWMATRIX=    :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
        if S.pwmatrix & ~strcmpi(S.pwmatrix,'gonnet') % 'gonnet' is the default
            cmd = [cmd optsep 'PWMATRIX=' S.pwmatrix];
        end
    else
        % -PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
        if S.pwdnamatrix & ~strcmpi(S.pwdnamatrix,'iub')
            cmd = [cmd optsep 'PWDNAMATRIX=' S.pwdnamatrix];
        end
    end
    
    % -PWGAPOPEN=f  :gap opening penalty        
    if S.pwgapopen
        if (isprotein & S.pwgapopen ~= 10) | (~isprotein & S.pwgapopen ~= 15)
            cmd = [cmd  optsep 'PWGAPOPEN=' sprintf('%.2f',S.pwgapopen)];
        end
    end
    
    % -PWGAPEXT=f   :gap opening penalty
    if S.pwgapext
        if (isprotein & S.pwgapext ~= 0.1) | (~isprotein & S.pwgapext ~= 6.66)
            cmd = [cmd optsep 'PWGAPEXT=' sprintf('%0.2f',S.pwgapext)];
        end
    end    
    
    
end

%%%%%%%%%%%%%%%%%%%%%%%   Multiple Alignments   %%%%%%%%%%%%%%%%%%%%%%%

%newtree option doesn't seem to work, clustalw always bases the name on
%the input file's name
%create new tree or use old one
if S.newtree 
    % -NEWTREE=      :file for new guide tree
    cmd = [cmd optsep 'NEWTREE=' S.newtree];
elseif S.usetree
    % -USETREE=      :file for old guide tree
    cmd = [cmd optsep 'USETREE=' S.usetree];
end


% weight matrices
if isprotein
    % -MATRIX=       :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
    if S.matrix & ~strcmpi(S.matrix,'gonnet')
        cmd = [cmd optsep 'MATRIX=' S.matrix];
    end
else
    % -DNAMATRIX=    :DNA weight matrix=IUB, CLUSTALW or filename
    if S.dnamatrix & ~strcmpi(S.dnamatrix,'iub')
        cmd = [cmd optsep 'DNAMATRIX=' S.dnamatrix];
    end
end

% -GAPOPEN=f     :gap opening penalty      
if S.gapopen 
    if (isprotein & S.gapopen ~= 10) | (~isprotein & S.gapopen ~= 15)
        cmd = [cmd optsep 'GAPOPEN=' num2str(S.gapopen)];
    end
end

% -GAPEXT=f      :gap extension penalty
if S.gapext
    if (isprotein & S.gapext ~= 0.2) | (~isprotein & S.gapext ~= 6.66)
        cmd = [cmd optsep 'GAPEXT=' num2str(S.gapext)];
    end
end


% -ENDGAPS       :no end gap separation pen. 
 if S.endgaps
     cmd = [cmd optsep 'endgaps']; % *** lowercase on Unix?
 end
 
 
% -GAPDIST=n     :gap separation pen. range
if S.gapdist & S.gapdist ~= 4
    cmd = [cmd optsep 'GAPDIST=' num2str(S.gapdist)];
end


% -NOPGAP        :residue-specific gaps off  
if S.nopgap
    cmd = [cmd optsep 'nopgap']; % *** lowercase on Unix?
end

 
% -NOHGAP        :hydrophilic gaps off
if S.nohgap
    cmd = [cmd optsep 'nohgap']; % *** lowercase on Unix?
end


% -HGAPRESIDUES= :list hydrophilic res.  
if S.hgapresidues & ~strcmpi(sort(S.hgapresidues),'DEGKNPQRS')
    cmd = [cmd optsep 'HGAPRESIDUES=' S.hgapresidues];
end

% -MAXDIV=n      :% ident. for delay
if S.maxdiv & S.maxdiv ~= 30
    cmd = [cmd optsep 'MAXDIV=' num2str(S.maxdiv)];
end

% -TRANSWEIGHT=f :transitions weighting
if S.transweight >= 0  & S.transweight <= 1 & S.transweight ~= 0.5   % must be in 0:.1:1, default is 0.5
    tmptrans = sprintf('%.1f',S.transweight);   
    if S.transweight < 1,
        cmd = [cmd optsep 'TRANSWEIGHT=' tmptrans(2:end)]; %don't nead leading 0
    else
        cmd = [cmd optsep 'TRANSWEIGHT=1'];
    end
end

%%%%%%%%%%%%%%%%%%%%%%%   Profile Alignments   %%%%%%%%%%%%%%%%%%%%%%%

% -PROFILE      :Merge two alignments by profile alignment
if S.profile    
    cmd = [cmd optsep 'PROFILE'];
    
    
    % *** need to put in check to see that NEWTREE and USETREE aren't both used
    
    if (S.newtree1 | S.newtree2) & (S.usetree1 | S.usetree2)
        error('Cannot use NEWTREE1/NEWTREE2 and USETREE1/USETREE2 at the same time')
    end
    
    % -NEWTREE1=    :file for new guide tree for profile1
    % -NEWTREE2=    :file for new guide tree for profile2
    if S.newtree1 && S.newtree2,
        cmd = [cmd optsep 'NEWTREE1=' S.newtree1 optsep 'NEWTREE2=' S.newtree2];
    elseif (S.newtree1 && ~(S.newtree2)) || (~(S.newtree1) && S.newtree2)
        error('When used, both NEWTREE1 and NEWTREE2 need valid entries.')
    end
    
    
    % -USETREE1=    :file for old guide tree for profile1
    % -USETREE2=    :file for old guide tree for profile2
    if S.usetree1 && S.usetree2,
        cmd = [cmd optsep 'USETREE1=' S.usetree1 optsep 'USETREE2=' S.usetree2];
    elseif (S.usetree1 && ~(S.usetree2)) || (~(S.usetree1) && S.usetree2)
        error('When used, both USETREE1 and USETREE2 need valid entries.')
    end
    
    %%%%%%%%%%%%%%%%%%%%%%%   Sequence to Profile Alignments   %%%%%%%%%%%%%%%%%%%%%%%
    % -SEQUENCES   :Sequentially add profile2 sequences to profile1 alignment
    if S.sequences
        cmd = [cmd optsep 'SEQUENCES'];       
        if S.newtree
            % -NEWTREE=    :file for new guide tree
            cmd = [cmd optsep 'NEWTREE=' S.newtree];
        elseif S.usetree
            % -USETREE=    :file for old guide tree
            cmd = [cmd optsep 'USETREE=' S.usetree];
        end
        
        %%%%%%%%%%%%%%%%%%%%%%%   Structure Alignments   %%%%%%%%%%%%%%%%%%%%%%%
        % -NOSECSTR1     :do not use secondary structure-gap penalty mask for profile 1 
        % -NOSECSTR2     :do not use secondary structure-gap penalty mask for profile 2
        
        % *** add
    end
end

% ******************Structure Alignments******************

% -SECSTROUT=STRUCTURE or MASK or BOTH or NONE   :output in alignment file
if S.secstrout
    cmd = [cmd optsep 'SECSTROUT=' S.secstrout];
end

% -HELIXGAP=n    :gap penalty for helix core residues 
if S.helixgap & S.helixgap ~= 4 % default
    cmd = [cmd optsep 'HELIXGAP=' num2str(S.helixgap)];
end

% -STRANDGAP=n   :gap penalty for strand core residues
if S.strandgap & S.strandgap ~= 4 % default
    cmd = [cmd optsep 'STRANDGAP=' num2str(S.helixgap)];
end

% -LOOPGAP=n     :gap penalty for loop regions
if S.terminalgap
    cmd = [cmd optsep 'LOOPGAP=' num2str(S.loopgap)];
end

% -TERMINALGAP=n :gap penalty for structure termini
if S.terminalgap
    cmd = [cmd optsep 'TERMINALGAP=' num2str(S.terminalgap)];
end

% -HELIXENDIN=n  :number of residues inside helix to be treated as terminal
if S.helixendin  & S.helixendin ~= 3 % default
    cmd = [cmd optsep 'HELIXENDIN=' num2str(S.helixendin)];
end

% -HELIXENDOUT=n :number of residues outside helix to be treated as terminal
if S.helixendout % & S.helixendout ~= 0% default, but 0 is not true
    cmd = [cmd optsep 'HELIXENDOUT=' num2str(S.helixendout)];
end    

% -STRANDENDIN=n :number of residues inside strand to be treated as terminal
if S.strandendin & S.strandendin ~= 1 % default
    cmd = [cmd optsep 'STRANDENDIN=' num2str(S.strandendin)];
end

% -STRANDENDOUT=n:number of residues outside strand to be treated as terminal 
if S.strandendout & S.strandendout ~= 1 % default
    cmd = [cmd optsep 'STRANDENDOUT=' num2str(S.strandendout)];
end    
%





%%%%%%%%%%%%%%%%%%%%%%%   Trees   %%%%%%%%%%%%%%%%%%%%%%%  
% -OUTPUTTREE=nj OR phylip OR dist OR nexus
if ~isempty(S.outputtree) & ~strcmpi(S.outputtree,'phylip')
    cmd = [cmd optsep 'OUTPUTTREE=' S.outputtree];
end

% -SEED=n        :seed number for bootstraps.
if S.seed
    cmd = [cmd optsep 'SEED=' S.seed];
end

% -KIMURA        :use Kimura's correction.   
if S.kimura
    cmd = [cmd optsep 'KIMURA'];
end

% -TOSSGAPS      :ignore positions with gaps.
if S.tossgaps
    cmd = [cmd optsep 'TOSSGAPS'];
end

% -BOOTLABELS=node OR branch :position of bootstrap values in tree display
if S.bootlabels & ~strcmpi(S.bootlabels,'branch')
    cmd = [cmd optsep 'BOOTLABELS=' S.bootlabels];
end






%%%%%%%%%%%%%%%%%%%%%%%   Call ClustalW   %%%%%%%%%%%%%%%%%%%%%%%
disp(cmd)

[s,w] = system(cmd);

if s    
    error(w) % *** should add error processing
else
    disp(w)    
end

if nargout, 
    varargout{1} = w;
end

Contact us at files@mathworks.com