function varargout = call_clustalw(S)
% CALL_CLUSTALW call the ClustalW sequence alignment program
% CALL_CLUSTALW(S) uses the values specified in the fields of a CLUSTALWCMD
% object to construct a string of command line arguments, which will be
% passed to the ClustalW application. The ClustalW application should be
% located in the same directory as the CALL_CLUSTALW file. The necessary
% files are available for download here:
%
% ftp://ftp-igbmc.u-strasbg.fr/pub/ClustalW/
%
% The dos/windows version includes an already compiled executable. The
% unix versions will need to be compiled.
%
% Known Limitations:
% Apparent limited number of characters for Windows/DOS command line
% parameters
% command line parameter separator
if ispc, % windows
optsep = ' /';
else % unix/linux
optsep = ' -';
end
% initialize command string
cmd = 'clustalw ';
% convert clustalwcmd object to a structure
S = struct(S);
if ~isempty(S.infile) % call normally
if S.align
% default
elseif S.tree
cmd = [cmd ' TREE '];
elseif ~isempty(S.bootstrap) && isnumeric(S.bootstrap) % not empty, and it's a true value (> 0)
% BOOTSTRAP overrides ALIGN and TREE
cmd = [cmd optsep 'BOOTSTRAP=' num2str(S.bootstrap)];
end
if ~isempty(S.convert)
cmd = [cmd optsep 'CONVERT=' S.convert];
end
% get the filename and directory
[infiledir,infilename, infileext] = fileparts(S.infile);
cmd = [cmd optsep 'INFILE=' relativepath(pwd,infiledir) infilename infileext];
% see if the infile is the same name as the outfile
if ~isempty(S.outfile) && strcmpi(S.outfile,S.infile)
% *** insert a warning or dialog here?
btn = questdlg('The OUTIFLE is the same as the INFILE. Do you want to continue?',...
'Continue Operation','Yes','No','No');
if strcmp(btn,'No')
return
end
elseif isempty(S.outfile)
btn = questdlg('The OUTIFLE is empty. Tthe default name used will be the same as the INFILE. Do you want to continue?',...
'Continue Operation','Yes','No','No');
if strcmp(btn,'No')
return
end
end
elseif ~isempty(S.profile1) & ~isempty(S.profile2) %call for profiles
cmd = [cmd optsep 'PROFILE1=' S.profile1 optsep 'PROFILE2=' S.profile2];
else % both are empty
error('No input file(s) specified')
end
% check for protein/dna
isprotein = isempty(S.type) | strcmpi(S.type,'protein');
% PROTEIN or DNA sequences
if ~isprotein
cmd = [cmd optsep 'TYPE=dna'];
end
% protein alignment with negative values in matrix
if S.negative
cmd = [cmd optsep 'NEGATIVE'];
end
% sequence alignment file name
if S.outfile
[outfiledir,outfilename, outfileext] = fileparts(S.outfile);
cmd = [cmd optsep 'OUTFILE=' relativepath(pwd,outfiledir) outfilename outfileext];
else
error('Output file not specified.')
end
% GCG, GDE, PHYLIP, PIR or NEXUS
% default to clustal format (empty field)
if S.output
cmd = [cmd optsep 'OUTPUT=' S.output];
end
if ~isempty(S.outorder) && ~strcmp(S.outorder,'align')
cmd = [cmd optsep 'OUTORDER=' S.outorder];
end
% LOWER or UPPER (for GDE output only)
if strcmpi(S.output,'gde') & S.case
cmd = [cmd optsep 'CASE=' S.case];
end
% OFF or ON (for Clustal output only)
if isempty(S.output) && ~isempty(S.seqnos) && ~strcmp(S.seqnos,'off')
cmd = [cmd optsep 'SEQNOS=' S.seqnos];
end
% Initial Alignment
if S.quicktree
%%%%%%%%%%%%%%%%%%%%%%% Fast Pairwise Alignments %%%%%%%%%%%%%%%%%%%%%%%
% use FAST algorithm for the alignment guide tree
cmd = [cmd optsep 'QUICKTREE=1'];
% word size
if S.ktuple
cmd = [cmd optsep 'KTUPLE=' num2str(S.ktuple)];
end
% number of best diags.
if S.topdiags
cmd = [cmd optsep 'TOPDIAGS=' num2str(S.topdiags)];
end
% window around best diags.
if S.window
cmd = [cmd optsep 'WINDOW=' num2str(S.window)];
end
% gap penalty
if S.pairgap
cmd = [cmd optsep 'PAIRGAP=' num2str(S.pairgap)];
end
% PERCENT or ABSOLUTE
if S.score
cmd = [cmd optsep 'PAIRGAP=' S.score];
end
else
%%%%%%%%%%%%%%%%%%%%%%% Slow Pairwise Alignments %%%%%%%%%%%%%%%%%%%%%%%
% check the type
if isprotein
% -PWMATRIX= :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
if S.pwmatrix & ~strcmpi(S.pwmatrix,'gonnet') % 'gonnet' is the default
cmd = [cmd optsep 'PWMATRIX=' S.pwmatrix];
end
else
% -PWDNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
if S.pwdnamatrix & ~strcmpi(S.pwdnamatrix,'iub')
cmd = [cmd optsep 'PWDNAMATRIX=' S.pwdnamatrix];
end
end
% -PWGAPOPEN=f :gap opening penalty
if S.pwgapopen
if (isprotein & S.pwgapopen ~= 10) | (~isprotein & S.pwgapopen ~= 15)
cmd = [cmd optsep 'PWGAPOPEN=' sprintf('%.2f',S.pwgapopen)];
end
end
% -PWGAPEXT=f :gap opening penalty
if S.pwgapext
if (isprotein & S.pwgapext ~= 0.1) | (~isprotein & S.pwgapext ~= 6.66)
cmd = [cmd optsep 'PWGAPEXT=' sprintf('%0.2f',S.pwgapext)];
end
end
end
%%%%%%%%%%%%%%%%%%%%%%% Multiple Alignments %%%%%%%%%%%%%%%%%%%%%%%
%newtree option doesn't seem to work, clustalw always bases the name on
%the input file's name
%create new tree or use old one
if S.newtree
% -NEWTREE= :file for new guide tree
cmd = [cmd optsep 'NEWTREE=' S.newtree];
elseif S.usetree
% -USETREE= :file for old guide tree
cmd = [cmd optsep 'USETREE=' S.usetree];
end
% weight matrices
if isprotein
% -MATRIX= :Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename
if S.matrix & ~strcmpi(S.matrix,'gonnet')
cmd = [cmd optsep 'MATRIX=' S.matrix];
end
else
% -DNAMATRIX= :DNA weight matrix=IUB, CLUSTALW or filename
if S.dnamatrix & ~strcmpi(S.dnamatrix,'iub')
cmd = [cmd optsep 'DNAMATRIX=' S.dnamatrix];
end
end
% -GAPOPEN=f :gap opening penalty
if S.gapopen
if (isprotein & S.gapopen ~= 10) | (~isprotein & S.gapopen ~= 15)
cmd = [cmd optsep 'GAPOPEN=' num2str(S.gapopen)];
end
end
% -GAPEXT=f :gap extension penalty
if S.gapext
if (isprotein & S.gapext ~= 0.2) | (~isprotein & S.gapext ~= 6.66)
cmd = [cmd optsep 'GAPEXT=' num2str(S.gapext)];
end
end
% -ENDGAPS :no end gap separation pen.
if S.endgaps
cmd = [cmd optsep 'endgaps']; % *** lowercase on Unix?
end
% -GAPDIST=n :gap separation pen. range
if S.gapdist & S.gapdist ~= 4
cmd = [cmd optsep 'GAPDIST=' num2str(S.gapdist)];
end
% -NOPGAP :residue-specific gaps off
if S.nopgap
cmd = [cmd optsep 'nopgap']; % *** lowercase on Unix?
end
% -NOHGAP :hydrophilic gaps off
if S.nohgap
cmd = [cmd optsep 'nohgap']; % *** lowercase on Unix?
end
% -HGAPRESIDUES= :list hydrophilic res.
if S.hgapresidues & ~strcmpi(sort(S.hgapresidues),'DEGKNPQRS')
cmd = [cmd optsep 'HGAPRESIDUES=' S.hgapresidues];
end
% -MAXDIV=n :% ident. for delay
if S.maxdiv & S.maxdiv ~= 30
cmd = [cmd optsep 'MAXDIV=' num2str(S.maxdiv)];
end
% -TRANSWEIGHT=f :transitions weighting
if S.transweight >= 0 & S.transweight <= 1 & S.transweight ~= 0.5 % must be in 0:.1:1, default is 0.5
tmptrans = sprintf('%.1f',S.transweight);
if S.transweight < 1,
cmd = [cmd optsep 'TRANSWEIGHT=' tmptrans(2:end)]; %don't nead leading 0
else
cmd = [cmd optsep 'TRANSWEIGHT=1'];
end
end
%%%%%%%%%%%%%%%%%%%%%%% Profile Alignments %%%%%%%%%%%%%%%%%%%%%%%
% -PROFILE :Merge two alignments by profile alignment
if S.profile
cmd = [cmd optsep 'PROFILE'];
% *** need to put in check to see that NEWTREE and USETREE aren't both used
if (S.newtree1 | S.newtree2) & (S.usetree1 | S.usetree2)
error('Cannot use NEWTREE1/NEWTREE2 and USETREE1/USETREE2 at the same time')
end
% -NEWTREE1= :file for new guide tree for profile1
% -NEWTREE2= :file for new guide tree for profile2
if S.newtree1 && S.newtree2,
cmd = [cmd optsep 'NEWTREE1=' S.newtree1 optsep 'NEWTREE2=' S.newtree2];
elseif (S.newtree1 && ~(S.newtree2)) || (~(S.newtree1) && S.newtree2)
error('When used, both NEWTREE1 and NEWTREE2 need valid entries.')
end
% -USETREE1= :file for old guide tree for profile1
% -USETREE2= :file for old guide tree for profile2
if S.usetree1 && S.usetree2,
cmd = [cmd optsep 'USETREE1=' S.usetree1 optsep 'USETREE2=' S.usetree2];
elseif (S.usetree1 && ~(S.usetree2)) || (~(S.usetree1) && S.usetree2)
error('When used, both USETREE1 and USETREE2 need valid entries.')
end
%%%%%%%%%%%%%%%%%%%%%%% Sequence to Profile Alignments %%%%%%%%%%%%%%%%%%%%%%%
% -SEQUENCES :Sequentially add profile2 sequences to profile1 alignment
if S.sequences
cmd = [cmd optsep 'SEQUENCES'];
if S.newtree
% -NEWTREE= :file for new guide tree
cmd = [cmd optsep 'NEWTREE=' S.newtree];
elseif S.usetree
% -USETREE= :file for old guide tree
cmd = [cmd optsep 'USETREE=' S.usetree];
end
%%%%%%%%%%%%%%%%%%%%%%% Structure Alignments %%%%%%%%%%%%%%%%%%%%%%%
% -NOSECSTR1 :do not use secondary structure-gap penalty mask for profile 1
% -NOSECSTR2 :do not use secondary structure-gap penalty mask for profile 2
% *** add
end
end
% ******************Structure Alignments******************
% -SECSTROUT=STRUCTURE or MASK or BOTH or NONE :output in alignment file
if S.secstrout
cmd = [cmd optsep 'SECSTROUT=' S.secstrout];
end
% -HELIXGAP=n :gap penalty for helix core residues
if S.helixgap & S.helixgap ~= 4 % default
cmd = [cmd optsep 'HELIXGAP=' num2str(S.helixgap)];
end
% -STRANDGAP=n :gap penalty for strand core residues
if S.strandgap & S.strandgap ~= 4 % default
cmd = [cmd optsep 'STRANDGAP=' num2str(S.helixgap)];
end
% -LOOPGAP=n :gap penalty for loop regions
if S.terminalgap
cmd = [cmd optsep 'LOOPGAP=' num2str(S.loopgap)];
end
% -TERMINALGAP=n :gap penalty for structure termini
if S.terminalgap
cmd = [cmd optsep 'TERMINALGAP=' num2str(S.terminalgap)];
end
% -HELIXENDIN=n :number of residues inside helix to be treated as terminal
if S.helixendin & S.helixendin ~= 3 % default
cmd = [cmd optsep 'HELIXENDIN=' num2str(S.helixendin)];
end
% -HELIXENDOUT=n :number of residues outside helix to be treated as terminal
if S.helixendout % & S.helixendout ~= 0% default, but 0 is not true
cmd = [cmd optsep 'HELIXENDOUT=' num2str(S.helixendout)];
end
% -STRANDENDIN=n :number of residues inside strand to be treated as terminal
if S.strandendin & S.strandendin ~= 1 % default
cmd = [cmd optsep 'STRANDENDIN=' num2str(S.strandendin)];
end
% -STRANDENDOUT=n:number of residues outside strand to be treated as terminal
if S.strandendout & S.strandendout ~= 1 % default
cmd = [cmd optsep 'STRANDENDOUT=' num2str(S.strandendout)];
end
%
%%%%%%%%%%%%%%%%%%%%%%% Trees %%%%%%%%%%%%%%%%%%%%%%%
% -OUTPUTTREE=nj OR phylip OR dist OR nexus
if ~isempty(S.outputtree) & ~strcmpi(S.outputtree,'phylip')
cmd = [cmd optsep 'OUTPUTTREE=' S.outputtree];
end
% -SEED=n :seed number for bootstraps.
if S.seed
cmd = [cmd optsep 'SEED=' S.seed];
end
% -KIMURA :use Kimura's correction.
if S.kimura
cmd = [cmd optsep 'KIMURA'];
end
% -TOSSGAPS :ignore positions with gaps.
if S.tossgaps
cmd = [cmd optsep 'TOSSGAPS'];
end
% -BOOTLABELS=node OR branch :position of bootstrap values in tree display
if S.bootlabels & ~strcmpi(S.bootlabels,'branch')
cmd = [cmd optsep 'BOOTLABELS=' S.bootlabels];
end
%%%%%%%%%%%%%%%%%%%%%%% Call ClustalW %%%%%%%%%%%%%%%%%%%%%%%
disp(cmd)
[s,w] = system(cmd);
if s
error(w) % *** should add error processing
else
disp(w)
end
if nargout,
varargout{1} = w;
end