Code covered by the BSD License  

Highlights from
ClustalW Interface

from ClustalW Interface by Steve Simon
MATLAB tools for working with ClustalW.

clustalalignread(filename)
function S = clustalalignread(filename)
% CLUSTALALIGNREAD - imports files containing Clustal alignments
% S = CLUSTALALIGNREAD(ALIGNFILE) will import the file ALIGNFILE and
%   convert it to a structure S.  ALIGNFILE should be in the Clustal
%   alignment format.  S will contain one field for each sequence
%   in the file (named after the first token of the sequence line, passed
%   through CLEANTEXT), plus a field 'consensus' for the consensus line.
%
%   The file is expected to consist of a header line starting with
%   'CLUSTAL', followed by one or more alignment blocks.  Each block is a
%   run of sequence lines followed by one consensus line, and blocks are
%   separated by empty lines.  The alignment text is taken from column 33
%   onwards of every sequence and consensus line.
%
%   An error is raised if the file is empty, if the first line does not
%   start with 'CLUSTAL', or if no alignment lines follow the header.
%
%   NOTE: a consensus line that is completely empty (zero characters) is
%   indistinguishable from a block separator and will be misread.

% first column of the alignment text on every sequence/consensus line
seq_col = 33;

txt = textread(filename,'%s','delimiter','\n','whitespace','');

% check header line, confirm that it's a ClustalW file
% (an empty file has no header at all, so report it the same way)
if isempty(txt) || ~strncmpi(txt{1},'CLUSTAL',7)
    error('Header does not match CLUSTAL format')
end

% remove header line
txt(1) = [];

% remove empty lines between the header and the first alignment block;
% stop when the file runs out so a header-only file does not index past
% the end of the cell array
while ~isempty(txt) && isempty(txt{1})
    txt(1) = [];
end

if isempty(txt)
    error('No alignment data found after CLUSTAL header')
end

% find the empty strings in the cell array; the first one occurs directly
% after the first consensus line
mt = find(cellfun('isempty',txt));

if isempty(mt)
    % single block with no trailing empty line: the consensus line is the
    % last line of the file
    cons_loc = numel(txt);
else
    % the first consensus line is in mt(1)-1
    cons_loc = mt(1)-1;
end

% eliminate empty lines, leaving blocks of cons_loc lines back to back
txt(mt) = [];

% there are cons_loc-1 sequences
num_seq = cons_loc-1;

% create empty structure
S = struct;
for s = 1:num_seq
    % make the name into a MATLAB-acceptable variable name
    name = cleantext(strtok(txt{s}),{'|','_';'.',''});

    % initialize field to hold sequence
    S.(name) = '';

    % line s of every block belongs to this sequence
    for r = s:cons_loc:numel(txt)
        % make sure that there aren't sequence numbers at the end
        S.(name) = [S.(name) deblank(strtok(txt{r}(seq_col:end),'0123456789'))];
    end
end

% consensus line: the last line of every block, kept verbatim (spaces are
% significant there, so no trimming)
S.consensus = '';
for r = cons_loc:cons_loc:numel(txt)
    S.consensus = [S.consensus txt{r}(seq_col:end)];
end







Contact us at files@mathworks.com