from
ClustalW Interface
by Steve Simon
MATLAB tools for working with ClustalW.
|
| clustalalignread(filename)
|
function S = clustalalignread(filename)
% CLUSTALALIGNREAD - imports files containing Clustal alignments
%   S = CLUSTALALIGNREAD(ALIGNFILE) will import the file ALIGNFILE and
%   convert it to a structure S.  ALIGNFILE should be in the Clustal
%   alignment format.  S will contain one field for each sequence in the
%   file (named after the cleaned-up sequence name) and one field named
%   'consensus' holding the concatenated consensus lines.
%
%   The file is expected to consist of a header line starting with
%   'CLUSTAL', followed by one or more blocks separated by blank lines.
%   Each block holds one line per sequence followed by one consensus line.
%
%   An error is raised if the file is empty, if the first line does not
%   start with 'CLUSTAL', or if nothing follows the header.
%
%   NOTE(review): a consensus line that is completely empty (zero
%   characters) is indistinguishable from a block separator and will
%   confuse the block-size detection below.

% read the file one line per cell; blank lines are kept because they
% delimit the alignment blocks
txt = textread(filename,'%s','delimiter','\n','whitespace','');

% check header line, confirm that it's a ClustalW file
if isempty(txt) || ~strncmpi(txt{1},'CLUSTAL',7)
    error('Header does not match CLUSTAL format')
end

% remove header line
txt(1) = [];

% remove the empty lines between the header and the first block
while ~isempty(txt) && isempty(txt{1})
    txt(1) = [];
end
if isempty(txt)
    error('No alignment data found after the CLUSTAL header')
end

% find the empty lines; the first one occurs right after the first
% consensus line, so it tells us how many lines make up one block
mt = find(cellfun('isempty',txt));
if isempty(mt)
    % single block with no trailing blank line: the last line of the
    % file is the consensus line
    cons_loc = numel(txt);
else
    % the first consensus line is in mt(1)-1
    cons_loc = mt(1)-1;
end

% eliminate empty lines so that blocks are contiguous
txt(mt) = [];

% there are cons_loc-1 sequences per block
num_seq = cons_loc-1;

% Locate the column at which the alignment data starts.  It is taken from
% the first sequence line as the first character after the name and the
% whitespace that follows it.  If that cannot be determined (e.g. the
% line does not start with a name), fall back to column 33, the fixed
% layout this function originally assumed.
[name_start, name_end] = regexp(txt{1},'^\S+\s+','once'); %#ok<ASGLU>
if isempty(name_end)
    seq_col = 33;
else
    seq_col = name_end+1;
end

% create empty structure
S = struct;

for s = 1:num_seq
    % make the name into a MATLAB-acceptable variable name
    name = cleantext(strtok(txt{s}),{'|','_';'.',''});

    % collect this sequence's piece from every block (same row offset in
    % each block, blocks are cons_loc lines apart)
    seq = '';
    for r = s:cons_loc:numel(txt)
        % keep only the text before any trailing residue count, then
        % strip trailing whitespace
        seq = [seq deblank(strtok(txt{r}(seq_col:end),'0123456789'))]; %#ok<AGROW>
    end
    S.(name) = seq;
end

% consensus line: last line of every block, same column offset as the
% sequence data
cons = '';
for r = cons_loc:cons_loc:numel(txt)
    cons = [cons txt{r}(seq_col:end)]; %#ok<AGROW>
end
S.consensus = cons;
|
|
Contact us at files@mathworks.com