function Sequence = GFD6(Segments)
if size(Segments, 1) > 200
lSegs = size(Segments, 2);
[SortedHash Order] = sort(Segments * randn(lSegs, 1));
startSegs = Order(diff(cat(1, SortedHash, 0)) ~= 0)';
endSegs = startSegs;
nSegs = length(startSegs);
Description = cell(nSegs, 1);
for index = startSegs
Description{index} = index;
end
matchLength = lSegs-1; leftSegs = nSegs;
while matchLength > 0
RandWeight = randn(matchLength, 1);
LeftHash = Segments(endSegs, lSegs-matchLength+1:end) * RandWeight;
RightHash = (Segments(startSegs, 1:matchLength) * RandWeight)';
indexLeft = 1;
while indexLeft <= leftSegs
Match = findstr(LeftHash(indexLeft), RightHash);
if Match
indexRight = Match(1);
if (indexRight == indexLeft) & (length(Match) > 1) % Rare case, happens e.g. with strvcat('AAA','AAG','AGC')
indexRight = Match(2);
end
if indexRight ~= indexLeft
Description{startSegs(indexLeft)} = cat(2, Description{startSegs(indexLeft)}, lSegs-matchLength, Description{startSegs(indexRight)});
startSegs(indexRight) = startSegs(indexLeft);
startSegs(indexLeft) = []; endSegs(indexLeft) = [];
leftSegs = leftSegs - 1;
RightHash(indexRight) = RightHash(indexLeft);
RightHash(indexLeft) = []; LeftHash(indexLeft) = [];
if leftSegs == 1
matchLength = 1;
break;
end
else
indexLeft = indexLeft + 1;
end
else
indexLeft = indexLeft + 1;
end
end
matchLength = matchLength - 1;
end
if length(startSegs) > 1 % If disjoint segments in the end, we have to merge them !
for index = 2:length(startSegs)
Description{startSegs(1)} = [Description{startSegs(1)} lSegs Description{startSegs(index)}];
end
startSegs = startSegs(1);
end
Sequence(repmat(cumsum(cat(2, 0, Description{startSegs}(2:2:2*nSegs-2)))', 1, lSegs) + repmat(1:lSegs, nSegs, 1)) = Segments(Description{startSegs}(1:2:2*nSegs-1), :);
else
[nSegs, lSegs] = size(Segments);
[SortedHash Order] = sort(Segments * randn(lSegs,1)) ;
Segments = Segments(Order(diff(cat(1,SortedHash,0)) ~= 0), :);
[nSegs, lSegs] = size(Segments);
theSegments = cell(nSegs, 1);
theSegs = 1:nSegs;
for index = theSegs,
theSegments{index} = Segments(index, :);
end
copySegments = theSegments;
matchLength = lSegs-1;
while matchLength > 0
Segments(:, 1) = [];
for leftSeg = theSegs
Match = find(strncmp(Segments(leftSeg, :), copySegments, matchLength));
if Match
Select = Match(1);
rightSeg = theSegs(Select);
if rightSeg == leftSeg
if length(Match) > 1 % Rare case
temp = Match(Match ~= Select);
Select = temp(1);
rightSeg = theSegs(Select);
theSegments{rightSeg} = cat(2,theSegments{leftSeg}, theSegments{rightSeg}(1+matchLength:end));
i = find(theSegs == leftSeg);
theSegs(i) = [];
if length(theSegs) == 1
Sequence = theSegments{theSegs};
return;
end
copySegments(Select) = copySegments(i);
copySegments(i) = [];
end
else
theSegments{rightSeg} = cat(2,theSegments{leftSeg}, theSegments{rightSeg}(1+matchLength:end));
i = find(theSegs == leftSeg);
theSegs(i) = [];
if length(theSegs) == 1
Sequence = theSegments{theSegs};
return;
end
copySegments(Select) = copySegments(i);
copySegments(i) = [];
end
end
end
matchLength = matchLength - 1;
end
Sequence = [theSegments{theSegs}];
end
|