function Sequence = GFD5(Segments)
% Garde en place Segments et StartSegments
[nSegs, lSegs] = size(Segments);
%[SortedHash Order] = sort(Segments * randn(lSegs, 1));
%startSegs = Order(diff([SortedHash ; 0]) ~= 0)';
%endSegs = startSegs;
startSegs = 1:nSegs;
endSegs = 1:nSegs;
Description = cell(nSegs, 1);
for index = startSegs
Description{index} = index;
end
matchLength = lSegs;
while matchLength > 0
RandWeight = randn(matchLength, 1);
LeftHash = Segments(endSegs, lSegs-matchLength+1:end) * RandWeight;
RightHash = Segments(startSegs, 1:matchLength) * RandWeight;
Compare = LeftHash * (RightHash.^(-1))';
[leftIndices, rightIndices] = find(abs(Compare - 1) < 1e-10);
indexLeft = 1;
while indexLeft <= length(endSegs) & leftIndices
Match = rightIndices(leftIndices == indexLeft);
if Match
indexRight = Match(1);
if (indexRight == indexLeft) & (length(Match) > 1) % Rare case, happens e.g. with strvcat('AAA','AAG','AGC')
indexRight = Match(2);
end
if indexRight ~= indexLeft
Description{startSegs(indexLeft)} = [Description{startSegs(indexLeft)} lSegs-matchLength Description{startSegs(indexRight)}];
startSegs(indexRight) = startSegs(indexLeft); Compare(:, indexRight) = Compare(:, indexLeft);
startSegs(indexLeft) = [];
endSegs(indexLeft) = [];
t = find(rightIndices == indexRight); leftIndices(t) = []; rightIndices(t) = []; % Remove column iR
rightIndices(rightIndices == indexLeft) = indexRight; % Move column iL to iR
rightIndices(rightIndices > indexLeft) = rightIndices(rightIndices > indexLeft) - 1; % Delete column iL
t = find(leftIndices == indexLeft); leftIndices(t) = []; rightIndices(t) = []; % Delete row iL
leftIndices(leftIndices > indexLeft) = leftIndices(leftIndices > indexLeft) - 1; % continued
% ICI ON PEUT ELIMINER TOUTES LES LIGNES EN DESSOUS DE ILEFT
if length(endSegs) == 1
matchLength = 1;
break;
end
else
indexLeft = indexLeft + 1;
end
else
indexLeft = indexLeft + 1;
end
end
matchLength = matchLength - 1;
end
if length(startSegs) > 1 % If disjoint segments in the end, we have to merge them
for index = 2:length(startSegs)
Description{startSegs(1)} = [Description{startSegs(1)} lSegs Description{startSegs(index)}];
end
startSegs = startSegs(1);
end
Sequence(repmat(cumsum([0 Description{startSegs}(2:2:2*nSegs-2)])', 1, lSegs) + repmat(1:lSegs, nSegs, 1)) = Segments((Description{startSegs}(1:2:2*nSegs-1)), :);
|