Code covered by the BSD License  

Highlights from
DM Utils (data mining utils)

image thumbnail

DM Utils (data mining utils)

by

 

15 Jan 2012 (Updated )

Tools for working with distance matrices that improve data mining capabilities

pseudo_squareform(D,w,k)
function [out, ind] = pseudo_squareform(D,w,k)
% pseudo_squareform - access a distance matrix stored in vector form
% as if it were a full square matrix.
% use:
%   [out, ind] = pseudo_squareform(D,w,k);
%   where:
%   D   - distances in vector form, ordered (2,1),(3,1),...,(M,1),(3,2),...
%         i.e. the lower triangle of the square matrix taken column by column
%   w,k - row and column selectors: a scalar, a vector of positive integers
%         (row or column orientation, sizes of w and k need NOT match), or
%         the text ':' meaning "all"
%   out - numel(w)-by-numel(k) matrix of the selected distances
%         (zeros where the row index equals the column index)
%   ind - matrix of the same size as out holding, for every entry of out,
%         its position in D (0 where the row index equals the column index)
%
% errors:
%   'wrong vector length' when numel(D) is not M*(M-1)/2 for an integer M;
%   an error is also raised when D is not a vector or when w / k contain
%   anything other than integers in 1..M.
%
% example:
%   X=rand(100,5);              % 100 elements, one per row (PDIST convention)
%   D=pdist(X,'euclidean');     % PAIR_DIST_SEQ output is used the same way
%                               % (TODO confirm its input orientation)
%   pseudo_squareform(D,4,7);       % dist between 4th and 7th element
%   pseudo_squareform(D,10,':')     % 10th row (dist from 10th element to all)
%   pseudo_squareform(D,':',2)      % 2nd col (dist from 2nd element to all)
%   pseudo_squareform(D,':',':')    % whole matrix (same as squareform)
%   pseudo_squareform(D,3:6,1:2)    % submatrix: rows 3..6 vs columns 1..2
%   pseudo_squareform(D,':',1:2)    % submatrix: 1st and 2nd col vs all rows
%
%   note: passing the colon as text (':') is the form that works in m-files;
%   according to the original author's notes a bare colon, f.e.
%       pseudo_squareform(D,:,2)
%   is also accepted in interactive mode.
%
% Copyright 2011 - P. Skurowski
% Author : P. Skurowski
% Place: Institute of informatics, Silesian Univ. of  Technology
%   v. 0.1 - preliminary test version
%   v. 1.0 - final public version (minor bugs corrected)
% See also: PAIR_DIST_PAR, PAIR_DIST_SEQ, PDIST, SQUAREFORM

if ~(isvector(D) || isempty(D))
    error('D must be a vector');
end
n=numel(D);

% Number of elements M is the positive root of M*(M-1)/2 = n.
% Round first and verify exactly, so the test does not depend on the
% floating point result of sqrt.
M=round((1+sqrt(1+8*n))/2);
if M*(M-1)/2~=n
    error('wrong vector length')
end

allRows=ischar(w) && strcmp(w,':');
allCols=ischar(k) && strcmp(k,':');

if allRows && allCols
    % Whole matrix requested.
    mask=tril(true(M),-1); % strictly lower triangle, column-major = order of D
    wantInd=nargout>1;
    try % if there is enough memory do it as a vector operation
        out=zeros(M);
        out(mask)=D;
        out=out+out';
        if wantInd
            ind=zeros(M);
            ind(mask)=1:n;
            ind=ind+ind';
        end
    catch % if not, fill the matrix element by element
        clear mask out ind
        disp('Too low memory for vector operation - trying iteratively');
        out=zeros(M);
        if wantInd
            ind=zeros(M);
        end
        for c=1:M
            kp=c-1; % number of previous columns
            SK=kp*M-kp*(kp+1)/2; % arithmetic series: elements stored for previous columns
            for r=c+1:M
                p=SK+(r-c); % r-c = elements stored above in the current column
                out(r,c)=D(p);
                out(c,r)=D(p);
                if wantInd
                    ind(r,c)=p;
                    ind(c,r)=p;
                end
            end
        end
    end
    return
end

% Normalise the selectors: rows as a column vector, columns as a row vector,
% so that any orientation of w and k is accepted.
if allRows
    rows=(1:M)';
else
    rows=w(:);
end
if allCols
    cols=1:M;
else
    cols=k(:)';
end

% Out-of-range indices would otherwise silently address a wrong element of D.
sel={rows,cols};
for s=1:2
    v=sel{s};
    if ~isnumeric(v) || ~isreal(v) || any(v<1 | v>M | v~=fix(v))
        error('indices w and k must be integers between 1 and %d (or '':'')',M);
    end
end
rows=double(rows); % avoid integer-class saturation in the index arithmetic
cols=double(cols);

% Grids of row / column numbers for every requested entry.
W=repmat(rows,[1 numel(cols)]);
K=repmat(cols,[numel(rows) 1]);

out=nan(size(W));
offDiag=W~=K;
out(~offDiag)=0; % zeros for the diagonal

% D stores only the lower triangle, so address (max,min) of each pair.
r=max(W(offDiag),K(offDiag));
c=min(W(offDiag),K(offDiag));

kp=c-1; % number of previous columns
SK=kp*M-kp.*(kp+1)/2; % arithmetic series: elements stored for previous columns
SW=r-c; % elements from the diagonal down to the current row in this column
idx=SK+SW;

out(offDiag)=D(idx);

if nargout>1
    ind=zeros(size(W));
    ind(offDiag)=idx;
end

Contact us