image thumbnail

Connected Component Analysis on an Undirected Graph

by

 

03 Mar 2012 (Updated )

Connected component analysis on undirected graphs, with thresholding and connectivity constraints.

[groups,orphans]=graph_analysis(W,varargin)
%Tristan Ursell
%April 2012
%Connected component analysis on an undirected graph, with various
%thresholding and connectivity constraints.
%
% [groups,orphans] = graph_analysis(W);
% [groups,orphans] = graph_analysis(W,'field',value,...);
% [groups,~] = graph_analysis(W,'field',value,...);
%
% W is the N x N adjaceny matrix for a symmetric graph.  Thus W should be a
% symmetric matrix, if it is not, this function will give an error.  Self
% connections (i.e. diagonal elements) are not allowed and will be removed
% automatically. The values of the parameters below are applied as a union
% set, that is, all original elements must meet all of the conditions 
% specified by the parameters to be included in a group of connected
% components.
%
% If only W is given, then all components with W > 0 will be analyzed and
% grouped, with the default constraints.
%
% 'min_conn' (0 <= min_conn <= max_conn) specifies the minimum degree of
% connectivity (not including itself) for any element in W to be included
% in a group. The default 'min_conn' value is 1.
%
% 'max_conn' (min_conn <= max_conn <= N) specifies the maximum degree of
% connectivity for any element in W to be included in a group. The default
% 'max_conn' value is N.
%
% 'min_like' is the minimum likelihood value for an element in W to be
% included in any group. The default value is 0.  The 'likelihood' is not
% necessarily a probability, and hence is not bounded between zero and one.
% However, for any two elements in W, the ratio of likelihoods should be 
% equal to their ratio of probabilities.
%
% 'max_link' is the maximum number of linkages to search for in the
% network.  This parameter is useful when you know that the network has
% some maximum number of connections between the elements in the network 
% (e.g. if the graph has the property that any two nodes are no more
% than N connections away, you can set max_link = N, to speed up the code).
% Choosing smaller values of 'max_link' can significantly speed up the code. 
% The default value is the size of the current sub-block.
%
% 'max_rank' (1 <= max_rank <= N) is the number of highest likelihood
% values to use in forming connected groups. For instance, if max_rank = 3,
% and a row of W is:
%
%   1 3 6 4 4 5 6 8 0 3 1
%
% then the matrix row will become:
% 
%   0 0 1 0 0 0 1 1 0 0 0
% 
% If a row of W is:
%
%   1 3 6 4 4 6 6 6 0 3 1
% 
% with max_rank = 1,2,3 or 4, the row becomes:
%
%   0 0 1 0 0 1 1 1 0 0 0
% 
% with max_rank = 5, the same row becomes:
% 
%   0 0 1 1 1 1 1 1 0 0 0
%
% The default value of 'max_rank' is N.
%
% 'plot' with value 1 will generate plots of the grouping algorithm as
% it creates block diagonal groups in from top left to bottom right in W.
%
% The output 'groups' is a structure array with fields:
%
% groups(i).num_els = number of elements in group i.
% groups(i).block = sub-block identity of group i.
% groups(i).elements = elements of W that are in group i.
% groups(i).degrees = degrees of connection for each element in group i.
% orphans = elements of W that were not in any group, becasue they did not
% meet the constraints.
%
% The number of distinct groups is length(groups).
%
%Example with a block diagonal random matrix W:
%
%W=blkdiag(rand(50,50),rand(100,100),rand(200,200),rand(300,300));
%W=(W+W')/2;
%
%figure;
%imagesc(W)
%axis equal tight
%xlabel('Elements of W')
%ylabel('Elements of W')
%title('Random Block-Diagonal Adjacency Matrix')
%
% %more inclusive connections, less inclusive probability
%[groups,orphans]=graph_analysis(W,'min_like',0.95,'min_conn',3,'plot',1);
%
% %less inclusive connections, more inclusive probability
%[groups,orphans]=graph_analysis(W,'min_like',0.9,'min_conn',6,'plot',1);
%

function [groups,orphans]=graph_analysis(W,varargin)

%*******CHECK and PARSE INPUTS*****************
if size(W,1)~=size(W,2)
    error('The input adjacency matrix must be square.')
end

%check to make sure W is symmetric
if sum(sum(abs(W'-W)))>0
    error('W is not symmetric -- try symmetrizing it first.  This corresponds to an undirected graph.')
end

%size of W
N=size(W,1);

%number of input fields
f1=find(strcmp('min_conn',varargin));
f2=find(strcmp('max_conn',varargin));
f3=find(strcmp('min_like',varargin));
f4=find(strcmp('max_rank',varargin));
f5=find(strcmp('max_link',varargin));
f6=find(strcmp('plot',varargin));

%process fields
if ~isempty(f1)
    min_conn=round(varargin{f1+1});
    if min_conn<0
        error('The minimum degree of connectivity must be greater than 0. error in: min_conn');
    end
else
    min_conn=1;
end

if ~isempty(f2)
    max_conn=round(varargin{f2+1});
    if max_conn>N
        error('The maximum degree of connectivity must be less than the size of the matrix. error in: max_conn')
    elseif max_conn<min_conn
        error('The maximum degree of connectivity must be greater than the minimum degree of connectivity. error in: max_conn')
    end
else
    max_conn=N;
end

if ~isempty(f3)
    min_like=varargin{f3+1};
    if min_like>max(W(:))
        warning('There are no elements in the matrix meet the minimum likelihood requirement. error in: min_like')
    end
else
    min_like=0;
end

if ~isempty(f4)
    max_rank=round(varargin{f4+1});
    if max_rank<1
        error('The number of highest ranked elements to keep in a group must be greater than 1. error in: max_rank')
    elseif max_rank>max_conn
        error('The number of highest ranked elements to keep in a group must less than the maximum degree of connectivity. error in: max_rank')
    end
else
    max_rank=N;
end

if ~isempty(f5)
    max_link=round(varargin{f5+1});
    if max_link<1
        error('The number of highest ranked elements to keep in a group must be greater than 1. error in: max_link')
    elseif max_link>N
        max_link=-1;
    end
else
    max_link=-1;
end

if ~isempty(f6)
    if varargin{f6+1}==1
        plotq=1;
        if N>1000
            warning('For large matrices, plotting may take time.')
        end
    else
        plotq=0;
    end
else
    plotq=0;
end

%**********************************************************
%get rid of diagonal elements
W=W.*~eye(size(W));

%sort the rows of W for possible block diagonalization
[~,Wsort]=sort(W,2,'descend');

%individual row thresholds
row_list=(1:N)';
col_list=Wsort(:,max_rank);
list3=sub2ind(size(W),row_list,col_list);
W_thresh=W(list3);
clear Wsort

%perform ranked row thresholding
for i=1:N
    W(i,:)=and(W(i,:)>W_thresh(i),W(i,:)>=min_like);
end

%symmetrize the matrix
W=(W+W')>0;

%calculate the degrees of connection per element
Degs=sum(W,1);
max_deg=max(Degs);

%connectivity constraints
true_list=and(Degs>=min_conn,Degs<=max_conn);

mask1=zeros(size(W));
mask1(true_list,:)=1;
mask1(:,true_list)=1;   

%put in diagonal elements, and apply connection and min_like constraints
W_conn=((W+eye(size(W))).*mask1)>0;

clear mask1 W

%find block-diagonal super-groups in W_conn
[conn_rw,conn_cl]=find(tril(W_conn));
group_mat=zeros(N);
disp('Finding diagonal sub-spaces ...')
for i=1:length(conn_rw)
    group_mat(conn_cl(i):conn_rw(i),conn_cl(i):conn_rw(i))=1;
end
disp('... finished.')
group_mat=group_mat>0;

%parse super-groups from W_conn
L1=bwlabel(group_mat,4);

%******** Parse the network *******
%initialize group counting variable
if plotq==1
    h1=figure;
end

grp=0;
for i=1:max(L1(:))
    clear block

    %get super-group block
    block=L1==i;
    
    %find bounds of block in W_conn
    lin_proj=sum(block,1)>0;
    sub_list=find(lin_proj,1,'first'):find(lin_proj,1,'last');
    subspace=single(W_conn(sub_list,sub_list));
    rem_els=sum(subspace,2)>0;
    
    blck_grp=0;
    %perform parsing of network in this block
    %while and(sum(rem_els)>0,blck_grp<length(rem_els))
    while sum(rem_els)>0
        %find starting position
        init_val=find(rem_els,1,'first');
        
        start_el=zeros(size(rem_els));
        start_el(init_val)=1;
         
        %number of remaining elements
        N_els=sum(rem_els);
        
        disp(['Remaining elements: ' num2str(N_els)])
        
        %update maximum number of steps between elements dependent on user
        %defined maximum linkage
        if max_link==-1
            max_steps=N_els-1; %maximum number of possible steps to be taken in network
        else
            if max_link>(N_els-1)
                max_steps=N_els-1;
            else
                max_steps=max_link;
            end
        end
           
        %find elements that form group with init_val
        clear curr_group
        curr_group=start_el;
        for j=1:max_steps
            curr_group=(subspace*single(curr_group))>0;
        end
        
        %keep track of group numbers
        grp=grp+1;
        groups(grp).num_els=sum(curr_group==1);
        groups(grp).block=i;
        groups(grp).elements=sub_list(1)-1+find(curr_group)';
        groups(grp).degrees=Degs(groups(grp).elements);
           
        %find new starting position
        rem_els=rem_els-curr_group;
        blck_grp=blck_grp+1;
    end

    if plotq==1     
        figure(h1);
        sub_plot=(subspace-eye(size(subspace)))>0;
        for j=1:blck_grp
            subplot(1,2,1)
            imagesc(2*block+group_mat)
            axis equal tight
            xlabel('elements')
            ylabel('elements')
            colormap(gray)
            
            curr_group=groups(grp-blck_grp+j).elements-sub_list(1)+1;
            subsubspace1=zeros(size(sub_plot));
            subsubspace2=zeros(size(sub_plot));
            subsubspace1(curr_group,:)=1;
            subsubspace2(:,curr_group)=1;
            sub_diagram=subsubspace1.*subsubspace2;
            
            clear sb_gr
            sb_gr(:,:,1)=sub_diagram.*~sub_plot;
            sb_gr(:,:,2)=sub_diagram.*sub_plot;
            sb_gr(:,:,3)=sub_plot.*~sub_diagram;
            
            subplot(1,2,2)
            imagesc(sb_gr)
            title(['Block: ' num2str(i) ', Sub-Group: ' num2str(j) ', Group: ' num2str(grp-blck_grp+j) ', green = current group members, red = grouping mask, blue = non-group elements'])
            xlabel('sub-elements')
            ylabel('sub-elements')
            axis equal tight
            pause
        end
    end
end
if plotq==1
    close(h1);
end

%handle lack of output
if and(nargout==1,~exist('groups','var'))
    warning('No groups in the matrix met the given requirements.')
    groups=[];
    orphans=1:N;
    return
else
    orphans=setdiff(1:N,[groups.elements]);
end

if plotq==1
    h2=figure;
    %plot groups
    B(:,:,1)=0.5*(group_mat>0).*~W.*~eye(size(W));
    B(:,:,2)=0.5*(group_mat>0).*~W.*~eye(size(W));
    B(:,:,3)=0.6*(group_mat>0).*~W.*~eye(size(W));
    
    boxes=regionprops(L1,'BoundingBox');
    
    cmap=jet;
    for i=1:length(groups)
        curr_group=groups(i).elements;
        choose_mat=zeros(size(W));
        choose_mat(curr_group,:)=1;
        choose_mat(:,curr_group)=1;
        point_group=choose_mat.*W_conn;
        
        color_vec=cmap(modone(ceil(1/2*(1+rand)*64*i),64),:);
        
        B(:,:,1)=B(:,:,1)+(point_group==1)*color_vec(1);
        B(:,:,2)=B(:,:,2)+(point_group==1)*color_vec(2);
        B(:,:,3)=B(:,:,3)+(point_group==1)*color_vec(3);
    end
    
    subplot(2,2,[2,4])
    imagesc(B)
    axis equal tight
    title([num2str(length(groups)) ' groups in ' num2str(max(L1(:))) ' isolated sub-spaces'])
    xlabel('Elements')
    ylabel('Elements')
    
    for i=1:max(L1(:))
        if i>max(L1(:))/2
            coord1=[boxes(i).BoundingBox];
            text(coord1(1)-2,coord1(1)+coord1(3)/2,num2str(i),'color',[1 1 1]);
        else
            coord1=[boxes(i).BoundingBox];
            text(coord1(1)+coord1(3)+1,coord1(1)+coord1(3)/2,num2str(i),'color',[1 1 1]);
        end
    end
    
    subplot(2,2,1)
    hist(Degs,0:max(Degs))
    xlabel('Degrees of Connection')
    ylabel(['Number of elements (of ' num2str(N) ')'])
    
    subplot(2,2,3)
    hist([groups.num_els],1:1:max([groups.num_els]))
    xlabel('Number of Elements in Group')
    ylabel(['Number of Groups (of ' num2str(length(groups)) ')'])
end


%MODONE    Modulus after division starting from 1.
function r = modone(x,y)
if y==0
    r = NaN;
else
    n = floor(x/y);
    r = x - n*y;
    r(r==0)=y;
end



Contact us