Code covered by the BSD License  

Highlights from
MGraph

from MGraph by junbai wang
Probabilistic graphical models for reconstruction of genetic regulatory networks using DNA microarra

[old_parcorrf,delet_edge,record_edges,pdagG,GG]=MGraph_IG_forwardFast_BGscore(tdata,old_var,N,q,cutoff)
function [old_parcorrf,delet_edge,record_edges,pdagG,GG]=MGraph_IG_forwardFast_BGscore(tdata,old_var,N,q,cutoff)
global graph_form
%Input: old_var is the covariance of the data
%P/181, Graphical models in  applied multivariate statistics
%old_var=[ 302.29 125.78 100.43 105.07 116.07; ...
%        125.78 170.88 84.19 93.6 97.89;...
%        100.43 84.19 111.6 110.84 120.49; ...
%        105.07 93.6 110.84 217.88 153.77; ...
%   116.07 97.89 120.49 153.77 294.37]
%N=88 % number of observations
%q=5 % number of variables
%cutoff=0.05;
%old_corrf=old_var./sqrt(diag(old_var)*diag(old_var)');
%inv_old_var=pinv(old_var); %bug at here sometime the inverse of variance equal inf Dec 22. 2002
%old_parcorrf=-inv_old_var./sqrt(abs(diag(inv_old_var)*diag(inv_old_var)'));
%devs=-N*log(1-old_parcorrf.^2) %device of the pair of edges
%Need check with original IC algorithm and bug in forward alogrm!! sep. 2003

t0=clock;
%For forward algorithm
%inital model a+b+c+d...
initial_model=eye(q,q);
k_sets=1:q;
full_model=triu(ones(q,q));
[set_a1,set_a2]=find(initial_model);
set_a=[set_a1,set_a2];
ln_set_a=size(set_a,1);

%rest of edges
[set_b1,set_b2]=find(full_model);
set_b=[set_b1,set_b2];
ln_set_b=size(set_b,1);
need_test_sets=setdiff(set_b,set_a,'rows');

%compement set of set a
for i=1:ln_set_a
    comp_set_a{i}=setdiff(k_sets,set_a(i,:)); %complement of set a
end
idx=1; %index for recorded edges
repeated=1;
total_number_of_edges=(q^2-q)/2;
%initial graph
temp_G=zeros(q,q);
new_G=temp_G;
record_edges=[];
while 1
    %need update set_a, comp_set_a, need_test_sets in each loop
     
    %initial run computer all pair of edges device to speed up 
    if idx==1
        fit_var=MGraph_GaussEstimate_variance(old_var,set_a,comp_set_a);
        %initial model device
        det_old=det(old_var);
        det_fit=det(fit_var);
        total_devs=abs(N*(log(det_old)-log(det_fit)));
        df=size(need_test_sets,1);
        total_p=Chisq(total_devs,df);
    
        new_model_set=[];
        new_model_set=set_a;
        index=1:df;
        devs=[];
        chtest_p=[];
        new_comp_set={};
        record_test_edges=[];
        temp_dev=0;
        
        for i=1:df
            new_model_set(ln_set_a+1,:)=need_test_sets(i,:);
            for j=1:size(new_model_set,1)
                new_comp_set{j}=setdiff(k_sets,new_model_set(j,:));
            end
            new_fit_var=MGraph_GaussEstimate_variance(old_var,new_model_set,new_comp_set);
            det_old=det(fit_var);
            det_fit=det(new_fit_var);
            devs(i)=abs(N*(log(det_old)-log(det_fit)));
           
            if (devs(i)>temp_dev)
                %record maxium var
                temp_dev=devs(i);
                record_old_var=new_fit_var;
            end
            chtest_p(i)=Chisq(devs(i),1) ;   
            record_test_edges(i,:)=need_test_sets(i,:);
	
	        %add wang 10.06.2003
            %color of edges, 0 means undiscovered, 1 means visited, 2 means discovered edges
	        color_of_edge(i)=0;
	        %end add
        end 
        devs
        chtest_p
        %find maximu device and add this edge to initial model
        %[maxDev idx_maxDev]=max(devs);
        [sortDev idx_sortDev]=sort(devs);
        numberof_edges=length(sortDev);
        flip_sortDev=fliplr(sortDev);
        flip_idx_sortDev=fliplr(idx_sortDev);
        maxDev=flip_sortDev(1);
        idx_maxDev=flip_idx_sortDev(1);
        %sort edge order
        sort_record_test_edges=record_test_edges(flip_idx_sortDev,:);
        sort_record_chtest_p=chtest_p(flip_idx_sortDev);
	    %ADded wang
	    sort_color_of_edge=color_of_edge(flip_idx_sortDev);
	    %end add 10.06.2003

        
        max_chtest_p=chtest_p(flip_idx_sortDev(1));
	    sort_color_of_edge(1)=1; %set edge visited	

        %make initial add in graph
        temp_addedge=sort_record_test_edges(idx,:);
        temp_G(temp_addedge(1),temp_addedge(2))=1;
        temp_G(temp_addedge(2),temp_addedge(1))=1;
        %end add in
    end %end initial run
 
    if graph_form==1
        cond1=(max_chtest_p<cutoff & isDecomposableG(temp_G));
        cond2=(max_chtest_p>=cutoff | ~isDecomposableG(temp_G));
    else
        cond1=max_chtest_p<cutoff;
        cond2=max_chtest_p>=cutoff ;
    end
    
    
    if ( cond1 &idx>1) 
        record_old_var=new_fit_var; 
    elseif (max_chtest_p>=cutoff & idx==1)
        record_old_var=fit_var;
    end
    
     if ( cond1 & idx<=total_number_of_edges)
         repeated=1;
        
        %add edge
        add_edge=sort_record_test_edges(idx,:) %need_test_sets(idx_maxDev,:);

	    %change stat of edge to discovered
	    sort_color_of_edge(idx)=2;
        
	    new_G(add_edge(1),add_edge(2))=1;
        new_G(add_edge(2),add_edge(1))=1;
        
        
        set_a(end+1,:)=add_edge;
        comp_set_a{end+1}=setdiff(k_sets,set_a(end,:));
        
        old_need_test_sets=need_test_sets;
        need_test_sets=setdiff(old_need_test_sets,add_edge,'rows');
        ln_set_a=size(set_a,1);
       
        delet_edge= size(need_test_sets,1)+1;
        
        old_record_edges=record_edges;
        record_edges{idx}=num2str(add_edge);
	
        %record_old_var=new_fit_var;   
           
        %start to compute the next one'
        if idx<total_number_of_edges
            
            idx=idx+1;
            new_model_set=[];
            new_model_set=set_a;
            new_comp_set={};
            new_model_set(ln_set_a+1,:)=sort_record_test_edges(idx,:);
            %add in graph
            temp_addedge=sort_record_test_edges(idx,:);
            %change state of edge to visited
	        sort_color_of_edge(idx)=1;
	
	        temp_G=new_G;
            temp_G=new_G;
            temp_G(temp_addedge(1),temp_addedge(2))=1;
            temp_G(temp_addedge(2),temp_addedge(1))=1;
             %end add in
            for j=1:size(new_model_set,1)
                new_comp_set{j}=setdiff(k_sets,new_model_set(j,:));
            end
            %new_model_set
            new_fit_var=MGraph_GaussEstimate_variance(old_var,new_model_set,new_comp_set);
            %inv(new_fit_var)
            %inv(record_old_var)
             %pause
             
            det_old=det(record_old_var);
            det_fit=det(new_fit_var);
            max_devs=abs(N*(log(det_old)-log(det_fit)));
            max_chtest_p=Chisq(max_devs,1) ;
        else
            record_edges=old_record_edges
            delet_edge=size(old_need_test_sets,1)
            disp('break1')
            break;
        end
    %restart again avoid local maxmiu
    elseif  (cond2 & idx<total_number_of_edges )

        %get the good and unvisited edges and move it to current position
        temp_edge=sort_record_test_edges(idx,:);
	    temp_chtest_p=sort_record_chtest_p(idx);
        
        %move this good edges to an unvisited edge
        len_test_edges=length(sort_record_test_edges);
        sort_record_old_idx=idx:len_test_edges;
        len_sort_record_old_idx=length(sort_record_old_idx);
        
	    %find unvisited edges
	    unvisited_edge_idx=[];
	    visited_edge_idx=[];
	    unvisited_edge_idx=find(sort_color_of_edge==0);
	    if ~isempty(unvisited_edge_idx)
		%switch unvisited edge with this good edge
         	sort_record_test_edges(idx,:)=sort_record_test_edges(unvisited_edge_idx(1),:);
	 	    sort_color_of_edge(unvisited_edge_idx(1))=1; 
         	sort_record_test_edges(unvisited_edge_idx(1),:)=temp_edge;

  		    sort_record_chtest_p(idx)=sort_record_chtest_p(unvisited_edge_idx(1));
         	sort_record_chtest_p(unvisited_edge_idx(1))=temp_chtest_p;

        else
            %replace all visited edge as 0 and restart the search
            repeated=1;
		    visited_edge_idx=find(sort_color_of_edge==1);
            sort_color_of_edge(visited_edge_idx)=0;
            %switch current edge with the first visited but undiscovered edges
		    sort_record_test_edges(idx,:)=sort_record_test_edges(visited_edge_idx(1),:);
          	sort_record_test_edges(visited_edge_idx(1),:)=temp_edge;
            sort_color_of_edge(visited_edge_idx(1))=1; 
            
		    sort_record_chtest_p(idx)=sort_record_chtest_p(visited_edge_idx(1));
            sort_record_chtest_p(visited_edge_idx(1))=temp_chtest_p;
           
            
           % %switch current edge with visited but undiscovered edges
		   % sort_record_test_edges(idx,:)=sort_record_test_edges(visited_edge_idx(1),:);
           %	sort_record_test_edges(visited_edge_idx(1),:)=temp_edge;

		   % sort_record_chtest_p(idx)=sort_record_chtest_p(visited_edge_idx(1));
           %	sort_record_chtest_p(visited_edge_idx(1))=temp_chtest_p;
	    end

         
       
       
        
        %restart the edge test from position of idx again
        new_model_set=[];
        new_model_set=set_a;
        new_comp_set={};
        new_model_set(ln_set_a+1,:)=sort_record_test_edges(idx,:);
        %add in graph
        temp_addedge=sort_record_test_edges(idx,:);
        temp_G=new_G;
        temp_G=new_G;
        temp_G(temp_addedge(1),temp_addedge(2))=1;
        temp_G(temp_addedge(2),temp_addedge(1))=1;
        %end add in
        
        for jj=1:size(new_model_set,1)
                new_comp_set{jj}=setdiff(k_sets,new_model_set(jj,:));
        end
        new_fit_var=MGraph_GaussEstimate_variance(old_var,new_model_set,new_comp_set);
         
        det_old=det(record_old_var);
        det_fit=det(new_fit_var);
        max_devs=abs(N*(log(det_old)-log(det_fit)));
        max_chtest_p=Chisq(max_devs,1) ;
        
        repeated=repeated+1;
        if (repeated>=ceil(len_sort_record_old_idx*1) | (repeated>=10 & sort_record_chtest_p(idx)>=0.6))
            delet_edge= size(need_test_sets,1)
            record_edges
            repeated
            idx
            disp('break2');
            break;
        end
    else
        delet_edge= size(need_test_sets,1)
        record_edges
        disp('break3')
        break;
    end
end %end while

old_corrf=record_old_var./sqrt(diag(record_old_var)*diag(record_old_var)');
inv_old_var=pinv(record_old_var) ;%bug at here sometime the inverse of variance equal inf Dec 22. 2002
old_parcorrf=-inv_old_var./sqrt(abs(diag(inv_old_var)*diag(inv_old_var)'));

%Apply PC algorm to turn the directions
spareM=((abs(old_parcorrf)>0.0000000001 & abs(old_parcorrf)~=1))
disp('Begin Bscore search............');
[pdagG, GG] = wang_learn_struct_undirect2pdag(1,'cond_indep_fisher_z', q, 2,spareM, cov(tdata), N, 0.05);
 
   [old_parcorrf,pdagG ,GG, maxScore]=MGraph_BGscore_hillClimb2(tdata,pdagG,0.3);
   
disp('end Bscore');

etime(clock,t0)

Contact us at files@mathworks.com