Code covered by the BSD License  

Highlights from
MGraph

from MGraph by junbai wang
Probabilistic graphical models for reconstruction of genetic regulatory networks using DNA microarra

[old_parcorrf,delet_edge,record_edges,pdag,G]=MGraph_IG_forward_fastBScore(data,old_var,N,q,cutoff)
function [old_parcorrf,delet_edge,record_edges,pdag,G]=MGraph_IG_forward_fastBScore(data,old_var,N,q,cutoff)
global graph_form
%Input: old_var is the covariance of the data
%P/181, Graphical models in  applied multivariate statistics
%old_var=[ 302.29 125.78 100.43 105.07 116.07; ...
%        125.78 170.88 84.19 93.6 97.89;...
%        100.43 84.19 111.6 110.84 120.49; ...
%        105.07 93.6 110.84 217.88 153.77; ...
%   116.07 97.89 120.49 153.77 294.37]
%N=88 % number of observations
%q=5 % number of variables
%cutoff=0.05;
%old_corrf=old_var./sqrt(diag(old_var)*diag(old_var)');
%inv_old_var=pinv(old_var); %bug at here sometime the inverse of variance equal inf Dec 22. 2002
%old_parcorrf=-inv_old_var./sqrt(abs(diag(inv_old_var)*diag(inv_old_var)'));
%devs=-N*log(1-old_parcorrf.^2) %device of the pair of edges
%Need check with original IC algorithm and bug in forward alogrm!! sep. 2003

t0=clock;
%For forward algorithm
%inital model a+b+c+d...
initial_model=eye(q,q);
k_sets=1:q;
full_model=triu(ones(q,q));
[set_a1,set_a2]=find(initial_model);
set_a=[set_a1,set_a2];
ln_set_a=size(set_a,1);

%rest of edges
[set_b1,set_b2]=find(full_model);
set_b=[set_b1,set_b2];
ln_set_b=size(set_b,1);
need_test_sets=setdiff(set_b,set_a,'rows');

%compement set of set a
for i=1:ln_set_a
    comp_set_a{i}=setdiff(k_sets,set_a(i,:)); %complement of set a
end
idx=1; %index for recorded edges
repeated=1;
total_number_of_edges=(q^2-q)/2;
%initial graph
temp_G=zeros(q,q);
new_G=temp_G;

%add initial Bscore, take care the row or column of data
[bnt_nr bnt_nc]=size(data);
bnt_u0=median(data);
bnt_n=bnt_nc
bnt_L=bnt_nr
bnt_sigma=bnt_nc+2
bnt_alpha=bnt_nc+2
bnt_v=ones(1,bnt_n);
%initial structure, 1->2->3
bnt_G=temp_G;
for i=1:bnt_n-1
    bnt_G(i,i+1)=1;
end
bnt_b=gnt_graph_to_coeffiecent_b(bnt_G);
[current_score, bnt_T0, bnt_TL]=gnt_scoring_completeG(bnt_u0,bnt_v,bnt_b,bnt_sigma,bnt_alpha,bnt_n,bnt_L,data);
score(1)=(current_score);
%end add

while 1
    %need update set_a, comp_set_a, need_test_sets in each loop

    %initial run computer all pair of edges device to speed up 
    if idx==1
        fit_var=MGraph_GaussEstimate_variance(old_var,set_a,comp_set_a);
        %initial model device
        det_old=det(old_var);
        det_fit=det(fit_var);
        total_devs=abs(N*(log(det_old)-log(det_fit)));
        df=size(need_test_sets,1);
        total_p=Chisq(total_devs,df);
    
        new_model_set=[];
        new_model_set=set_a;
        index=1:df;
        devs=[];
        chtest_p=[];
        new_comp_set={};
        record_test_edges=[];
        temp_dev=0;
        
        for i=1:df
            new_model_set(ln_set_a+1,:)=need_test_sets(i,:);
            for j=1:size(new_model_set,1)
                new_comp_set{j}=setdiff(k_sets,new_model_set(j,:));
            end
            new_fit_var=MGraph_GaussEstimate_variance(old_var,new_model_set,new_comp_set);
            det_old=det(fit_var);
            det_fit=det(new_fit_var);
            devs(i)=abs(N*(log(det_old)-log(det_fit)));
           
            if (devs(i)>temp_dev)
                %record maxium var
                temp_dev=devs(i);
                record_old_var=new_fit_var;
            end
            chtest_p(i)=Chisq(devs(i),1) ;   
            record_test_edges(i,:)=need_test_sets(i,:);
        end 
        devs
        chtest_p
        %find maximu device and add this edge to initial model
        %[maxDev idx_maxDev]=max(devs);
        [sortDev idx_sortDev]=sort(devs);
        numberof_edges=length(sortDev);
        flip_sortDev=fliplr(sortDev);
        flip_idx_sortDev=fliplr(idx_sortDev);
        maxDev=flip_sortDev(1);
        idx_maxDev=flip_idx_sortDev(1);
        %sort edge order
        sort_record_test_edges=record_test_edges(flip_idx_sortDev,:);
        sort_record_chtest_p=chtest_p(flip_idx_sortDev);
        
        max_chtest_p=chtest_p(flip_idx_sortDev(1));
        %make initial add in graph
        temp_addedge=sort_record_test_edges(idx,:);
        temp_G(temp_addedge(1),temp_addedge(2))=1;
        temp_G(temp_addedge(2),temp_addedge(1))=1;
        %end add in
    end %end initial run
 
    if graph_form==1
        cond1=(max_chtest_p<cutoff & isDecomposableG(temp_G));
        cond2=(max_chtest_p>=cutoff | ~isDecomposableG(temp_G));
    else
        cond1=max_chtest_p<cutoff;
        cond2=max_chtest_p>=cutoff ;
    end
    
    
    if ( cond1 &idx>1) 
        record_old_var=new_fit_var; 
    elseif (max_chtest_p>=cutoff & idx==1)
        record_old_var=fit_var;
    end
    
     if ( cond1 & idx<=total_number_of_edges)
         repeated=1;
        
        %add edge
        add_edge=sort_record_test_edges(idx,:) %need_test_sets(idx_maxDev,:);
        new_G(add_edge(1),add_edge(2))=1;
        new_G(add_edge(2),add_edge(1))=1;
        %end add edge
        
        %add bnt score
        %check why score are change at the last one ???? Oct 7.2003
        clear bnt_pdag
        [bnt_pdag, bnt_G] = wang_learn_struct_undirect2pdag(1,'cond_indep_fisher_z', q, 5,new_G, old_var, N, 0.05);
        bnt_pdag
        score(idx+1) = (gnt_scoring_uncompleteG(abs(bnt_pdag),bnt_sigma,bnt_alpha,bnt_L,bnt_T0,bnt_TL))
        pause
        %score./sum(sum(score))
        %pause
        %end add bnt
        
        set_a(end+1,:)=add_edge;
        comp_set_a{end+1}=setdiff(k_sets,set_a(end,:));
        
        old_need_test_sets=need_test_sets;
        need_test_sets=setdiff(old_need_test_sets,add_edge,'rows');
        ln_set_a=size(set_a,1);
       
        delet_edge= size(need_test_sets,1)+1;
        
        old_record_edges=record_edges;
        record_edges{idx}=num2str(add_edge);
        %record_old_var=new_fit_var;   
           
        %start to compute the next one'
        if idx<total_number_of_edges
            
            idx=idx+1;
            new_model_set=[];
            new_model_set=set_a;
            new_comp_set={};
            new_model_set(ln_set_a+1,:)=sort_record_test_edges(idx,:);
            %add in graph
            temp_addedge=sort_record_test_edges(idx,:);
            temp_G=new_G;
            temp_G=new_G;
            temp_G(temp_addedge(1),temp_addedge(2))=1;
            temp_G(temp_addedge(2),temp_addedge(1))=1;
             %end add in
            for j=1:size(new_model_set,1)
                new_comp_set{j}=setdiff(k_sets,new_model_set(j,:));
            end
            %new_model_set
            new_fit_var=MGraph_GaussEstimate_variance(old_var,new_model_set,new_comp_set);
            %inv(new_fit_var)
            %inv(record_old_var)
             %pause
             
            det_old=det(record_old_var);
            det_fit=det(new_fit_var);
            max_devs=abs(N*(log(det_old)-log(det_fit)));
            max_chtest_p=Chisq(max_devs,1) ;
        else
            record_edges=old_record_edges
            delet_edge=size(old_need_test_sets,1)
            disp('break1')
            break;
        end
    %restart again avoid local maxmiu
    elseif  (cond2 & idx<total_number_of_edges )

        %get the good edges and move it to the end
        temp_edge=sort_record_test_edges(idx,:);
        
        %move this good edges to the end of sets
        %rand('state',sum(100*clock));
        len_test_edges=length(sort_record_test_edges);
        sort_record_old_idx=idx:len_test_edges;
        len_sort_record_old_idx=length(sort_record_old_idx);
        %perm_idx=sort_record_old_idx(randperm(len_sort_record_old_idx));
        
        %len_perm_idx=length(perm_idx);
        %for ii=1:len_perm_idx
        %    if perm_idx(ii)~=idx
        %        sort_record_test_edges(idx,:)=sort_record_test_edges(perm_idx(ii),:); 
        %        sort_record_test_edges(perm_idx(ii),:)=temp_edge;
        %        break;
        %    end
        %end
       
         sort_record_test_edges(idx,:)=sort_record_test_edges(idx+repeated,:); 
         sort_record_test_edges(idx+repeated,:)=temp_edge;
        
         
         %size(sort_record_test_edges)
         %size(sort_record_chtest_p)
         temp_chtest_p=sort_record_chtest_p(idx);
         sort_record_chtest_p(idx)=sort_record_chtest_p(idx+repeated);
         sort_record_chtest_p(idx+repeated)=temp_chtest_p;
        
        %restart the edge test from position of idx again
        new_model_set=[];
        new_model_set=set_a;
        new_comp_set={};
        new_model_set(ln_set_a+1,:)=sort_record_test_edges(idx,:);
        %add in graph
        temp_addedge=sort_record_test_edges(idx,:);
        temp_G=new_G;
        temp_G=new_G;
        temp_G(temp_addedge(1),temp_addedge(2))=1;
        temp_G(temp_addedge(2),temp_addedge(1))=1;
        %end add in
        
        for jj=1:size(new_model_set,1)
                new_comp_set{jj}=setdiff(k_sets,new_model_set(jj,:));
        end
        new_fit_var=MGraph_GaussEstimate_variance(old_var,new_model_set,new_comp_set);
         
        det_old=det(record_old_var);
        det_fit=det(new_fit_var);
        max_devs=abs(N*(log(det_old)-log(det_fit)));
        max_chtest_p=Chisq(max_devs,1) ;
        
        repeated=repeated+1;
        if (repeated>=ceil(len_sort_record_old_idx*1) | (repeated>=20 & sort_record_chtest_p(idx)>=0.8))
            delet_edge= size(need_test_sets,1)
            record_edges
            repeated
            idx
            disp('break2');
            break;
        end
    else
        delet_edge= size(need_test_sets,1)
        record_edges
        disp('break3')
        break;
    end
end %end while

old_corrf=record_old_var./sqrt(diag(record_old_var)*diag(record_old_var)');
inv_old_var=pinv(record_old_var) ;%bug at here sometime the inverse of variance equal inf Dec 22. 2002
old_parcorrf=-inv_old_var./sqrt(abs(diag(inv_old_var)*diag(inv_old_var)'));

%Apply PC algorm to turn the directions
spareM=((abs(old_parcorrf)>0.0000000001 & abs(old_parcorrf)~=1))
%figure
%draw_graph(spareM);record_
[pdag, G] = wang_learn_struct_undirect2pdag(1,'cond_indep_fisher_z', q, 5,spareM, record_old_var, N, 0.05);
%triu(new_G)-spareM
%[pdag, G] = wang_learn_struct_pdag_pc('dsep', q, 5, spareM);
etime(clock,t0)

Contact us at files@mathworks.com