No BSD License  

Highlights from
Robust Experimental Designs for Generalized Linear Models

from Robust Experimental Designs for Generalized Linear Models by Hovav Dror
Finding Local D-optimal and Robust Experimental Designs for GLM

Section6Example.m
% This file refers to section 6 (the INK example) of:
% Technical Report RP-SOR-0601, Robust Experimental Design for Multivariate Generalized Linear Models, Hovav A. Dror and David M. Steinberg, January 2006.
% Available at http://www.math.tau.ac.il/~dms/GLM_Design

% It is a work through for finding a robust design using clustering
% to a Poisson model example, 5-dimensional, normal prior, two competing models

% You can run the script and see its stages



% The Parameter Space
BetaV=[];
BetaV1=[];
BetaV2=[];
BetaMeans1=[   -2.3479   -5.5296   -2.9929   -3.9494   -0.8555    0.4139   -2.0666   -1.1263];
BetaSE1=[ 0.6863    0.9408    0.8164    0.5876    0.5389    0.3649    1.3195    0.9848];
BetaMeans2=[   -1.5165   -4.2972   -1.7859   -3.3852   -0.2806    0.2282];
BetaSE2=[  0.2058    0.2011    0.1581    0.2367    0.3185    0.3026];
  m=5; % m - number of variables
model1=[zeros(1,m) ; eye(m)  ;  1 1 zeros(1,m-2) ;  1 0 1 zeros(1,m-3)];% for the INK example
model2='linear';
if isnumeric(model1)
    p1=length(model1);
else
    p1=length(x2fx(ones(1,m),model1));
end;
if isnumeric(model2)
    p2=length(model2);
else
    p2=length(x2fx(ones(1,m),model2));
end;

 family='poisson';
 link='log';

  plot([0 0 1],[1 0 0],'k'); % empty plot - preparation only
  scrsz = get(0,'ScreenSize');
  set(gcf,'Position',[2*scrsz(3)/3 2*scrsz(4)/3 scrsz(3)/3 scrsz(4)/3]); % small figure upper right of the screen


 % ------------------------------------------------------------------------
 % This step is optional.
 % Its purpose is to choose the number of support points
 % in each Local D-optimal design
 fprintf('----------------------------------------------------------------------------------------------------\n');
 fprintf('\nThis is an optional step \n');
fprintf('It helps choosing the number of support \n');
fprintf('points for a Local D-optimal Design (richer model) \n');
t1=0; % time for first step
A=input('Choose "1" to use the option, \nor press "Enter" to skip: ');
if ~isempty(A)
    t1=clock;
    fprintf('The number of support points tested now is:');
     BetaMean= BetaMeans1; % The centroid of the beta space for the richer model
    NSP=p1:2^m;   % Sequence of support points values to check
    SPE=[];          % Vector with the Efficiency for each NSP value
    i=0;
    for SP=NSP
        i=i+1;
        fprintf(' %g',SP);
        [X,IM,SPE(i)]=DGLM(BetaMean,m,SP,model1,family,link,10^-2);
    end; % for SP
    fprintf(' \n \n');
    plot(NSP,SPE/max(SPE),'.-k');
    axis([min(NSP) max(NSP) 0 1]);
    grid on;
    t1=etime(clock,t1); % Time to compute
    fprintf('- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n');
    fprintf('(Finished optional part in %1.0f seconds)\n',t1);
    xlabel('Number of Support Points in Beta''s Centroid Local D-Optimal Design');
    ylabel('Relative Efficiency');
    title('Choose the proper number of support points for a Local Design');

    LSP=input('Using this graph, choose the number of \nSupport Points for Local Designs (default=8): ');
    if isempty(LSP),  LSP=8; end;
else % if skipped
    LSP=input('Choose the number of Support Points \nfor Local Designs (default=8): ');
    if isempty(LSP),  LSP=8; end;
end; % if use the option to choose LSP by graph

% -------------------------------------------------------------------------
% First Step: Create Local Designs all over the Parameter Space
N=1000; % Number of vectors from the parameter sapce
% Use a Niederreiter sequence of proper size
%NI=Nied(N,p); % If function exist: Niederreiter sequence of the proper size
%otherwise:
t2=clock; % timing the Local Database creation
NI2=load('Nied1000.m');
NI1=NI2(1:N,1:p1);
NI2=NI2(1:N,1:p2);
% Now transform it according to distribution
BetaV1 = norminv(NI1,ones(N,1)*BetaMeans1,ones(N,1)*BetaSE1);
BetaV2 = norminv(NI2,ones(N,1)*BetaMeans2,ones(N,1)*BetaSE2);

% Now Search for the local Designs, and put them in the matrix X
X=[]; % Designs
IM=[]; %Information Matrices
IMD=[]; % D-Criteria value of the regression matrices
fprintf('\n----------------------------------------------------------------------------------------------------\n');
fprintf('Next: Finding 100 Local D-Optimal designs for each model \n');
N2=input('press "enter" to continue, \n(or choose a different number of Local Designs): ');
if ~isempty(N2), N=N2; else N=100; end;
for ModelN=1:2
    fprintf('\nModel%g, Now Finding Design #:',ModelN);
    if ModelN==1 
        model=model1; 
        BetaV=BetaV1;
    else
        model=model2;
        BetaV=BetaV2;
    end;
    for i=1:N
        fprintf(' %g',i);
        [Xi,IMi, Di]=DGLM(BetaV(i,:),m,LSP,model,family,link);
        X=[ X ; Xi ] ;
        %IM{i}=IMi;
        IMD=[IMD ; Di];
    end;
end; %for ModelN
X=X-sign(X).*rand(size(X)).*0.0001; % Jittering
t2=etime(clock,t2);
fprintf('\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -');
fprintf('\nDone! Database Creation time %2.0f seconds.\n \n',t2);

% -----------------------------------------------------------------------------------------
% Next Step is choosing the number of Support points for the compromise
% design (What should be the K of the K-means clustering?)
t3=clock; %timing this step
fprintf('----------------------------------------------------------------------------------------------------\n');
fprintf('Time to choose "K" for K-means Clustering. \n');
fprintf('Repeateing the procedure only once for each K \n');
fprintf('(may cause the graph to be a bit jumpy) \n');
input('press "enter" to continue');
fprintf('Now Trying K= ');
KS=[8:16 18:2:30 32:4:64]; % sequence of tested K values
KX=[]; % Design matrix by clustering (for different K's);
Eff=[];
MedEff=[];
MinEff=[];
i=0;
for K=KS
    i=i+1;
    fprintf(' %g',K);
    [IDX,KX,s] =kmeans(X,K,'maxiter',500,'emptyaction','singleton','distance','cityblock');
    for j=1:N
        Eff(i,j)=( det( InfoMtrxGLM(x2fx(KX,model1),BetaV1(j,:)',family,link) )^(1/p1) ) / K / IMD(j);
        Eff(i,N+j)=( det( InfoMtrxGLM(x2fx(KX,model2),BetaV2(j,:)',family,link) )^(1/p2) ) / K / IMD(N+j);
    end; % for j
    MedEff(i)=median(Eff(i,:));
    MinEff(i)=min(Eff(i,:));
end; % for K
t3=etime(clock,t3);
fprintf('\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -');
fprintf('\nDone in %2.0f seconds (All phases togeter %2.0f seconds)\n',[t3 t1+t2+t3]);
plot(KS,MedEff,'.-k');
hold on
plot(KS,MinEff,'s:k','markersize',3);
hold off
xlabel('Number of Support Points');
ylabel('Approximate Efficiency');
legend('Median Efficiency','Minimum Efficiency','Location','East')
title('Choose "K" - the number of support points for the robust design');

K=input('Using this graph, choose the number of Support Points, K, \nfor The Robust Design (default=48): ');
if isempty(K),  K=48; end;

% -----------------------------------------------------------------------------------------
% Now use K-means clustering to produce the Robust Design
fprintf('\n----------------------------------------------------------------------------------------------------\n');
fprintf('Last step - repeat the clustering with the chosen K (%g), \nand choose the Robust Design\n',K);
%Nrepeat=N; % Number of times the clustering will be performed
Nrepeat=40; % Number of times the clustering will be performed
Nrepeats=input(sprintf('Choose the number of clustering repetitions \n(or press "enter" to use %g repetitions): ',Nrepeat));
if isempty(Nrepeats), Nrepeats=Nrepeat; end;
fprintf('Number of Clustering repetition processed: ');
t4=clock; % timing the step
BDV = -inf ; % Best D-criteria value found
for i=1:Nrepeats
    fprintf(' %g',i);
    [IDX,KX,s] =kmeans(X,K,'maxiter',500,'emptyaction','singleton','distance','cityblock');
    DV=1; % D-criteria representing value for this clustering
    for j=1:N
        DV=DV + log( det(InfoMtrxGLM(x2fx(KX,model1),BetaV1(j,:)',family,link) ) )/p1;
        DV=DV + log( det(InfoMtrxGLM(x2fx(KX,model2),BetaV2(j,:)',family,link) ) )/p2;
    end; % for j
    if DV>BDV
        BX=KX;
        BDV=DV;
    end; % if improved the design update BX - the best design found so far
end; % for i (Nrpeats)
t4=etime(clock,t4);
fprintf('\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n');
fprintf('Done! This step took %2.0f seconds. Overall the process took %2.0f seconds',[t4 t1+t2+t3+t4]);
fprintf('\nThe best design found, the matrix BX, is ready to be used\n');
fprintf('----------------------------------------------------------------------------------------------------\n');


% -------------------------------------------------------------------------
% Last, without computing "many" local designs, we can't produce
% a good histogram of efficiencies.
% The following is a rough approximation, based on the N designs created
% before
input('Press "enter" for a plot representing a rough estimation \nof the efficiency histogram\n');
HEff=[];
for i=1:N
%     HEff = [ HEff  ( det( InfoMtrxGLM(x2fx(BX,model1),BetaV1(i,:)',family,link) )^(1/p1) ) / K / IMD(j) ];
 %    HEff = [ HEff  ( det( InfoMtrxGLM(x2fx(BX,model2),BetaV2(i,:)',family,link) )^(1/p2) ) / K / IMD(N+j) ];
        HEff(i)=( det( InfoMtrxGLM(x2fx(BX,model1),BetaV1(i,:)',family,link) )^(1/p1) ) / K / IMD(i);
        HEff(N+i)=( det( InfoMtrxGLM(x2fx(BX,model2),BetaV2(i,:)',family,link) )^(1/p2) ) / K / IMD(N+i);

end;
hist(HEff);
x=axis;
axis([0 1 x(3:4)]);
xlabel('Approximate Efficiency');
title(sprintf('Efficiency Estimation based on the same %g local designs',N));

Contact us at files@mathworks.com