function [EpsHF EpsList EpsGG]=GenCalcHFEps(Y,BTFacs,WInFacs,S)
%function [EpsHF EpsList EpsGG]=GenCalcHFEps(Y,BTFacs,WInFacs,S)
%
% This will calculate the Geisser-Greenhouse and Huynh-Feldt epsilon value
% for the general case when given a univariate dataset with any amount of
% between or within subject factors.
%
% After calling this function, the intention is to multiply the degrees of
% freedom for tests in a repeated measures ANOVA by the corresponding
% epsilon value given by this code to obtain a corrected p-value. The
% F-statistic does not change as a result of this.
%
% Inputs:
% Y- A column vector of the dependent variable (the value
% measured) for each data point.
% BTFacs- A matrix of between subject factors. Must have the same
% number of rows as Y. Each factor is down a different column
% of BTFacs, and the value in each row denotes the level of
% that factor for each corresponding datapoint in Y. If there
% are no between subject factors, input an empty matrix, [].
% WInFacs- A matrix of within subject factors. Must have the same
% number of rows as Y. Each factor is down a different column
% of WInFacs, and the value in each row denotes the level of
% that factor for each corresponding datapoint in Y.
% S- A column vector of subject numbers corresponding to each
% datapoint in Y. Must have the same number of rows as Y. If
% left empty, the program assumes all subjects are entered in
% the same order for all combinations of factors.
%
% Outputs:
% EpsHF- A row vector of all the possible values for the Huynh-Feldt
% epsilon, correspondding to all main effects for ecah within
% subject factor, and all interactions of within subject
% factors.
% EpsList- A cell array of a text list of the effects corresponding to
% the positions in EpsHF. For example 'A' means a main effect
% for the first (within subject) factor, 'AB' means a 2way
% interaction for the first two (within subject) factors,
% etc.
% EpsGG- The Geisser-Greenhouse epsilon values, in teh same form as
% EpsHF.
%
% The Huynh-Feldt epsilon value is less conservative while maintaining the
% proper Type I error rate, which is why the program title focuses on that.
% The Geisser-Greenhouse epsilon is calculated first as part of the
% process of calculating the Huynh-Feldt value, so I've allowed it to be
% optionally returned as well.
%
% This code follows a procedure described in Huynh(1978), but includes a
% modification of the last step, as described in Chen & Dunlap(1994).
%
% I've tested this with results from SPSS for a couple of sample datasets
% I've found in various websites, and it gives results that match
% According to what I've seen and read though, SPSS and SAS give results
% for EpsHF that are wrong when both between and with-in factors are
% present, and thus they slightly differ from the results this program,
% although the results for EpsGG are identical. To reproduce the SPSS
% results, uncomment line 153. See other comments near there for references
% on why I think SPSS/SAS is wrong.
%
%
% References:
%
% Huynh H. "Some approximate tests for repeated measurement designs",
% Psychometrika (1978)
%
% Chen, RS and Dunlap, WP "A MonteCarlo STudy on the Performance of a
% Corrected Formula for eps(tilda) suggested by Lecoutre", Journal of
% Educational Statistics (1994)
%
% written on 090202 by Matthew Nelson
if nargin<2 || isempty(BTFacs);
BT.nFacs=1;
BT.nTreats=1;
g=1;
else
BT.Facs=BTFacs;
BT.nFacs=size(BTFacs,2);
BT.GNums=cell(1,BT.nFacs);
BT.nTreats=repmat(0,1,BT.nFacs);
%BT.dfs=BT.nTreats;
for iBF=1:BT.nFacs
BT.GNums{iBF}=unique(BT.Facs(:,iBF));
BT.nTreats(iBF)=length(BT.GNums{iBF});
end
%BTdfs=BTnTreats-1; %I don't think this is ever used...
g=prod(BT.nTreats);
end
if nargin<3 || isempty(WInFacs);
disp('In GenCalcHFEps- no Within Factors entered; leaving without calculating any epsilons')
return
else
WIn.Facs=WInFacs;
WIn.nFacs=size(WIn.Facs,2);
WIn.GNums=cell(1,WIn.nFacs);
WIn.nTreats=repmat(0,1,WIn.nFacs);
%WIn.dfs=WIn.nTreats;
WIn.FacMult=WIn.nTreats;
for iBF=1:WIn.nFacs
WIn.GNums{iBF}=unique(WIn.Facs(:,iBF));
WIn.nTreats(iBF)=length(WIn.GNums{iBF});
end
%WIn.dfs=WIn.nTreats-1; %I don't think this is ever used...
WIn.nCombs=prod(WIn.nTreats);
%Calc FactMult for determining curComb later
for iFac=1:WIn.nFacs-1
WIn.FacMult(iFac)=prod(WIn.nTreats(iFac+1:end));
end
WIn.FacMult(end)=1;
end
if nargin<4 || isempty(S)
S=[];
N=length(Y)/ (WIn.nCombs*prod(BT.nTreats)); %if S is not input, we assume that there is data for each subject in each cell...
else N=length(unique(S))/prod(BT.nTreats); %divide the num of unique subjects by the num of BT subj treatments to get the N for each ind. cell
end
%Now Calc Z (SSP) matrix using recursive loops
Z=repmat(0,WIn.nCombs,WIn.nCombs);
Z=BTRLoop(1, repmat(1,length(Y),1),Z, N,Y,BT,WIn,S );
%Calc M matrices using other program
[M EpsList]=GenOrthogComps(WIn.nTreats);
%combine M and Z to calc epsilons
EpsHF=repmat(0,1,length(M));
EpsGG=EpsHF;
%calc totN, used for HF calc below
totN=N*prod(BT.nTreats);
for im=1:length(M)
S=M{im}*Z*M{im}';
r=length(S);
%calc EpsGG
NumSS=0;
for is1=1:r
NumSS=NumSS+S(is1,is1);
end
DenSS=0;
for is1=1:r
for is2=1:r
DenSS=DenSS+S(is1,is2)^2;
end
end
EpsGG(im)=(NumSS^2)/(r*DenSS);
%Calc EpsHF
EpsHF(im)=( (totN-g+1)*r*EpsGG(im)-2)/( r*(totN-g-r*EpsGG(im)) );
%EpsHF(im)=( N*r*EpsGG(im)-2)/( r*(N-g-r*EpsGG(im)) ); %I have observed that to match SAS and SPSS, you would want to use this line of code instead of the line above it
%BUT... According to what I've read, SPSS and SAS are wrong in this respect...
%see: http://archives.devshed.com/forums/development-94/huynh-feldt-r-vs-sas-bug-293023.html
% and: Chen, RS and Dunlap, WP "A MonteCarlo STudy on teh Performance of a Corrected Formula for eps suggested by Lecoutre",
% Journal of Educational Statistics (1994)
end
%these vals can't be more than 1
EpsGG=min(EpsGG,1);
EpsHF=min(EpsHF,1);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Z=BTRLoop(curFac,curSelInds,Z, N,Y,BT,WIn,S )
for iT=1:BT.nTreats(curFac)
if BT.nTreats(curFac)==1 && BT.nFacs==1
nextSelInds=curSelInds;
else
nextSelInds=curSelInds & BT.Facs(:,curFac)==BT.GNums{curFac}(iT);
end
if curFac==BT.nFacs %for DevMat for FacLevList
DevMat= WInRLoop(1, nextSelInds, repmat(0,WIn.nCombs,N),repmat(0,1,WIn.nFacs), N,Y,WIn,S );
Z=Z+DevMat*DevMat';
else
Z=BTRLoop(curFac+1, nextSelInds,Z ,N,Y,BT,WIn,S );
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function DevMat=WInRLoop(curFac,curSelInds,DevMat,FacLevList, N,Y,WIn,S )
%if curFac==1; DevMat=repmat(0,WIn.nCombs,N); end
for iT=1:WIn.nTreats(curFac)
FacLevList(curFac)=iT;
if curFac==WIn.nFacs
finSelInds = curSelInds & WIn.Facs(:,curFac)==WIn.GNums{curFac}(iT);
curComb=sum( (FacLevList-1).*WIn.FacMult )+1;
if isempty(S)
DevMat(curComb,:)=Y(finSelInds)-mean(Y(finSelInds)); %works if subjects are all input in the same order...
else
[junk finSelInds2]=sort(S(finSelInds)); %still assuming one val for each subj, but not assuming that they've been input in teh proper order... NOTE- this could be changed later if it'es ever needed to account for more than one val per subject...
tmpInds=find(finSelInds);
DevMat(curComb,:)=Y(tmpInds(finSelInds2))-mean(Y(finSelInds));
end
else
DevMat=WInRLoop(curFac+1, curSelInds & WIn.Facs(:,curFac)==WIn.GNums{curFac}(iT) , DevMat,FacLevList, N,Y,WIn,S );
end
end