function STATS=kwtest(x)
%Kruskal-Wallis test for the non parametric ANOVA
%In statistics, the Kruskal–Wallis one-way analysis of variance by ranks (named
%after William Kruskal and W. Allen Wallis) is a non-parametric method for
%testing equality of population medians among groups. It is identical to a
%one-way analysis of variance with the data replaced by their ranks. It is an
%extension of the Mann–Whitney U test to 3 or more groups. Since it is a
%non-parametric method, the Kruskal–Wallis test does not assume a normal
%population, unlike the analogous one-way analysis of variance. However, the
%test does assume an identically-shaped and scaled distribution for each group,
%except for any difference in medians. The exact distribution of the
%Kruskal-Wallis statistics is very time, space and memory expensive, so it is
%approximated using other distribution.
%The MatLab function KRUSKALWALLIS only uses the chi-square distribution that
%is the most conservative (and this means that it accepts the H0 hypothesis more
%than you want). This function computes also the F, Beta and Gamma
%approximations (the F distribution is the less conservative and this means that
%it refuses the H0 more than you want), giving a more informative output (if you
%want, see http://www.jmu.edu/assessment/JPM%20AERA%20SP%2008.pdf). If you
%believe that the p-value you choose is smaller than your cut-off (usually
%0.05), you can use my function Dunn-Sidak to isolate the differences among
%groups (http://www.mathworks.com/matlabcentral/fileexchange/12827).
%
% Syntax: STATS=kwtest(X)
%
% Inputs:
% X - data matrix (Size of matrix must be n-by-2; data=column 1, group=column 2).
%
% Outputs:
% - Statistics of each group (samples, median, sum of ranks and mean rank)
% - Correction factor for ties and H statistics
% - Chi-square approximation
% - F approximation
% - Beta approximation
% - Gamma approximation
% If STATS nargout was specified the results will be stored in the STATS
% struct.
%
% Example:
%
% Sample
% -------------------------
% 1 2 3
% -------------------------
% 7.89 8.84 8.65
% 9.16 9.92 10.70
% 7.34 7.20 10.24
% 10.28 9.25 8.62
% 9.12 9.45 9.94
% 9.24 9.14 10.55
% 8.40 9.99 10.13
% 8.60 9.21 9.78
% 8.04 9.06 9.01
% 8.45
% 9.51
% 8.15
% 7.69
% -------------------------
%
% Data matrix must be:
% x=[7.79 9.16 7.64 10.28 9.12 9.24 8.40 8.60 8.04 8.45 9.51 8.15 7.69 8.84 9.92 7.20 9.25 9.45 9.14 9.99 9.21 9.06 8.65 10.70 10.24 8.62 9.94 10.55 10.13 9.78 9.01;
% repmat(1,1,13) repmat(2,1,9) repmat(3,1,9)]'
%
% Calling on Matlab the function: kwtest(x)
%
% Answer is:
%
% KRUSKAL-WALLIS TEST
% --------------------------------------------------------------------------------
% Group Sample Median Ranks sum Mean rank
% --------------------------------------------------------------------------------
% 1 13 8.4500 146.0000 11.2308
% 2 9 9.2100 152.0000 16.8889
% 3 9 9.9400 198.0000 22.0000
% --------------------------------------------------------------------------------
% Correction factor for ties: 0.0000 H: 7.5823
% --------------------------------------------------------------------------------
%
% Chi-square approximation (the most conservative)
% --------------------------------------------------------------------------------
% chi-square=H df=2 - p-value (2 tailed): 0.0226
% --------------------------------------------------------------------------------
%
% F-statistic approximation (the less conservative)
% --------------------------------------------------------------------------------
% F=5.0734 df-num=2 df-denom=29 - p-value (2 tailed): 0.0135
% --------------------------------------------------------------------------------
%
% Beta distribution approximation
% --------------------------------------------------------------------------------
% B=0.2878 alpha=0.9438 beta=11.4893 - p-value (2 tailed): 0.0181
% --------------------------------------------------------------------------------
%
% Gamma distribution approximation
% --------------------------------------------------------------------------------
% G=H alpha=1.1035 beta=1.8124 - p-value (2 tailed): 0.0190
% --------------------------------------------------------------------------------
%
% Created by Giuseppe Cardillo
% giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:
% Cardillo G. (2009). KWTEST: Kruskal-Wallis non parametric test for ANalysis Of VAriance
% http://www.mathworks.com/matlabcentral/fileexchange/25860
%Input Error handling
if isempty(x)
error('Warning: X matrix is empty...')
end
if isvector(x)
error('Warning: x must be a matrix, not a vector.');
end
if ~all(isfinite(x(:))) || ~all(isnumeric(x(:)))
error('Warning: all X values must be numeric and finite')
end
%check if x is a Nx2 matrix
if size(x,2) ~= 2
error('Warning: KWTEST requires a Nx2 input matrix')
end
%check if x(:,2) are all whole elements
if ~all(x(:,2) == round(x(:,2)))
error('Warning: all elements of column 2 of input matrix must be whole numbers')
end
tr=repmat('-',1,80);% set divisor
disp('KRUSKAL-WALLIS TEST')
disp(tr)
fprintf('Group\tSample\tMedian\tRanks sum\tMean rank\n')
disp(tr)
N=size(x,1);%total observation
%One first ranks all the observations without regard for which treatment group
%they are in.
[R,T]=tiedrank(x(:,1));
%Next, compute the ranks sum for each group.
k=max(x(:,2)); %number of groups
KW=zeros(4,k); %matrix preallocation
for I=1:k
KW(1,I)=length(x(x(:,2)==I)); %elements of the i-th group
KW(2,I)=median(x(x(:,2)==I),1); %median of the i-th group
KW(3,I)=sum(R(x(:,2)==I)); %the ranks sum
KW(4,I)=KW(3,I)/KW(1,I); %the mean rank
fprintf('%u\t%u\t%0.4f\t%0.4f\t%0.4f\n',I,KW(1,I),KW(2,I),KW(3,I),KW(4,I))
end
disp(tr)
%the mean rank computed when tratment has no effect.
Rbar=(N+1)/2;
%We will use the sum of squared deviations between each sample group's average
%rank and the overall average rank, weighted by the sizes of each group, as a
%measure of variability between the observations and what you would expect if
%the hypothesis of no treatment effect was true. Call this sum D.
D=sum(KW(1,:).*(KW(4,:)-Rbar).^2);
Hbiased=12*D/N/(N+1); %Kruskal-Wallis statistic uncorrected for ties
if T==0 %if there are not ties
CF=0;
H=Hbiased;
else
CF=1-2*T/N/(N^2-1); %correction factor for ties
H=Hbiased/CF;
end
fprintf('Correction factor for ties: %0.4f\tH: %0.4f\n',CF,H)
disp(tr); disp(' ')
df=k-1; %degrees of freedom
fprintf('Chi-square approximation (the most conservative)\n')
disp(tr)
P1=1-chi2cdf(H,df); %probability associated to the Chi-squared-statistic.
fprintf('chi-square=H df=%u - p-value (2 tailed): %0.4f\n',df,P1)
disp(tr); disp(' ')
fprintf('F-statistic approximation (the less conservative)\n')
disp(tr)
dfd=N-df; %degrees of freedom for denominator
F=((dfd+1)*H)/((k-1)*(N-1-H)); %F-statistic approximation.
P2=1-fcdf(F,df,N-k-1); %probability associated to the F-statistic.
fprintf('F=%0.4f df-num=%u df-denom=%u - p-value (2 tailed): %0.4f\n',F,df,dfd,P2)
disp(tr); disp(' ')
m=k-1; %the expected value
s2=2*df-2*(3*k^2-6*k+N*(2*k^2-6*k+1))/(5*N*(N+1))-(6/5*sum(1./KW(1,:))); %variance
eta=(N^3-sum(KW(1,:).^3))/N/(N+1); %maximum value of H
fprintf('Beta distribution approximation\n')
disp(tr)
B=H/eta;
alpha1=m*((m*(eta-m)-s2)/(eta*s2));
beta1=alpha1*((eta-m)/m);
P3=1-betacdf(B,alpha1,beta1); %probability associated to the Beta distribution.
fprintf('B=%0.4f alpha=%0.4f beta=%0.4f - p-value (2 tailed): %0.4f\n',B,alpha1,beta1,P3)
disp(tr); disp(' ')
fprintf('Gamma distribution approximation\n')
disp(tr)
alpha2=m^2/s2;
beta2=s2/m;
P4=1-gamcdf(H,alpha2,beta2); %probability associated to the Gamma Distribution.
fprintf('G=H alpha=%0.4f beta=%0.4f - p-value (2 tailed): %0.4f\n',alpha2,beta2,P4)
disp(tr)
if nargout
STATS.Hbiased=Hbiased;
STATS.CF=CF;
STATS.H=H;
STATS.Chi_square.chi2=H;
STATS.Chi_square.df=df;
STATS.Chi_square.pvalue=P1;
STATS.F.F=F;
STATS.F.dfn=df;
STATS.F.dfd=dfd;
STATS.F.pvalue=P2;
STATS.Beta.m=m;
STATS.Beta.s2=s2;
STATS.Beta.eta=eta;
STATS.Beta.B=B;
STATS.Beta.alpha=alpha1;
STATS.Beta.beta=beta1;
STATS.Beta.pvalue=P3;
STATS.Gamma.m=m;
STATS.Gamma.s2=s2;
STATS.Gamma.G=H;
STATS.Gamma.alpha=alpha2;
STATS.Gamma.beta=beta2;
STATS.Gamma.pvalue=P4;
end