function [outAOVII] = outAOVII(X,alpha)
% OUTAOVII Identification of outliers in a one-way analysis of variance model II (random effects).
% In a one-way analysis of variance random effects model one would expect that the measurements
% lie close together because the same quantity was measured and states that the class effects stem
% from a common source and therefore should not difer too much. In the random model some variation
% is allowed and the decision is assisted by the assumption of the distribution of the random class
% effects. But one can observe some type of 'outliers', that according to Barnett and Lewis (1994),
% it is an observation or subset of observations which appears to be inconsistent with the remainder
% of that set of data. In random effects model can be distinguished three types of outliers:
% 1. Within the classes
% 2. Within the random effects
% 3. Respect to scale (objects occupy the same relative positions in one
% measurement space as they do in the other)
% If the model is satisfied, these outliers are not likely to occur because of the light tails of the
% normal distribution and homoscedasticity, and it is considered to describe the ideal situation without
% outliers.
% Therefore, it is important to set up formal rules that identify these outliers. Wellmann and Gather
% (2003) provide rules and details of a robust procedure which involves the median-based estimators.
% Here, we developed a Matlab function that deals with such a fundamentals.
%
% **NOTE: For an unbalanced design we are still working on in order to
% implement the procedure to detect the class scale-outliers.
% Please, keep on date of the file updatings.**
%
% Syntax: function [outAOVII] = outAOVII(X,alpha)
%
% Inputs:
% X - data matrix (column 1= data;column 2=class code).
% alpha - significance (default = 0.05).
% Outputs:
% A complete summary of the identification of outliers
% - within the i-th class.
% - within the random effects.
% - as a scale-outlier class.
%
% Example: From Kreienbrock et al. (1999), for the intercomparisions of detectors of measurements
% of alpha-energy emitted by radioactive radon gas taken under identical conditions. Each
% detector supplies one measurement after praparation in a laboratory. Five laboratories
% took part each with five detectors. One would expect that the measurements lie close
% together because the same quantity was measured and the laboratories used the same
% standardized analytical technique. We are interested to identify with a significance of
% 0.1 any outliers.
%
% Laboratory
% -----------------------------------------
% 1 2 3 4 5
% -----------------------------------------
% 166 166 167 167 179
% 156 161 167 154 165
% 145 237 259 208 272
% 186 161 166 134 145
% 148 144 143 135 97
% -----------------------------------------
%
% Data matrix must be:
% X = [166 1;166 1;167 1;167 1;179 1;156 2;161 2;167 2;154 2;165 2;145 3;237 3;259 3;
% 208 3;272 3;186 4;161 4;166 4;134 4;145 4;148 5;144 5;143 5;135 5;97 5];
%
% Calling on Matlab the function:
% outAOVII(X,0.1)
%
% Answer is:
%
% It is a balanced AOV Model II (random effects) design.
%
% The number of classes are: 5
%
% With a given significance of: 0.10
%
% Identification of an observation to be an
% outlier within the ith class.
% ------------------------------------------
% Obs. Class
% ------------------------------------------
% 145.0000 3
% 272.0000 3
% 97.0000 5
% ------------------------------------------
%
% Identification of a random effect as
% outlier within the random effects.
% ---------------------------------------
% Class
% ---------------------------------------
% 3
% ---------------------------------------
%
% No scale-outlier class is identified.
%
%
% Created by A. Trujillo-Ortiz, R. Hernandez-Walls and A. Castro-Perez
% Facultad de Ciencias Marinas
% Universidad Autonoma de Baja California
% Apdo. Postal 453
% Ensenada, Baja California
% Mexico.
% atrujo@uabc.mx
%
% Copyright (C) October 2, 2005.
%
% To cite this file, this would be an appropriate format:
% Trujillo-Ortiz, A., R. Hernandez-Walls and A. Castro-Perez. (2005). outAOVII: Identification
% of outliers in a one-way analysis of variance model II (random effects). A MATLAB file.
% [WWW document]. URL http://www.mathworks.com/matlabcentral/fileexchange/loadFile.do?objectId=8819
%
% References:
% Barnett, V. and Lewis, T. (1994), Outliers in Statistical Data.
% New-York:John Wiley & Sons, Inc.
% Kreienbrock, L., Poffijn, A., Tirmache, M., Feider, M., Kies, A. and Darby, S. C. (1999),
% Intercomparisions of passive radon-detectors under field conditions in
% epidemiological studies. Health Physics, 76(5):558-563.
% Wellmann, J. and Gather, U. (2003), Identification of outliers in a one-way random effects
% model. Statistical Papers, 44(3):335-348.
%
if nargin < 2,
alpha = 0.05; %(default)
end;
if (alpha <= 0 | alpha >= 1)
fprintf('Warning: significance level must be between 0 and 1\n');
return;
end;
l = max(X(:,2)); %number of classes
%Analysis of Variance Procedure.
med=[];mad=[];nn=[];
indice = X(:,2);
for i = 1:l
Xe = find(indice==i);
eval(['X' num2str(i) '=X(Xe,1);']);
eval(['med' num2str(i) '=median(X' num2str(i) ');'])
eval(['mad' num2str(i) '=median(abs(X' num2str(i) ' - med' num2str(i) '));'])
eval(['n' num2str(i) '=length(X' num2str(i) ') ;'])
eval(['xmed= med' num2str(i) ';'])
eval(['xmad= mad' num2str(i) ';'])
eval(['xn= n' num2str(i) ';'])
med=[med xmed];mad=[mad xmad];nn=[nn xn];
end;
mm = median(med); %median of medians
r = abs(med-mm); %absolute residuals
c = 1.4826; %factor reciprocal to mad of the N(0,1)-distribution
l = length(med); %number of classes
%ms = n;
m = nn;
n = sum(nn);
if nn(i) ~= n/l,
disp('It is an unbalanced AOV Model II (random effects) design.')
ds = 1;
else
disp('It is a balanced AOV Model II (random effects) design.')
ds = 2;
end;
disp(' ')
fprintf('The number of classes are:%2i\n\n', l);
%Factors to asures that the MAD of m or l stochastically independent
%normally distributed random variables is an approximately unbiased
%estimator for the underlying standard deviation
EM = [];EL = [];
for i = 1:l,
if 2 >= m(i) <=9,
bm = [1.196 1.495 1.363 1.206 1.200 1.140 1.129 1.107];
y(i) = bm(m(i)-1);
em = c*y(i);
else (m(i) > 9);
bm(i) = m(i)/(m(i)-0.8);
em = c*bm(i);
end;
EM = [EM em];
end;
if 2 >= l <=9,
bl = [1.196 1.495 1.363 1.206 1.200 1.140 1.129 1.107];
y = bl(l-1);
el = c*y;
else (l > 9);
bl = l/(l-0.8);
el = c*bl;
end;
s = diag(mad'*EM); %Normalized MAD used to describe variation within the classes
eE = 0.9797+1.1188*(l-3.5592)/n; %factor to achieve that the estimator of
%the variance of the individual measurements error (s2E) becomes approximately
%unbiased
s2E = eE*median(s.^2);
eU = l/(l+1.56)*el^2; %factor to achieve that the mean of s2U is approximately
%equal to this variance in the balanced case
s2U = eU*median((med-mm).^2);
g = sqrt(s2U/s2E); %estimation of the ratio sU/sE; used to get the critical values
a = alpha;
if a == 0.01,
a = 1;
elseif a == 0.025,
a = 2;
elseif a == 0.05,
a = 3;
elseif a == 0.075,
a = 4;
elseif a == 0.1,
a = 5;
elseif a == 0.125,
a = 6;
elseif a == 0.15,
a = 7;
else a == 0.2,
a = 8;
end;
ll = length(med);
oddl = bitand(abs(ll),1);
if oddl == 1;
ll = 2;
else
ll = 1;
end;
l = length(med);
ms = m;
oddm = [];
for i = 1:l,
odd = bitand(abs(m(i)),1);
if odd == 1;
ms = 2;
else
ms = 1;
end;
oddm = [oddm;ms];
end;
e=[];
for i = 1:l,
ee = [a ll oddm(i)];
e=[e;ee];
end;
%Table for approximation for CE(a,g)
MA = [43.2945 0.2168 0.0983;98.7835 0.0000 0.4675;
66.7645 1.6639 5.2218;149.7461 0.0000 0.0000;
28.7092 0.1976 0.0811;67.3591 0.0841 1.3156;
43.9021 0.4244 1.3694;90.4922 2.3905 499517.3;
20.0756 0.1701 0.0826;46.8162 0.1144 0.3953;
29.2568 0.2408 0.4617;59.8869 0.3051 6.7851;
15.1429 0.1546 0.0861;35.7368 0.1304 0.2639;
22.4798 0.1939 0.3233;45.2405 0.1852 1.9774;
11.8195 0.1424 0.0923;28.7363 0.1355 0.2151;
18.0670 0.1693 0.2694;36.2594 0.1606 0.9856;
9.4091 0.1316 0.1006;23.9408 0.1339 0.2078;
14.8461 0.1538 0.2436;29.9873 0.1542 0.6556;
7.5064 0.1224 0.1103;20.3330 0.1300 0.2105;
12.3927 0.1397 0.2351;25.3297 0.1469 0.5351;
4.5492 0.1062 0.1262;14.5664 0.1227 0.2069;
8.7006 0.1194 0.2333;18.6299 0.1342 0.4301];
rMA = [1 1 1;1 1 2;1 2 1;1 2 2;2 1 1;2 1 2;2 2 1;2 2 2;
3 1 1;3 1 2;3 2 1;3 2 2;4 1 1;4 1 2;4 2 1;4 2 2;5 1 1;
5 1 2;5 2 1;5 2 2;6 1 1;6 1 2;6 2 1;6 2 2;7 1 1;7 1 2;
7 2 1;7 2 2;8 1 1;8 1 2;8 2 1;8 2 2];
E = [];
for i = 1:l,
for j = 1:length(rMA),
if e(i,:) == rMA(j,:);
EE = j;
E = [E;EE];
end;
end;
end;
TE1 = [];
for i = 1:l,
te1 = MA(E(i),1);
TE1 = [TE1 te1];
end;
TE2 = [];
for i = 1:l,
te2 = MA(E(i),2);
TE2 = [TE2 te2];
end;
TE3 = [];
for i = 1:l,
te3 = MA(E(i),3);
TE3 = [TE3 te3];
end;
%Table for approximation for CU(a,g)
MB = [22.1191 1.7531 -0.8739;38.0453 2.5403 -1.0935;
14.7482 0.9919 -0.8284;19.1942 2.4546 -0.9435;
11.5456 -0.2643 -0.8212;10.9473 2.3549 -0.8344;
11.5518 -1.9997 -0.8608;7.6956 2.2635 -0.7732;
13.4682 -4.3361 -0.9282;5.8465 2.1881 -0.7278;
21.6681 -8.2832 -1.0666;4.6777 2.1073 -0.6938;
44.9717 -13.7962 -1.2527;3.8730 2.0134 -0.6676;
45.0000 -19.4365 -1.2523;2.8178 1.7854 -0.6282];
rMB = [1 1;1 2;2 1;2 2;3 1;3 2;4 1;4 2;5 1;5 2;
6 1;6 2;7 1;7 2;8 1;8 2];
u = [a ll];
for i = 1:length(rMB),
if rMB(i,:) == u;
U = i;
end;
end;
TU1 = MB(U,1);
TU2 = MB(U,2);
TU3 = MB(U,3);
a = alpha;
l = length(med);
n = sum(nn);
an = 1-(1-a)^(1/n); %probability to get an outlier observation, depending
%of a prespecified alpha-value
al = 1-(1-a)^(1/l); %probability to get an outlier random effect, depending
%of a prespecified alpha-value
cE=[];
for i = 1:l,
ce = norminv(1-an/2)+((l+TE1(i))/n)+(TE2(i)*(g/(g+TE3(i)))^2);
cE=[cE ce];
end;
%Identification of an observation to be a location-a_n-outlier within the
%ith class
u = [];
for i =1:l,
x = ((m(i)*s2U)/(s2E+(m(i)*s2U)))*(med(i)-mm);
u = [u x];
end;
OE = [];
for i = 1:l;
eval(['oe' num2str(i) '=abs(X' num2str(i) ' - mm - u(i));']);
eval(['xoe= oe' num2str(i) ';']);
OE = [OE;xoe];
end;
sdE = sqrt(s2E);
vE = cE*sdE;
OE = [OE X(:,2)];
C = X(:,2);
O1 = [];
indice = OE(:,2);
for i = 1:l
Oe = find(indice==i);
eval(['OE' num2str(i) '=OE(Oe,1);']);
eval(['C' num2str(i) '=OE(Oe,2);']);
eval(['o' num2str(i) '=find(OE' num2str(i) ' > vE(i));']);
eval(['xo= (X' num2str(i) '(o' num2str(i) '));']);
eval(['xi= (C' num2str(i) '(o' num2str(i) '));']);
if ~isempty(xo),
O1 = [O1;xo xi];
end;
end;
fprintf('With a given significance of: %.2f\n', alpha);
disp(' ')
disp('Identification of an observation to be an ')
disp('outlier within the ith class.')
fprintf('------------------------------------------\n');
disp(' Obs. Class')
fprintf('------------------------------------------\n');
fprintf('%12.4f%15i\n',[O1(:,1),O1(:,2)].');
fprintf('------------------------------------------\n');
cU = norminv(1-al/2)+(TU1*(l-TU2)^TU3); %within the random effects
%Identification of a random effect as location-a_l-outlier within the
%random effects
sdU = sqrt(s2U);
vU = cU*sdU;
O2 = [find(abs(u) > vU)]';
disp(' ')
disp('Identification of a random effect as ')
disp('outlier within the random effects.')
fprintf('---------------------------------------\n');
disp(' Class')
fprintf('---------------------------------------\n');
fprintf('%12i\n',[O2].');
fprintf('---------------------------------------\n');
disp(' ')
if ds==1;
m = m;
disp('NOTE: For an unbalanced design we are still working on in order to implement the procedure to detect the class scale-outliers.')
disp('Please, keep on date of the file updatings.')
else (ds==2);
m = sum(m)/l;
a = alpha;
if a == 0.01,
a = 1;
elseif a == 0.025,
a = 2;
elseif a == 0.05,
a = 3;
elseif a == 0.075,
a = 4;
elseif a == 0.1,
a = 5;
elseif a == 0.125,
a = 6;
elseif a == 0.15,
a = 7;
else a == 0.2,
a = 8;
end;
oddl = bitand(abs(l),1);
if oddl == 1;
l = 2;
else
l = 1;
end;
oddm = bitand(abs(m),1);
if oddm == 1;
m = 2;
else
m = 1;
end;
%Table for approximation for CS(a,g)
MC = [0.4477 15.8189 2.5456 0.1735;0.4477 15.3729 2.3991 -0.9559;
0.4125 14.4887 3.1908 0.2629;0.4125 14.0051 3.0530 -0.9224;
0.3832 13.5444 3.9216 0.3426;0.3832 12.9677 3.7308 -0.8865;
0.3663 12.9185 4.4797 0.3627;0.3663 12.3486 4.2342 -0.8680;
0.3532 12.4912 4.9454 0.3879;0.3532 11.9512 4.7216 -0.8481;
0.3426 12.2155 5.4184 0.4213;0.3426 11.6554 5.1678 -0.8325;
0.3336 11.9883 5.8711 0.4410;0.3336 11.4529 5.6309 -0.8162;
0.3185 11.6439 6.6663 0.5010;0.3185 11.0477 6.4034 -0.7937];
rMC = [1 1;1 2;2 1;2 2;3 1;3 2;4 1;4 2;5 1;5 2;
6 1;6 2;7 1;7 2;8 1;8 2];
s = [a m];
for i = 1:length(rMC),
if rMC(i,:) == s;
S = i;
end;
end;
TS1 = MC(S,1);
TS2 = MC(S,2);
TS3 = MC(S,3);
TS4 = MC(S,4);
cS = TS1+((TS2*l)/((l+TS3)*(m+TS4))); %of a class as scale
%Identification of a class as scale-a_l-outlier
s = mad*em;
h2 = abs(log(s)-log(sdE));
h2 = find(h2 > cS);
XX2 = X(:,2);
o3 = XX2(h2);
if isempty(h2) == 1,
disp('No scale-outlier class is identified.');
else
disp('The follow class(es) is(are) identified as scale-outlier(s):');
o3
end;
end;
hold on
l = max(X(:,2));
m = sum(nn)/l;
plot(X(:,2),X(:,1),'*r',O1(:,2),O1(:,1),'sb');
axis([0 (1+l) (min(X(:,1))-min(X(:,1))*.7) (max(X(:,1))+max(X(:,1))*.3)])
legend(['+',[' ' 2] ' = Outlier(s) '],0);
xlabel('Class');
ylabel('Measurement');
if ds==1;
st = (['Classes = ',num2str(l) ', ' 'alpha-value = ',num2str(alpha)]);
title({'\fontsize{14}' 'Identification of Outliers in a One-Way ANOVA Random Effects Model: It is an unbalanced design.' num2str(st) ''});
else (ds==2);
st = (['Classes = ',num2str(l) ', ' 'Sizes = ',num2str(m) ', ' 'alpha-value = ',num2str(alpha)]);
title({'\fontsize{14}' 'Identification of Outliers in a One-Way ANOVA Random Effects Model: It is a balanced design.' num2str(st) ''});
end;
return;