function [b,bint,n] = Bartregress(x,y,alpha)
%BARTREGRESS Bartlett's Three Group Regression.
% Model II regression should be used when the two variables in the
% regression equation are random and subject to error, i.e. not
% controlled by the researcher. Model I regression using ordinary least
% squares underestimates the slope of the linear relationship between the
% variables when they both contain error. According to Sokal and Rohlf
% (1995), the subject of Model II regression is one on which research and
% controversy are continuing and definitive recommendations are difficult
% to make.
%
% BARTREGRESS is a Model II procedure. It is an approach to estimate the
% functional relationship in Model II by estimating the slope from
% segments of the sample arrayed by magnitude of the x variate. Ricker
% (1973) argues that this method yield biased estimates of functional
% relationships. Its main handicap is that the regression lines are not
% the same depending on whether the grouping (into three groups) is made
% based on x or y. The regression line is not guaranteed to pass through
% the centroid of the scatter of points and the slope estimator is not
% symmetric. However, here it is developed the computational procedure to
% all those people interested on it.
%
% [B,BINT] = BARTREGRESS(X,Y,ALPHA) returns the vector B of
% regression coefficients in the linear Model, a matrix BINT of the given
% confidence intervals for B, and the N three groups size vector.
%
% BARTREGRESS treats NaNs in X or Y as missing values, and removes them.
%
% Syntax: [b,bint,n] = Bartregress(x,y,alpha)
%
% Example. From the Box 14.12 (California fish cabezon [Scorpaenichthys
% marmoratus]) of Sokal and Rohlf (1995). The data are:
%
% x=[14,17,24,25,27,33,34,37,40,41,42];
% y=[61,37,65,69,54,93,87,89,100,90,97];
%
% Calling on Matlab the function:
% [b,bint,n] = Bartregress(x,y)
%
% Answer is:
%
% b =
% 21.8909 1.8000
%
% bint =
% 0.8913 2.5851
% -1.9484 49.4817
%
% n =
% 4 3 4
%
% Created by A. Trujillo-Ortiz and R. Hernandez-Walls
% Facultad de Ciencias Marinas
% Universidad Autonoma de Baja California
% Apdo. Postal 453
% Ensenada, Baja California
% Mexico.
% atrujo@uabc.edu.mx
%
% Copyright (C) June 7, 2010.
%
% To cite this file, this would be an appropriate format:
% Trujillo-Ortiz, A. and R. Hernandez-Walls. (2010). Bartregress:
% Bartletts Three Group Regression. A MATLAB file. [WWW document]. URL
% http://www.mathworks.com/matlabcentral/fileexchange/27919-bartregress
%
% References:
% Sokal, R. R. and Rohlf, F. J. (1995), Biometry. The principles and
% practice of the statistics in biologicalreserach. 3rd. ed.
% New-York:W.H.,Freeman. [Sections 14.13 and 15.7]
%
if nargin < 2
error('Bartregress:TooFewInputs', ...
'BARTREGRESS requires at least two input arguments.');
elseif nargin == 2
alpha = 0.05;
end
x = x(:); y = y(:);
% Check that matrix (X) and rigth hand side (Y) have compatible dimensions
[n,ncolx] = size(x);
if ~isvector(y)
error('Bartregress:InvalidData', 'Y must be a vector.');
elseif numel(y) ~= n
error('Bartregress:InvalidData', ...
'The number of rows in Y must equal the number of rows in X.');
end
% Remove missing values, if any
wasnan = (isnan(y) | any(isnan(x),2));
havenans = any(wasnan);
if havenans
y(wasnan) = [];
x(wasnan,:) = [];
n = length(y);
end
N = n;
A = [x,y];
r = n/3;
B = A';
C = sortrows(B')';
mX = mean(C(1,:)); %mean of X total data
mY = mean(C(2,:)); %%mean of Y total data
n1 = length(1:fix(r)+1);
n2 = length(fix(r)+2:n-fix(r)-1);
n3 = length(n-fix(r):n);
mX1 = mean(C(1,1:fix(r)+1)); %mean of X first third
mY1 = mean(C(2,1:fix(r)+1)); %mean of Y first third
sX1 = sum(C(1,1:fix(r)+1));
sY1 = sum(C(2,1:fix(r)+1));
sX2 = sum(C(1,fix(r)+2:n-fix(r)-1));
sY2 = sum(C(2,fix(r)+2:n-fix(r)-1));
mX3 = mean(C(1,n-fix(r):n)); %mean of X last third
mY3 = mean(C(2,n-fix(r):n)); %mean of X last third
sX3 = sum(C(1,n-fix(r):n));
sY3 = sum(C(2,n-fix(r):n));
x2k = (sX1^2/n1)+(sX2^2/n2)+(sX3^2/n3);
y2k = (sY1^2/n1)+(sY2^2/n2)+(sY3^2/n3);
xyk = (sX1*sY1/n1)+(sX2*sY2/n2)+(sX3*sY3/n3);
bp = (mY3-mY1)/(mX3-mX1); %slope
ap = mY-bp*mX; %intercept
b = [ap bp];
n = [n1 n2 n3];
S = cov(x,y);
x2 = (N-1)*S(1,1)+(N*mX^2);
y2 = (N-1)*S(2,2)+(N*mY^2);
xy = (N-1)*S(1,2)+(N*mX*mY);
SCx = x2-x2k;
SCy = y2-y2k;
SCxy = xy-xyk;
t = tinv(1-(alpha/2),N-length(n));
C = n1*(mX3-mX1)^2*(N-3)/(2*t^2);
B = (bp*C)-SCxy;
E = sqrt((C*(((bp^2)*SCx)+SCy-(2*bp*SCxy)))-(SCx*SCy)-SCxy^2);
F = C-SCx;
b1 = (B-E)/F;
b2 = (B+E)/F;
a1 = mY-b2*mX;
a2 = mY-b1*mX;
bint = [b1,b2;a1,a2];
return,