Code covered by the BSD License

Bartregress

Antonio Trujillo-Ortiz (view profile)

16 Jun 2010 (Updated )

Bartlett's Three Group Regression.

Bartregress.m
```function [b,bint,n] = Bartregress(x,y,alpha)
%BARTREGRESS Bartlett's Three Group Regression.
%   Model II regression should be used when the two variables in the
%   regression equation are random and subject to error, i.e. not
%   controlled by the researcher. Model I regression using ordinary least
%   squares underestimates the slope of the linear relationship between the
%   variables when they both contain error. According to Sokal and Rohlf
%   (1995), the subject of Model II regression is one on which research and
%   controversy are continuing and definitive recommendations are difficult
%   to make.
%
%   BARTREGRESS is a Model II procedure. It is an approach to estimate the
%   functional relationship in Model II by estimating the slope from
%   segments of the sample arrayed by magnitude of the x variate. Ricker
%   (1973) argues that this method yield biased estimates of functional
%   relationships. Its main handicap is that the regression lines are not
%   the same depending on whether the grouping (into three groups) is made
%   based on x or y. The regression line is not guaranteed to pass through
%   the centroid of the scatter of points and the slope estimator is not
%   symmetric. However, here it is developed the computational procedure to
%   all those people interested on it.
%
%   [B,BINT] = BARTREGRESS(X,Y,ALPHA) returns the vector B of
%   regression coefficients in the linear Model, a matrix BINT of the given
%   confidence intervals for B, and the N three groups size vector.
%
%   BARTREGRESS treats NaNs in X or Y as missing values, and removes them.
%
%   Syntax: [b,bint,n] = Bartregress(x,y,alpha)
%
%   Example. From the Box 14.12 (California fish cabezon [Scorpaenichthys
%   marmoratus]) of Sokal and Rohlf (1995). The data are:
%
%   x=[14,17,24,25,27,33,34,37,40,41,42];
%   y=[61,37,65,69,54,93,87,89,100,90,97];
%
%   Calling on Matlab the function:
%                [b,bint,n] = Bartregress(x,y)
%
%
%   b =
%     21.8909    1.8000
%
%   bint =
%      0.8913    2.5851
%     -1.9484   49.4817
%
%   n =
%      4     3     4
%
%   Created by A. Trujillo-Ortiz and R. Hernandez-Walls
%             Universidad Autonoma de Baja California
%             Apdo. Postal 453
%             Mexico.
%             atrujo@uabc.edu.mx
%
%   Copyright (C)  June 7, 2010.
%
%   To cite this file, this would be an appropriate format:
%   Trujillo-Ortiz, A. and R. Hernandez-Walls. (2010). Bartregress:
%      Bartletts Three Group Regression. A MATLAB file. [WWW document]. URL
%      http://www.mathworks.com/matlabcentral/fileexchange/27919-bartregress
%
%   References:
%   Sokal, R. R. and Rohlf, F. J. (1995), Biometry. The principles and
%              practice of the statistics in biologicalreserach. 3rd. ed.
%              New-York:W.H.,Freeman. [Sections 14.13 and 15.7]
%

if  nargin < 2
error('Bartregress:TooFewInputs', ...
'BARTREGRESS requires at least two input arguments.');
elseif nargin == 2
alpha = 0.05;
end

x = x(:); y = y(:);

% Check that matrix (X) and rigth hand side (Y) have compatible dimensions
[n,ncolx] = size(x);
if ~isvector(y)
error('Bartregress:InvalidData', 'Y must be a vector.');
elseif numel(y) ~= n
error('Bartregress:InvalidData', ...
'The number of rows in Y must equal the number of rows in X.');
end

% Remove missing values, if any
wasnan = (isnan(y) | any(isnan(x),2));
havenans = any(wasnan);
if havenans
y(wasnan) = [];
x(wasnan,:) = [];
n = length(y);
end

N = n;
A = [x,y];
r = n/3;
B = A';
C = sortrows(B')';
mX = mean(C(1,:)); %mean of X total data
mY = mean(C(2,:)); %%mean of Y total data

n1 = length(1:fix(r)+1);
n2 = length(fix(r)+2:n-fix(r)-1);
n3 = length(n-fix(r):n);
mX1 = mean(C(1,1:fix(r)+1)); %mean of X first third
mY1 = mean(C(2,1:fix(r)+1)); %mean of Y first third
sX1 = sum(C(1,1:fix(r)+1));
sY1 = sum(C(2,1:fix(r)+1));
sX2 = sum(C(1,fix(r)+2:n-fix(r)-1));
sY2 = sum(C(2,fix(r)+2:n-fix(r)-1));
mX3 = mean(C(1,n-fix(r):n)); %mean of X last third
mY3 = mean(C(2,n-fix(r):n)); %mean of X last third
sX3 = sum(C(1,n-fix(r):n));
sY3 = sum(C(2,n-fix(r):n));
x2k = (sX1^2/n1)+(sX2^2/n2)+(sX3^2/n3);
y2k = (sY1^2/n1)+(sY2^2/n2)+(sY3^2/n3);
xyk = (sX1*sY1/n1)+(sX2*sY2/n2)+(sX3*sY3/n3);

bp = (mY3-mY1)/(mX3-mX1); %slope
ap = mY-bp*mX; %intercept
b = [ap bp];
n = [n1 n2 n3];

S = cov(x,y);
x2 = (N-1)*S(1,1)+(N*mX^2);
y2 = (N-1)*S(2,2)+(N*mY^2);
xy = (N-1)*S(1,2)+(N*mX*mY);
SCx = x2-x2k;
SCy = y2-y2k;
SCxy = xy-xyk;

t = tinv(1-(alpha/2),N-length(n));
C = n1*(mX3-mX1)^2*(N-3)/(2*t^2);
B = (bp*C)-SCxy;
E = sqrt((C*(((bp^2)*SCx)+SCy-(2*bp*SCxy)))-(SCx*SCy)-SCxy^2);
F = C-SCx;
b1 = (B-E)/F;
b2 = (B+E)/F;
a1 = mY-b2*mX;
a2 = mY-b1*mX;
bint = [b1,b2;a1,a2];

return,```