Code covered by the BSD License

# MyRegression

### Giuseppe Cardillo (view profile)

02 Jul 2007 (Updated)

A simple function for least-squares (LS) linear regression with many informative outputs

[slope,intercept,STAT]=myregr(x,y,verbose)
function [slope,intercept,STAT]=myregr(x,y,verbose)
%MYREGR: Perform a least-squares linear regression.
%This function computes a least-squares linear regression supplying several
%output informations.
%
% Syntax: 	myregr(x,y)
%
%     Inputs:
%           X - Array of the independent variable
%           Y - Dependent variable. If Y is a matrix, the i-th Y row is a
%           repeated measure of i-th X point. The mean value will be used
%           verbose - Flag to display all information (default=1)
%     Outputs:
%           - Slope with standard error and 95% C.I.
%           - Intercept with standard error and 95% C.I.
%           - Pearson's Correlation coefficient with 95% C.I. and its
%             adjusted form (depending on the elements of X and Y arrays)
%           - Spearman's Correlation coefficient
%           - Regression Standard Error
%           - Total Variability
%           - Variability due to regression
%           - Residual Variability
%           - Student's t-Test on Slope (to check if slope=0)
%           - Student's t-Test on Intercept (to check if intercept=0)
%           - Power of the regression
%           - Modified Levene's test for homoschedasticity of residuals
%           - a plot with:
%                o Data points
%                o Least squares regression line
%                o Red dotted lines: 95% Confidence interval of regression
%                o Green dotted lines: 95% Confidence interval of new y
%                                       evaluation using this regression.
%           - Residuals plot
%
%   [Slope]=myregr(...) returns a structure of slope containing value, standard
%   error, lower and upper bounds 95% C.I.
%
%   [Slope,Intercept]=myregr(...) returns a structure of slope and intercept
%   containing value, standard error, lower and upper bounds 95% C.I.
%
% Example:
%       x = [1.0 2.3 3.1 4.8 5.6 6.3];
%       y = [2.6 2.8 3.1 4.7 4.1 5.3];
%
%   Calling on Matlab the function:
%             myregr(x,y)
%
%
%                         Slope
% -----------------------------------------------------------
%      Value          S.E.                95% C.I.
% -----------------------------------------------------------
%    0.50107        0.09667        0.23267        0.76947
% -----------------------------------------------------------
%
%                       Intercept
% -----------------------------------------------------------
%      Value          S.E.                95% C.I.
% -----------------------------------------------------------
%    1.83755        0.41390        0.68838        2.98673
% -----------------------------------------------------------
%
%             Pearson's Correlation Coefficient
% -----------------------------------------------------------
% -----------------------------------------------------------
%    0.93296        0.49988        0.99281        0.91620
% -----------------------------------------------------------
% Spearman's Correlation Coefficient: 0.9429
%
%                       Other Parameters
% ------------------------------------------------------------------------
%      R.S.E.                         Variability
%      Value        Total            by regression           Residual
% ------------------------------------------------------------------------
%    0.44358        6.07333        5.28627 (87.0407%)     0.78706 (12.9593%)
% ------------------------------------------------------------------------
%
% Student's t-test on slope=0
% ----------------------------------------------------------------
% t = 5.1832    Critical Value = 2.7764     p = 0.0066
% Test passed: slope ~= 0
% ----------------------------------------------------------------
%
% Student's t-test on intercept=0
% ----------------------------------------------------------------
% t = 4.4396    Critical Value = 2.7764     p = 0.0113
% Test passed: intercept ~= 0
% ----------------------------------------------------------------
%
% Power of regression
% ----------------------------------------------------------------
% alpha = 0.05  n = 6     Zrho = 1.6807  std.dev = 0.5774
% Power of regression: 0.6046
% ----------------------------------------------------------------
%
% ...and the plot, of course.
%
%
%           Created by Giuseppe Cardillo
%           giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:
% Cardillo G. (2007) MyRegression: a simple function on LS linear
% regression with many informative outputs.
% http://www.mathworks.com/matlabcentral/fileexchange/15473

%Input error handling
if ~isvector(x)
    error('X must be an array.');
end

if isvector(y)
    if any(size(x)~=size(y))
        error('X and Y arrays must have the same length.');
    end
else
    if any(length(x)~=size(y,1))
        error('the length of X and the rows of Y must be equal.');
    end
end

if nargin==2
    verbose=1;
else
    verbose=logical(verbose);
end

x=x(:);
if isvector(y)
    yt=y(:); %column vectors
else
    yt=mean(y,2); %use the mean of the repeated measures of each X point
end

if ~issorted(x) %x must be monotonically increasing
    z=sortrows([x yt],[1 2]);
    x=z(:,1);
    yt=z(:,2);
    clear z
end

%collapse duplicated x values onto the mean of their y values
ux=unique(x);
if length(ux)~=length(x)
    uy=zeros(size(ux));
    for I=1:length(ux)
        uy(I)=mean(yt(x==ux(I))); %mean of a single value is the value itself
    end
    x=ux(:); yt=uy(:);
    clear uy I
end
clear ux

xtmp=[x ones(length(x),1)]; %design matrix for regress function
ytmp=yt;

%regression coefficients
[p,pINT,R,Rint] = regress(ytmp,xtmp);

%check the presence of outliers: a point is an outlier at the 95% level
%when the confidence interval of its residual does not contain zero
outl=find(ismember(sign(Rint),[-1 1],'rows')==0);
outlremoved=false; %track whether outliers were actually deleted
if ~isempty(outl)
    fprintf(['These points are outliers at 95%% fiducial level: ' repmat('%i ',1,length(outl)) '\n'],outl);
    reply = input('Do you want to delete outliers? Y/N [Y]: ', 's');
    disp(' ')
    %honor the user's answer (the original code deleted them unconditionally)
    if isempty(reply) || strncmpi(reply,'Y',1)
        ytmp(outl)=[]; xtmp(outl,:)=[];
        [p,pINT,R] = regress(ytmp,xtmp);
        outlremoved=true;
    end
end

xtmp(:,2)=[]; %delete the constant column: keep only the x values
%save coefficients value
m(1)=p(1); q(1)=p(2);

n=length(xtmp);
xm=mean(xtmp); xsd=std(xtmp);

%standard error of regression coefficients
%Student's critical value
if isvector(y)
    cv=tinv(0.975,n-2);
else
    cv=tinv(0.975,sum(size(y))-3);
end
m(2)=(pINT(3)-p(1))/cv; %slope standard error = (upper bound - estimate)/t
m=[m pINT(1,:)]; %add slope 95% C.I.
q(2)=(pINT(4)-p(2))/cv; %intercept standard error
q=[q pINT(2,:)]; %add intercept 95% C.I.

%Pearson's correlation coefficient with its 95% bounds
[rp,pr,rlo,rup]=corrcoef(xtmp,ytmp);
r(1)=rp(2); r(2)=realsqrt((1-r(1)^2)/(n-2)); r(3)=rlo(2); r(4)=rup(2);
r(5)=sign(r(1))*(abs(r(1))-((1-abs(r(1)))/(n-2))); %adjusted form

%Spearman's correlation coefficient
rx=tiedrank(xtmp);
ry=tiedrank(ytmp);
d=rx-ry;
rs=1-(6*sum(d.^2)/(n^3-n));

%Total variability
ym=polyval(p,xm);
vtot=sum((ytmp-ym).^2);

%Variability due to regression
ystar=ytmp-R; %fitted values
vreg=sum((ystar-ym).^2);

%Residual variability
vres=sum(R.^2);

%regression standard error (RSE)
if isvector(y)
    RSE=realsqrt(vres/(n-2));
else
    %NOTE(review): this branch assumes Y rows still match the (possibly
    %deduplicated) X points - confirm when X contains duplicated values
    if outlremoved
        y2=y; y2(outl,:)=[]; %drop the whole rows of the removed outliers
        RSE=realsqrt((vres+sum(sum((y2-repmat(ytmp,1,size(y2,2))).^2)))/(sum(size(y2))-3));
    else
        RSE=realsqrt((vres+sum(sum((y-repmat(yt,1,size(y,2))).^2)))/(sum(size(y))-3));
    end
end

%Confidence interval at 95% of regression
sy=RSE*realsqrt(1/n+(((xtmp-xm).^2)/((n-1)*xsd^2)));
cir=[ystar+cv*sy ystar-cv*sy];

%Confidence interval at 95% of a new observation (this is the confidence
%interval that should be used when you evaluate a new y with a new observed
%x)
sy2=realsqrt(sy.^2+RSE^2);
cir2=[ystar+cv*sy2 ystar-cv*sy2];

%display results
if verbose==1
    disp('REGRESSION SETTING X AS INDEPENDENT VARIABLE')
    tr=repmat('-',1,80);
    fprintf('\t\t\t\tSlope\n')
    disp(tr)
    fprintf('Value\t\tS.E.\t\t\t   95%% C.I.\n')
    disp(tr)
    fprintf('%0.4f\t\t%0.4f\t\t%0.4f\t\t\t%0.4f\n',m)
    disp(tr)
    disp(' ')
    fprintf('\t\t\t\tIntercept\n')
    disp(tr)
    fprintf('Value\t\tS.E.\t\t\t   95%% C.I.\n')
    disp(tr)
    fprintf('%0.4f\t\t%0.4f\t\t%0.4f\t\t\t%0.4f\n',q)
    disp(tr)
    disp(' ')
    fprintf('\t\t\tPearson''s Correlation Coefficient\n')
    disp(tr)
    disp(tr)
    fprintf('%0.4f\t\t%0.4f\t\t%0.4f\t\t\t%0.4f\t\t%0.4f\n',r)
    disp(tr)
    disp(' ')
    fprintf('Spearman''s Correlation Coefficient: %0.4f\n',rs)
    disp(' ')
    fprintf('\t\t\t\tOther Parameters\n')
    disp(tr)
    fprintf('R.S.E.\t\t\t\tVariability\n')
    fprintf('Value\t\tTotal\t\tBy regression\t\tResidual\n')
    disp(tr)
    fprintf('%0.4f\t\t%0.4f\t%0.4f (%0.1f%%)\t%0.4f (%0.1f%%)\n',RSE,vtot,vreg,vreg/vtot*100,vres,vres/vtot*100)
    disp(tr)
    disp(' ')
    disp('Press a key to continue'); pause; disp(' ')

    %test on slope
    t=abs(m(1)/m(2)); %Student's t
    disp('Student''s t-test on slope=0')
    disp(tr)
    if pr(2)<1e-4
        fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4e\n',t,cv,pr(2))
    else
        fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4f\n',t,cv,pr(2))
    end
    if t>cv
        disp('Test passed: slope ~= 0')
    else
        disp('Test not passed: slope = 0')
        m(1)=0;
    end
    %power of the t-test requires the FEX powerStudent function (#2907, by
    %Antonio Trujillo-Ortiz); skip gracefully when it is not on the path
    if exist('powerStudent','file')
        powerStudent(t,n-1,2,0.05)
    else
        disp('powerStudent function not found on path: power of t-test skipped')
    end

    %test on intercept
    t=abs(q(1)/q(2)); %Student's t
    p=(1-tcdf(t,n-2))*2; %p-value
    disp('Student''s t-test on intercept=0')
    disp(tr)
    if p<1e-4
        fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4e\n',t,cv,p)
    else
        fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4f\n',t,cv,p)
    end
    if t>cv
        disp('Test passed: intercept ~= 0')
    else
        disp('Test not passed: intercept = 0')
        q(1)=0;
    end
    if exist('powerStudent','file')
        powerStudent(t,n-1,2,0.05)
    else
        disp('powerStudent function not found on path: power of t-test skipped')
    end

    %Power of regression
    Zrho=0.5*reallog((1+abs(r(1)))/(1-abs(r(1)))); %Fisher's z of Pearson's correlation coefficient
    sZ=realsqrt(1/(n-3)); %std.dev of Zrho
    pwr=1-tcdf(1.96-Zrho/sZ,n-2)*2; %power of regression
    disp('Power of regression')
    disp(tr)
    fprintf('alpha = 0.05  n = %d\tZrho = %0.4f\tstd.dev = %0.4f\n',n,Zrho,sZ)
    fprintf('Power of regression: %0.4f\n',pwr)
    disp(tr)
    disp(' ')

    %Test for homoschedasticity of residuals (Modified Levene's test):
    %split residuals at the median x and compare mean absolute deviations
    xme=median(xtmp);
    e1=R(xtmp<=xme); me1=median(e1); d1=abs(e1-me1); dm1=mean(d1); l1=length(e1);
    e2=R(xtmp>xme);  me2=median(e2); d2=abs(e2-me2); dm2=mean(d2); l2=length(e2);
    gl=(l1+l2-2); S2p=(sum((d1-dm1).^2)+sum((d2-dm2).^2))/gl;
    %use a separate variable for the Levene critical value so that STAT.cv
    %still returns the regression critical value
    t=abs(dm1-dm2)/realsqrt(S2p*(1/l1+1/l2)); cvl=tinv(0.975,gl);
    p=(1-tcdf(t,gl)); %p-value
    disp('Modified Levene''s t-test for homoshedasticity of residuals')
    disp(tr)
    if p<1e-4
        fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4e\n',t,cvl,p)
    else
        fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4f\n',t,cvl,p)
    end
    if t>cvl
        disp('Test not passed: Residuals are not homoschedastic')
    else
        disp('Test passed: Residuals are homoschedastic')
    end

    %plot regression
    subplot(1,2,1);
    if isvector(y)
        plot(x,yt,'bo',xtmp,ystar,xtmp,cir,'r:',xtmp,cir2,'g:');
    else
        hold on
        plot(x',yt,'LineStyle','none','Marker','o','MarkerEdgeColor','b')
        plot(xtmp,ystar,'k',xtmp,cir,'r:',xtmp,cir2,'g:');
        hold off
    end
    axis square
    txt=sprintf('Red dotted lines: 95%% Confidence interval of regression\nGreen dotted lines: 95%% Confidence interval of new y evaluation using this regression');
    title(txt)

    %plot residuals with 1*RSE, 1.96*RSE and 2.58*RSE reference bands
    subplot(1,2,2);
    xl=[min(x) max(x)];
    plot(xtmp,R,'bo',xl,[0 0],'k-',xl,[RSE RSE],'g--',xl,[-RSE -RSE],'g--',...
        xl,1.96.*[RSE RSE],'m--',xl,-1.96.*[RSE RSE],'m--',...
        xl,2.58.*[RSE RSE],'r--',xl,-2.58.*[RSE RSE],'r--')
    axis square

    %inverse regression (Y as independent variable) and Deming's regression
    n=length(xtmp);
    sx=sum(xtmp);
    sy=sum(ytmp);
    sx2=sum(xtmp.^2);
    sy2=sum(ytmp.^2);
    sxy=sum(xtmp.*ytmp);
    mx=mean(xtmp);
    my=mean(ytmp);
    vx=var(xtmp);
    vy=var(ytmp);
    lambda=vx/vy;
    devx=sx2-sx^2/n;
    devy=sy2-sy^2/n;
    codev=sxy-sx*sy/n;

    regrym=devy/codev;
    regryq=-regrym*(mx-codev/devy*my);
    disp(' ')
    disp('REGRESSION SETTING Y AS INDEPENDENT VARIABLE')
    disp(tr)
    fprintf('Slope: %0.4f\t\tIntercept: %0.4f\n',regrym,regryq)
    disp(tr)

    demingm=(devy-((1/lambda)*devx))/(2*codev)+realsqrt(((devy-((1/lambda)*devx))/(2*codev))^2+(1/lambda));
    demingq=my-demingm*mx;
    disp(' ')
    disp('DEMING''S REGRESSION')
    disp(tr)
    fprintf('Lambda: %0.4f\t\tSlope: %0.4f\t\tIntercept: %0.4f\n',lambda,demingm,demingq)
    disp(tr)
end

%build the requested output structures
switch nargout
    case 1
        slope.value=m(1);
        slope.se=m(2);
        slope.lv=m(3);
        slope.uv=m(4);
    case 2
        slope.value=m(1);
        slope.se=m(2);
        slope.lv=m(3);
        slope.uv=m(4);
        intercept.value=q(1);
        intercept.se=q(2);
        intercept.lv=q(3);
        intercept.uv=q(4);
    case 3
        slope.value=m(1);
        slope.se=m(2);
        slope.lv=m(3);
        slope.uv=m(4);
        intercept.value=q(1);
        intercept.se=q(2);
        intercept.lv=q(3);
        intercept.uv=q(4);
        STAT.rse=RSE; %regression standard error
        STAT.cv=cv; %Student's critical value of the regression
        STAT.n=n; %number of points used in the fit
        STAT.xm=mean(x);
        STAT.ym=ym;
        STAT.sse=sum((xtmp-xm).^2);
        STAT.r=r;
end