function [slope,intercept,STAT]=myregr(x,y,verbose)
%MYREGR: Perform a least-squares linear regression.
%This function computes a least-square linear regression suppling several
%output information.
%
% Syntax: myregr(x,y)
%
% Inputs:
% X - Array of the independent variable
% Y - Dependent variable. If Y is a matrix, the i-th Y row is a
% repeated measure of i-th X point. The mean value will be used
% verbose - Flag to display all information (default=1)
% Outputs:
% - Slope with standard error an 95% C.I.
% - Intercept with standard error an 95% C.I.
% - Pearson's Correlation coefficient with 95% C.I. and its
% adjusted form (depending on the elements of X and Y arrays)
% - Spearman's Correlation coefficient
% - Regression Standard Error
% - Total Variability
% - Variability due to regression
% - Residual Variability
% - Student's t-Test on Slope (to check if slope=0)
% - Student's t-Test on Intercept (to check if intercept=0)
% - Power of the regression
% - Modified Levene's test for homoschedasticity of residuals
% - a plot with:
% o Data points
% o Least squares regression line
% o Red dotted lines: 95% Confidence interval of regression
% o Green dotted lines: 95% Confidence interval of new y
% evaluation using this regression.
% - Residuals plot
%
% [Slope]=myregr(...) returns a structure of slope containing value, standard
% error, lower and upper bounds 95% C.I.
%
% [Slope,Intercept]=myregr(...) returns a structure of slope and intercept
% containing value, standard error, lower and upper bounds 95% C.I.
%
% Example:
% x = [1.0 2.3 3.1 4.8 5.6 6.3];
% y = [2.6 2.8 3.1 4.7 4.1 5.3];
%
% Calling on Matlab the function:
% myregr(x,y)
%
% Answer is:
%
% Slope
% -----------------------------------------------------------
% Value S.E. 95% C.I.
% -----------------------------------------------------------
% 0.50107 0.09667 0.23267 0.76947
% -----------------------------------------------------------
%
% Intercept
% -----------------------------------------------------------
% Value S.E. 95% C.I.
% -----------------------------------------------------------
% 1.83755 0.41390 0.68838 2.98673
% -----------------------------------------------------------
%
% Pearson's Correlation Coefficient
% -----------------------------------------------------------
% Value 95% C.I. ADJ
% -----------------------------------------------------------
% 0.93296 0.49988 0.99281 0.91620
% -----------------------------------------------------------
% Spearman's Correlation Coefficient: 0.9429
%
% Other Parameters
% ------------------------------------------------------------------------
% R.S.E. Variability
% Value Total by regression Residual
% ------------------------------------------------------------------------
% 0.44358 6.07333 5.28627 (87.0407%) 0.78706 (12.9593%)
% ------------------------------------------------------------------------
%
% Student's t-test on slope=0
% ----------------------------------------------------------------
% t = 5.1832 Critical Value = 2.7764 p = 0.0066
% Test passed: slope ~= 0
% ----------------------------------------------------------------
%
% Student's t-test on intercept=0
% ----------------------------------------------------------------
% t = 4.4396 Critical Value = 2.7764 p = 0.0113
% Test passed: intercept ~= 0
% ----------------------------------------------------------------
%
% Power of regression
% ----------------------------------------------------------------
% alpha = 0.05 n = 6 Zrho = 1.6807 std.dev = 0.5774
% Power of regression: 0.6046
% ----------------------------------------------------------------
%
% ...and the plot, of course.
%
% SEE also myregrinv, myregrcomp
%
% Created by Giuseppe Cardillo
% giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:
% Cardillo G. (2007) MyRegression: a simple function on LS linear
% regression with many informative outputs.
% http://www.mathworks.com/matlabcentral/fileexchange/15473
%Input error handling
if ~isvector(x)
error('X must be an array.');
end
if isvector(y)
if any(size(x)~=size(y))
error('X and Y arrays must have the same length.');
end
else
if any(length(x)~=size(y,1))
error('the length of X and the rows of Y must be equal.');
end
end
if nargin==2
verbose=1;
else
verbose=logical(verbose);
end
x=x(:);
if isvector(y)
yt=y(:); %columns vectors
else
yt=mean(y,2);
end
if ~issorted(x) %x must be monotonically crescent
z=[x yt];
z=sortrows(z,[1 2]);
x=z(:,1);
yt=z(:,2);
clear z
end
ux=unique(x);
if length(ux)~=length(x)
uy=zeros(size(ux));
for I=1:length(ux)
c=sum(x==ux(I));
if c==1
uy(I)=yt(x==ux(I));
else
uy(I)=mean(yt(x==ux(I)));
end
end
x=ux(:); yt=uy(:);
clear uy I
end
clear ux
xtmp=[x ones(length(x),1)]; %input matrix for regress function
ytmp=yt;
%regression coefficients
[p,pINT,R,Rint] = regress(ytmp,xtmp);
%check the presence of outliers
outl=find(ismember(sign(Rint),[-1 1],'rows')==0);
if ~isempty(outl)
fprintf(['These points are outliers at 95%% fiducial level: ' repmat('%i ',1,length(outl)) '\n'],outl);
reply = input('Do you want to delete outliers? Y/N [Y]: ', 's');
disp(' ')
if isempty(reply) || upper(reply)=='Y'
ytmp(outl)=[]; xtmp(outl,:)=[];
[p,pINT,R] = regress(ytmp,xtmp);
end
end
xtmp(:,2)=[]; %delete column 2
%save coefficients value
m(1)=p(1); q(1)=p(2);
n=length(xtmp);
xm=mean(xtmp); xsd=std(xtmp);
%standard error of regression coefficients
%Student's critical value
if isvector(y)
cv=tinv(0.975,n-2);
else
cv=tinv(0.975,sum(size(y))-3);
end
m(2)=(pINT(3)-p(1))/cv; %slope standard error
m=[m pINT(1,:)]; %add slope 95% C.I.
q(2)=(pINT(4)-p(2))/cv; %intercept standard error
q=[q pINT(2,:)]; %add intercept 95% C.I.
%Pearson's Correlation coefficient
[rp,pr,rlo,rup]=corrcoef(xtmp,ytmp);
r(1)=rp(2); r(2)=realsqrt((1-r(1)^2)/(n-2)); r(3)=rlo(2); r(4)=rup(2);
%Adjusted Pearson's Correlation coefficient
r(5)=sign(r(1))*(abs(r(1))-((1-abs(r(1)))/(n-2)));
%Spearman's Correlation coefficient
[rx]=tiedrank(xtmp);
[ry]=tiedrank(ytmp);
d=rx-ry;
rs=1-(6*sum(d.^2)/(n^3-n));
%Total Variability
ym=polyval(p,xm);
vtot=sum((ytmp-ym).^2);
%Regression Variability
ystar=ytmp-R;
vreg=sum((ystar-ym).^2);
%Residual Variability
vres=sum(R.^2);
%regression standard error (RSE)
if isvector(y)
RSE=realsqrt(vres/(n-2));
else
if ~isempty(outl) && (isempty(reply) || upper(reply)=='Y')
y2=y; y2(outl)=[];
RSE=realsqrt((vres+sum(sum((y2-repmat(ytmp,1,size(y,2))).^2)))/(sum(size(y2))-3));
else
RSE=realsqrt((vres+sum(sum((y-repmat(yt,1,size(y,2))).^2)))/(sum(size(y))-3));
end
end
%Confidence interval at 95% of regression
sy=RSE*realsqrt(1/n+(((xtmp-xm).^2)/((n-1)*xsd^2)));
cir=[ystar+cv*sy ystar-cv*sy];
%Confidence interval at 95% of a new observation (this is the confidence
%interval that should be used when you evaluate a new y with a new observed
%x)
sy2=realsqrt(sy.^2+RSE^2);
cir2=[ystar+cv*sy2 ystar-cv*sy2];
%display results
if verbose==1
disp('REGRESSION SETTING X AS INDEPENDENT VARIABLE')
tr=repmat('-',1,80);
fprintf('\t\t\t\tSlope\n')
disp(tr)
fprintf('Value\t\tS.E.\t\t\t 95%% C.I.\n')
disp(tr)
fprintf('%0.4f\t\t%0.4f\t\t%0.4f\t\t\t%0.4f\n',m)
disp(tr)
disp(' ')
fprintf('\t\t\t\tIntercept\n')
disp(tr)
fprintf('Value\t\tS.E.\t\t\t 95%% C.I.\n')
disp(tr)
fprintf('%0.4f\t\t%0.4f\t\t%0.4f\t\t\t%0.4f\n',q)
disp(tr)
disp(' ')
fprintf('\t\t\tPearson''s Correlation Coefficient\n')
disp(tr)
fprintf('Value\t\tS.E.\t\t\t 95%% C.I.\t\t\tADJ\n')
disp(tr)
fprintf('%0.4f\t\t%0.4f\t\t%0.4f\t\t\t%0.4f\t\t%0.4f\n',r)
disp(tr)
disp(' ')
fprintf('Spearman''s Correlation Coefficient: %0.4f\n',rs)
disp(' ')
fprintf('\t\t\t\tOther Parameters\n')
disp(tr)
fprintf('R.S.E.\t\t\t\tVariability\n')
fprintf('Value\t\tTotal\t\tBy regression\t\tResidual\n')
disp(tr)
fprintf('%0.4f\t\t%0.4f\t%0.4f (%0.1f%%)\t%0.4f (%0.1f%%)\n',RSE,vtot,vreg,vreg/vtot*100,vres,vres/vtot*100)
disp(tr)
disp(' ')
disp('Press a key to continue'); pause; disp(' ')
%test on slope
t=abs(m(1)/m(2)); %Student's t
disp('Student''s t-test on slope=0')
disp(tr)
if pr(2)<1e-4
fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4e\n',t,cv,pr(2))
else
fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4f\n',t,cv,pr(2))
end
if t>cv
disp('Test passed: slope ~= 0')
else
disp('Test not passed: slope = 0')
m(1)=0;
end
try
powerStudent(t,n-1,2,0.05)
catch ME
disp(ME)
disp('I am trying to download the powerStudent function by Antonio Trujillo Ortiz from FEX')
[F,Status]=urlwrite('http://www.mathworks.com/matlabcentral/fileexchange/2907-powerstudent?controller=file_infos&download=true','powerStudent.zip');
if Status
unzip(F)
powerStudent(t,n-1,2,0.05)
end
clear F Status
end
%test on intercept
t=abs(q(1)/q(2)); %Student's t
p=(1-tcdf(t,n-2))*2; %p-value
disp('Student''s t-test on intercept=0')
disp(tr)
if p<1e-4
fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4e\n',t,cv,p)
else
fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4f\n',t,cv,p)
end
if t>cv
disp('Test passed: intercept ~= 0')
else
disp('Test not passed: intercept = 0')
q(1)=0;
end
powerStudent(t,n-1,2,0.05)
%Power of regression
Zrho=0.5*reallog((1+abs(r(1)))/(1-abs(r(1)))); %normalization of Pearson's correlation coefficient
sZ=realsqrt(1/(n-3)); %std.dev of Zrho
pwr=1-tcdf(1.96-Zrho/sZ,n-2)*2; %power of regression
disp('Power of regression')
disp(tr)
fprintf('alpha = 0.05 n = %d\tZrho = %0.4f\tstd.dev = %0.4f\n',n,Zrho,sZ)
fprintf('Power of regression: %0.4f\n',pwr)
disp(tr)
disp(' ')
%Test for homoschedasticity of residuals (Modified Levene's test)
xme=median(xtmp);
e1=R(xtmp<=xme); me1=median(e1); d1=abs(e1-me1); dm1=mean(d1); l1=length(e1);
e2=R(xtmp>xme); me2=median(e2); d2=abs(e2-me2); dm2=mean(d2); l2=length(e2);
gl=(l1+l2-2); S2p=(sum((d1-dm1).^2)+sum((d2-dm2).^2))/gl;
t=abs(dm1-dm2)/realsqrt(S2p*(1/l1+1/l2)); cv=tinv(0.975,gl);
p=(1-tcdf(t,gl)); %p-value
disp('Modified Levene''s t-test for homoshedasticity of residuals')
disp(tr)
if p<1e-4
fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4e\n',t,cv,p)
else
fprintf('t = %0.4f\tCritical Value = %0.4f\tp = %0.4f\n',t,cv,p)
end
if t>cv
disp('Test not passed: Residuals are not homoschedastic')
else
disp('Test passed: Residuals are homoschedastic')
end
%plot regression
subplot(1,2,1);
if isvector(y)
plot(x,yt,'bo',xtmp,ystar,xtmp,cir,'r:',xtmp,cir2,'g:');
else
hold on
plot(x',yt,'LineStyle','none','Marker','o','MarkerEdgeColor','b')
plot(xtmp,ystar,'k',xtmp,cir,'r:',xtmp,cir2,'g:');
hold off
end
axis square
txt=sprintf('Red dotted lines: 95%% Confidence interval of regression\nGreen dotted lines: 95%% Confidence interval of new y evaluation using this regression');
title(txt)
%plot residuals
subplot(1,2,2);
xl=[min(x) max(x)];
plot(xtmp,R,'bo',xl,[0 0],'k-',xl,[RSE RSE],'g--',xl,[-RSE -RSE],'g--',...
xl,1.96.*[RSE RSE],'m--',xl,-1.96.*[RSE RSE],'m--',...
xl,2.58.*[RSE RSE],'r--',xl,-2.58.*[RSE RSE],'r--')
axis square
n=length(xtmp);
sx=sum(xtmp);
sy=sum(ytmp);
sx2=sum(xtmp.^2);
sy2=sum(ytmp.^2);
sxy=sum(xtmp.*ytmp);
mx=mean(xtmp);
my=mean(ytmp);
vx=var(xtmp);
vy=var(ytmp);
lambda=vx/vy;
devx=sx2-sx^2/n;
devy=sy2-sy^2/n;
codev=sxy-sx*sy/n;
regrym=devy/codev;
regryq=-regrym*(mx-codev/devy*my);
disp(' ')
disp('REGRESSION SETTING Y AS INDEPENDENT VARIABLE')
disp(tr)
fprintf('Slope: %0.4f\t\tIntercept: %0.4f\n',regrym,regryq)
disp(tr)
demingm=(devy-((1/lambda)*devx))/(2*codev)+realsqrt(((devy-((1/lambda)*devx))/(2*codev))^2+(1/lambda));
demingq=my-demingm*mx;
disp(' ')
disp('DEMING''S REGRESSION')
disp(tr)
fprintf('Lambda: %0.4f\t\tSlope: %0.4f\t\tIntercept: %0.4f\n',lambda,demingm,demingq)
disp(tr)
end
switch nargout
case 1
slope.value=m(1);
slope.se=m(2);
slope.lv=m(3);
slope.uv=m(4);
case 2
slope.value=m(1);
slope.se=m(2);
slope.lv=m(3);
slope.uv=m(4);
intercept.value=q(1);
intercept.se=q(2);
intercept.lv=q(3);
intercept.uv=q(4);
case 3
slope.value=m(1);
slope.se=m(2);
slope.lv=m(3);
slope.uv=m(4);
intercept.value=q(1);
intercept.se=q(2);
intercept.lv=q(3);
intercept.uv=q(4);
STAT.rse=RSE;
STAT.cv=cv;
STAT.n=n;
STAT.xm=mean(x);
STAT.ym=ym;
STAT.sse=sum((xtmp-xm).^2);
STAT.r=r;
end