function PassingBablok (x,y)
%% Passing Bablok regression
% Perform a Passing Bablok non parametric regression.
% INPUT: x and y are arrays in column of values obtained measuring
% the same sample with 2 different analytical methods. It means
% that if you have i.e. i = 1....N samples, x (i) is the value obtained
% measuring the sample (i) with the method A and y (i) is the value obtained
% measuring the same sample (i) with the method B.
% OUTPUT:
% 1) Slope of the regression line
% 2) 95% CI of the slope
% 3) intercept of the regression line
% 4) 95% CI of the intercept
% This function also perform a statistical test of linearity and return
% the p value, using the cumsum adapted method (Kolgomogorv-Smirnov adapted
% test) described by Passing and Bablok.
% Three Graphs are plotted:
% a. Regression Graph, scatter plot with regression line
% b. Ranked residual Graph
% c. cumsum statistic
% ------
% Written in July 2009 by Andrea Padoan, PADOVA, ITALY
% Copyright (c) 2009, ANDREA PADOAN, PADOVA, ITALY
% All rights reserved.
% Redistribution and use in source and binary forms, with or without
%modification, are permitted provided that the following conditions are met:
% * Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
% * Redistributions in binary form must reproduce the above copyright
% notice, this list of conditions and the following disclaimer in the
% documentation and/or other materials provided with the distribution.
%THIS SOFTWARE IS PROVIDED BY ANDREA PADOAN ''AS IS'' AND ANY
%EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
%WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
%DISCLAIMED. IN NO EVENT SHALL ANDRE PADOAN BE LIABLE FOR ANY
%DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
%(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
%LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
%ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
%(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
%SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
display ( ' ' );
display ('Passing Bablok regression for Matlab.');
display ('A new Biometrical Procedure for Testing the Equality of Measurements');
display ('from two Different analytical methods. J. Clin. Chem. Clin. Biochem.');
display ('Vol 21, 1983, pp 709-720');
display ('H. Passing, W. Bablok');
display ('Written in July 2009 by Andrea Padoan, PADOVA, ITALY');
display(' ');
%% Find missing values and drop them
% Find NaN and drop them
%Calculate the number of measurements (n)
n = length (x);
i= 1;
n_dropped = 0;
while i <= n
if isnan(x(i)) == 1
x(i) = [];
y(i) = [];
n_dropped = n_dropped + 1;
n = n - 1;
elseif isnan(y(i)) == 1
x(i) = [];
y(i) = [];
n_dropped = n_dropped + 1;
n = n - 1;
end
i = i + 1;
end
if n_dropped > 0
display ('Warning: found some NaNs');
display ('Number of dropped values : ');
display (n_dropped);
end
%% Estimation of beta and alpha, according to Theil regression.
%Recalculate the number of measurements (n). Some rows can be dropped.
n = length (x);
% Calculate all combinations of the n elements in y taken 2 at a time and
% put its into an array
yij = combnk(y, 2);
yi = yij (:,1);
yj = yij (:,2);
% Calculate all combinations of the n elements in x taken 2 at a time and
% put its into an array
xij = combnk(x, 2);
xi = xij(:,1);
xj = xij(:,2);
% Get number of combination array elements
ncomb = length (xi);
%N rappresent the number of combination without identical pairs of
%measurements and without cases with S = -1
N = 1;
%Calculate Sij = (yi-yj)/(xi-xj)
for nindex = 1:1:ncomb
%First rule: identical pairs of measurements with xi=xj and yi=yj do not
%contribute to estimation of beta
if ( (yi(nindex)~= yj (nindex)) || (xi (nindex) ~= xj (nindex)) )
Sij (N)= (yi(nindex)-yj(nindex))/(xi(nindex)-xj(nindex));
%Second rule: any Sij with a value of -1 is disregarded.
if Sij (N) ~= -1
N = N+1;
end
end
end
%Adjust N
N= N -1;
%Sort Sij for obtain the ranked sequence
S=sort(Sij, 'ascend');
%Calculate K , that is the number of values of Sij with Sij < -1
K= 0;
%Count K, the number of values of Sij < -1
for nindex =1:1:N
if (S(nindex) < -1)
K = K +1;
end
end
%Estimates b using K as an offset. b is estimated by the shifted median b
%of the Si using K as offset.
%If N is odd
if rem(N, 2) ~= 0
b = S ( (N+1)/2+K );
else %if N is even
b = 0.5 * ( S (N/2+K) + S (N/2+1+K) );
end
Slope = b
%% Estimate slope and intercept 95 % confidence intervals
%Define a quantile for the costruction of two side confidence interval for
%beta. 1.96 means 95 % CI
w = 1.96;
C= w * sqrt ( (n*(n-1)*(2*n+5) ) /18 ) ;
M1 = (N - C)/2;
M1 = round(M1);
M2 = N-M1+1;
%Slope confidence intervals
slope_UB = S(M2+K)
slope_LB = S(M1+K)
%Estimation of alpha (=a) and alpha confidence's intervals
a = median (y-x*b);
Intercept = a
a_LB = median (y-slope_UB*x)
a_UB = median (y-slope_LB*x)
%% Statistical test of the assumption of linearity
%Calculare for every (xi,yi) point the distance from the intercept. These
%D values are needed for rank (xi,yi)
for i =1:1:n
D (i) = ( y(i)+(1/b)*x(i)-a )/ sqrt (1+1/(b*b));
end
%Create an array with D in the first row, x in the second and y in the
%third.
Dxy = D';
Dxy (:,2) = x;
Dxy (:,3) = y;
%Sort the first column of the array. In this way you rank xy for the
%distance from the intercept.
Dxy = sortrows (Dxy, 1);
%Create a new variable with ranked xy
ranked_xy = Dxy (:,2:3);
%Dxy (:,4)=a+b*Dxy(:,2);
%Calculate the number of l and L. l denote the number of points (xi, yi)
%with yi > a+bxi and L the number of point with yi < a+bxi.
l = 0;
L = 0;
for i =1:1:n
if y (i) > a+b*x(i)
l = l + 1;
elseif y (i) < a+b*x(i)
L = L + 1;
end
end
%Give a score r to each point of ranked xy. This rules are specified on
%Passing Bablok paper (1985). r now is ranked, like ranked_xy.
for i = 1:1:n
if ranked_xy(i,2) > a+b*ranked_xy(i,1)
r (i) = sqrt(L/l);
elseif ranked_xy(i,2) < a+b*ranked_xy(i,1)
r (i) = - sqrt(l/L);
elseif ranked_xy(i,2) == a+b*ranked_xy(i,1)
r (i) = 0;
end
end
%Calculate cumulative sum of ranked r.
cumsum_r = cumsum (r);
%Calculate max absolute value of cumsum of r.
max_cumsum_r = max(abs (cumsum_r))
%% Display results of statistic test.
%Calculates limits using critical values of Kolmogorov-Smirnov test.
p1perc = 1.63*sqrt(L+l);
p5perc = 1.36*sqrt(L+l);
p10perc = 1.22*sqrt(L+l);
display('Test for linear assumption: ');
display(' H0: x and y are linear relationship');
display( ' ');
if max_cumsum_r <= p10perc
display (' p > 0.1, so x and y have a linear relationship. Do not reject H0.');
%crt_value = p10perc
end
if (max_cumsum_r >= p10perc) && (max_cumsum_r < p5perc)
display (' p > 0.05, so x and y have a linear relationship. Do not reject H0.');
%crt_value = p10perc
%crt_value = p5perc
end
if (max_cumsum_r >= p5perc) && (max_cumsum_r <= p1perc)
display (' 0.01 < p < 0.05 and Linear relationship between x and y is rejected.');
%crt_value = p5perc
%crt_value = p1perc
end
if (max_cumsum_r >= p1perc)
display (' p < 0.01 and Linear relationship between x and y is rejected.');
%crt_value = p1perc
end
%% Plot regression graph
%Plot point of (xi,yi)
figure ('Name', 'Passing Bablok Regression fit', 'NumberTitle','off');
scatter (x,y);
hold on;
%Fit the curve from lowest value of x to the largest.
xhat = sort(x);
for i = 1:1:n
yhat(i) = a+b*xhat(i);
end
%Create a curve for slope_UB (upper 95 %CI) and slope_LB(lower 95 %CI)
for i = 1:1:n
yhatLB(i) = a_LB+slope_LB*xhat(i);
yhatUB(i) = a_UB+slope_UB*xhat(i);
end
%Define max values for axis
if max(sort(x)) > max(sort(y))
axis ([0 max(x) 0 max(x)]);
else
axis ([0 max(y) 0 max(y)]);
end
%Plot the line
plot (xhat, yhat, 'LineWidth', 2, 'Color', 'b');
plot (xhat, yhatLB, 'LineWidth', 1, 'Color', 'black', 'LineStyle','--');
plot (xhat, yhatUB, 'LineWidth', 1, 'Color', 'black', 'LineStyle','--');
title ('Passing Bablok regression', 'FontSize',18, 'Color', 'b', 'FontName', 'Courier','fontweight','b');
xlabel ('method B, x values');
ylabel ('method A, y values');
if max(xhat) >= max(yhat)
line ([0 max(xhat)], [0 max(xhat)], 'Color', 'r', 'LineStyle', ':' , 'LineWidth', 1);
else
line ([0 max(yhat)], [0 max(yhat)], 'Color', 'r', 'LineStyle', ':' , 'LineWidth', 1);
end
%% Plot scatter plot of ranked residual
for i = 1:1:n
residual (i) = Dxy (i,3) - b*Dxy(i,2)-a;
end
figure('Name', 'Ranked residual plot', 'NumberTitle','off');
hold off
plot (1:n, residual, 'bo', 'MarkerSize',5);
hold on
line ([ 0 n], [0 0], 'Color', 'r', 'LineWidth', 2);
%plot (0:0.05:n, 0, '-r', 'LineWidth', 2);
title ('Residual Plot', 'FontSize',18, 'Color', 'b', 'FontName', 'Courier','fontweight','b');
ylabel ('Residuals');
xlabel ('Rank (xi,yi)');
%% Plot cumsum_r statistics
hold off;
%Add zero for first value
figure('Name', 'cumsum statistic', 'NumberTitle','off');
%Plot cumsum, added of starting 0 value.
plot(0:n, horzcat([0],cumsum_r));
hold on;
title ('Linearity of tests', 'FontSize',18, 'Color', 'b', 'FontName', 'Courier','fontweight','b');
ylabel ('cumsum');
xlabel ('Rank (xi,yi)');
line ([ 0 n], [0 0], 'Color', 'r', 'LineWidth', 2)
end