%By Dr. Rex Cheung -- Mainline, PA, USA. Summer of 2013.
%This program is a SCOPE sub-routine extends Matlab 2-sample Kolmogorov-Smirnov test
%to test "censored" survival data stored in an excel file.
%This version does not use normalization hence it takes into account the
%failures at the zero time.
%This program expects to have at least three excel columns 1. follow up time and 2. failure
%indicator (1=failure has occured and 0=no failure has occurred and 3.
%predictor (1=present, 0=absent).
%A randomly generated excel data file example was included for testing with an earlier version.
%Type this at the command line to run: [h,p,k]= xlsSeerComputeSurvivalFuntionKSTest2ver2('survivalPreditorsTestData.xls')
function [h,p,k]= xlsSeerComputeSurvivalFuntionKSTest2ver2(xlsfilename)
%__________________________________________________________________________
%Open the excel file and read the follow up time (futime) column from xls file
uiwait(msgbox('select the column of f/u time data')); %wait for the user to interact
[a b] =xlsread(xlsfilename,-1); %[number text]=xlsread()
columnname=a(1); %extract the name of the column, interchange a and b position in the line above to capture
%the information
tempinputcol=a(1:end,1); %same as a(2:end) fro string, use a(1:end,1) for numbers,
futime=tempinputcol;
%__________________________________________________________________________
%Open the excel file and read the failure indicator (failureIndicator) column from xls file
uiwait(msgbox('select the column of failure indicator data')); %wait for the user to interact
[c d] =xlsread(xlsfilename,-1); %[number text]=xlsread()
columnname=c(1); %extract the name of the column, interchange a and b position in the line above to capture
%the information
tempinputcol2=c(1:end,1); %same as a(2:end) fro string, use a(1:end,1) for numbers,
failureIndicator=tempinputcol2;
%compute the censored and censoredfutime
censored = 1-failureIndicator;
%__________________________________________________________________________
%Open the excel file and read the predictor2 column from xls file
uiwait(msgbox('select the column of predicator1 data')); %wait for the user to interact
[e f] =xlsread(xlsfilename,-1); %[number text]=xlsread()
columnname=e(1); %extract the name of the column, interchange a and b position in the line above to capture
%the information
tempinputcol3=e(1:end,1); %same as a(2:end) fro string, use a(1:end,1) for numbers,
predictor1=tempinputcol3;
%________________________________________________________________________
%concatenate the column of futime and censored into temp, then concatenate
%the predictor column
temp= horzcat(futime, censored);
temp2= horzcat(temp, predictor1);
%separate the futime and censoring data based on presence (1) or absence (0) of a predictor
len=length(futime);
for i=1:len
% display(i);
if (temp2(i,3)==1)
data1(i,:)=temp2(i,:); %when the predictor1 is present
else
data2(i,:)=temp2(i,:); %when the predictor1 is absent
data2(i,3)=1; %make the predictor column = 1 temporarily, to be restored after removing the zero padding
%so that all zero rows not from zero padding will not
%be removed later
end
end
%removing the zero padding because of the looping above
data1(all(data1==0,2),:)=[];
data2(all(data2==0,2),:)=[];
%restore the third column of data2
len2=length(data2);
%make a zero column to replace the temporary 1 column (3rd) for data2
zerocolumndata2=zeros(len2,1);
%remove the third colume of data2 containing ones and restore it to the original zero
%column
data2(:,3)=[];
tempdata2=horzcat(data2,zerocolumndata2);
data2=tempdata2;
%______________________________________________________________________________________________
% Calculate and plot the empirical cdf obtained using Matlab ecdf()
% for data1
t1=data1(:,1);
censored1=data1(:,2);
[f1,x1] = ecdf(t1,'censoring',censored1);
survivalCurve1 = 1-f1; %f1 is the computed failure curve taking into account censoring, 1-f=survival curve
stairs(x1,survivalCurve1,'LineWidth',2);
hold on;
%for data2
t2=data2(:,1);
censored2=data2(:,2);
[f2,x2] = ecdf(t2,'censoring',censored2);
survivalCurve2 = 1-f2; %f2 is the computed failure curve taking into account censoring
stairs(x2,survivalCurve2,'LineWidth',3);
title('survival plots to check the output of ecdf()');
hold off;
%__________________________________________________________________________________
%kstest2() in matlab does not handle the censoring, I use the ecdf() of
%matlab to compute the survival curve taking into account the censoring,
%then call the ks2test( ) to compare the two survival curves estimated by ecdf()
[h,p,k] = kstest2(survivalCurve1,survivalCurve2);