%By Dr. Rex Cheung -- Mainline, PA, USA. Fall of 2012.
%This program is a SCOPE sub-routine extends Matlab 2-sample Kolmogorov-Smirnov test
%to test "censored" survival data stored in an excel file.
%This download should include this code, a randomly generated excel data
%file for testing purposes, and the example output.
%This program expects to have at least three excel columns 1. follow up time and 2. failure
%indicator (1=failure has occured and 0=no failure has occurred and 3.
%predictor (1=present, 0=absent).
%A randomly generated excel data file example is included for testing.
%Type this at the command line to run: [h,p,k]= xlsSeerComputeSurvivalFuntionKSTest2('survivalPreditorsTestData.xls')
function [h,p,k]= xlsSeerComputeSurvivalFuntionKSTest2(xlsfilename)
%__________________________________________________________________________
%Open the excel file and read the follow up time (futime) column from xls file
uiwait(msgbox('select the column of f/u time data')); %wait for the user to interact
[a b] =xlsread(xlsfilename,-1); %[number text]=xlsread()
columnname=a(1); %extract the name of the column, interchange a and b position in the line above to capture
%the information
tempinputcol=a(1:end,1); %same as a(2:end) fro string, use a(1:end,1) for numbers,
futime=tempinputcol;
%__________________________________________________________________________
%Open the excel file and read the failure indicator (failureIndicator) column from xls file
uiwait(msgbox('select the column of failure indicator data')); %wait for the user to interact
[c d] =xlsread(xlsfilename,-1); %[number text]=xlsread()
columnname=c(1); %extract the name of the column, interchange a and b position in the line above to capture
%the information
tempinputcol2=c(1:end,1); %same as a(2:end) fro string, use a(1:end,1) for numbers,
failureIndicator=tempinputcol2;
%compute the censored and censoredfutime
censored = 1-failureIndicator;
%__________________________________________________________________________
%Open the excel file and read the predictor2 column from xls file
uiwait(msgbox('select the column of predicator1 data')); %wait for the user to interact
[e f] =xlsread(xlsfilename,-1); %[number text]=xlsread()
columnname=e(1); %extract the name of the column, interchange a and b position in the line above to capture
%the information
tempinputcol3=e(1:end,1); %same as a(2:end) fro string, use a(1:end,1) for numbers,
predictor1=tempinputcol3;
%________________________________________________________________________
%concatenate the column of futime and censored into temp, then concatenate
%the predictor column
temp= horzcat(futime, censored);
temp2= horzcat(temp, predictor1);
%separate the futime and censoring data based on presence (1) or absence (0) of a predictor
len=length(futime);
for i=1:len
% display(i);
if (temp2(i,3)==1)
data1(i,:)=temp2(i,:); %when the predictor1 is present
else
data2(i,:)=temp2(i,:); %when the predictor1 is absent
end
end
%______________________________________________________________________________________________
% Calculate and plot the empirical cdf obtained using Matlab ecdf()
% for data1
t1=data1(:,1);
censored1=data1(:,2);
[f1,x1] = ecdf(t1,'censoring',censored1);
survivalCurve1 = 1-f1; %f1 is the computed failure curve taking into account censoring, 1-f=survival curve
%Note x1 is the time corresponds to where the failure or censoring occured.
stairs(x1,survivalCurve1,'LineWidth',2);
hold on;
%for data2
t2=data2(:,1);
censored2=data2(:,2);
[f2,x2] = ecdf(t2,'censoring',censored2);
survivalCurve2 = 1-f2; %f2 is the computed failure curve taking into account censoring
%Note x2 is the time corresponds to where the failure or censoring occured.
stairs(x2,survivalCurve2,'LineWidth',2);
title('survival plots to check the output of ecdf()');
hold off;
%__________________________________________________________________________________
%kstest2() in matlab does not handle the censoring, I use the ecdf() of
%matlab to compute the survival curve taking into account the censoring,
%then call the ks2test( ) to compare the two survival curves estimated by ecdf()
[h,p,k] = kstest2(f1,f2);