Code covered by the BSD License  

Highlights from
KMPlot

image thumbnail
from KMPlot by Giuseppe Cardillo
Plot the Kaplan-Meier estimation of the survival function

varargout=kmplot(varargin)
function varargout=kmplot(varargin)
% KMPLOT Plot the Kaplan-Meier estimation of the survival function
% Survival times are data that measure follow-up time from a defined
% starting point to the occurrence of a given event, for example the time
% from the beginning to the end of a remission period or the time from the
% diagnosis of a disease to death. Standard statistical techniques cannot
% usually be applied because the underlying distribution is rarely Normal
% and the data are often "censored". A survival time is described as
% censored when there is a follow-up time but the event has not yet
% occurred or is not known to have occurred. For example, if remission time
% is being studied and the patient is still in remission at the end of the
% study, then that patient�s remission time would be censored. If a patient
% for some reason drops out of a study before the end of the study period,
% then that patient�s follow-up time would also be considered to be
% censored. The survival function S(t) is defined as the probability of
% surviving at least to time t. The graph of S(t) against t is called the
% survival curve. The Kaplan�Meier method can be used to estimate this
% curve from the observed survival times without the assumption of an
% underlying probability distribution.
%
% Syntax: 	kmplot(x,alpha,censflag)
%      
%     Inputs:
%           X (mandatory)- Nx2 data matrix:
%                          (X:,1) = survival time of the i-th subject
%                          (X:,2) = censored flag 
%                                   (0 if not censored; 1 if censored)
%           note that if X is a vector, all the flags of the second column
%           will be set to 0 (all data are not censored).
%           ALPHA (optional) - significance level (default 0.05) 
%           CENSFLAG (optional) - Censored Plot flag (default 0). If 0
%           censored data will be plotted spreaded on the horizontal
%           segment; if 1 they will be plotted at the given time of censoring.
%     Outputs:
%           Kaplan-Meier plot
%
%      Example: (+ indicate that patient is censored)
%  
%                   ---------------------
%                   Patient     Survival
%                               time       
%                   ---------------------
%                      1        7    
%                      2        12   
%                      3        7+    
%                      4        12+  
%                      5        11+  
%                      6        8    
%                      7        9    
%                      8        6
%                      9        7+
%                     10        2    
%                   ----------------------
%    X=[7 0; 12 0; 7 1; 12 1; 11 1; 8 0; 9 0; 6 0; 7 1; 2 0];
%
%    Calling on Matlab the function: kmplot(X) the function will plot the
%    Kaplan-Meier estimation of the survival function
%
%           Created by Giuseppe Cardillo
%           giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:Curve
% Cardillo G. (2008). KMPLOT: Kaplan-Meier estimation of the survival
% function.
% http://www.mathworks.com/matlabcentral/fileexchange/22293

%Input Error handling
args=cell(varargin);
nu=numel(args);
if isempty(nu) 
    error('Warning: Data vectors are required')
elseif nu>3
    if nu>4
        error('Warning: Max two input data are required')
    end
end
default.values = {[7 0; 12 0; 7 1; 12 1; 11 1; 8 0; 9 0; 6 0; 7 1; 2 0],0.05,0,1};
default.values(1:nu) = args;
[x alpha cflag flag] = deal(default.values{:});
if ~all(isfinite(x(:))) || ~all(isnumeric(x(:)))
    error('Warning: all X values must be numeric and finite')
end
if isvector(x) 
    x(:,2)=0;
else
    if ~isequal(size(x,2),2)
        error('KMPLOT requires Nx2 matrix data.');
    end
    if ~all(x(:,2)==0 | x(:,2)==1)
        error('Warning: all X(:,2) values must be 0 or 1')
    end
end
if nu>1
    if isempty(alpha)
        alpha=0.05;
    else
        if ~isscalar(alpha) || ~isnumeric(alpha) || ~isfinite(alpha)
            error('Warning: it is required a numeric, finite and scalar ALPHA value.');
        end
        if alpha <= 0 || alpha >= 1 %check if alpha is between 0 and 1
            error('Warning: ALPHA must be comprised between 0 and 1.')
        end
    end
end
if nu==3
    if isempty(cflag)
        cflag=0;
    else
        if ~isscalar(cflag) || ~isnumeric(cflag) || ~isfinite(cflag)
            error('Warning: it is required a numeric, finite and scalar CENSFLAG value.');
        end
        if cflag~=0 && cflag~=1
            error('Warning: CENSFLAG value must be 0 or 1')
        end
    end
end    
clear args default nu
%string for LEGEND function
str1=[num2str((1-alpha)*100) '% confidence interval']; 

%sort data by survival time
x=sortrows(x,1);
%table of patients observed for each survival time
%the TABULATE function sets up this matrix:
%table1=[time count percent(on total)]
table1=[0 size(x,1) 1; tabulate(x(:,1))];
%if all observed time are integers remove not observed time added by
%TABULATE function
table1(table1(:,3)==0,:)=[];

%Table of censored data
table12=tabulate(x(x(:,2)==1));
if ~isempty(table12)
    % remove not observed time added by TABULATE function
    table12(table12(:,3)==0,:)=[];
    % setup the vector of the censored data
    [cens,loc]=ismember(table1(:,1),table12(:,1)); %find censored data
end    

%the percents stored in the the third column are unuseful;
%so, place in the third column how many subjects are still alive at the
%beginning of the i-th interval.
a1=[table1(1,2); -1.*table1(2:end,2)];
table1(:,3)=cumsum(a1); table1(2:end,3)=table1(1:end-1,3);
%number of deaths in the intervals (don't take in account the censored
%data)
if ~isempty(table12)
    table1(cens,2)=table1(cens,2)-table12(loc(cens),2);
end
%finally, delete the first row that is now useless
table1(1,:)=[];

t1=[0;table1(:,1)]; %this is the x variable (time);
%this is the y variable (survival function)
T1=[1;cumprod(1-(table1(:,2)./table1(:,3)))];
if flag %if this function was not called by LOGRANK function
    %compute the standard error of the survival function
    SE=[0;T1(2:end).*sqrt(cumsum(table1(:,2)./(table1(:,3).* ...
        (table1(:,3)-table1(:,2)))))];
end

%censored data plotting
if ~isempty(table12) 
    %if there are censored data after max(t1), add a new cell into the t1,
    %T1 and SE arrays
    if table12(end,1)>=t1(end,1)
        t1(end+1,1)=table12(end,1)+1;
        T1(end+1,1)=T1(end,1);
        if flag %if this function was not called by LOGRANK function
            SE(end+1,1)=SE(end,1);
        end
    end
    if ~cflag
        %vectors preallocation
        xcg=zeros(1,sum(table12(:,2))); ycg=xcg; J=1;
        %for each censored data into the i-th time interval...
        for I=1:size(table12,1)
            %compute how many position into the array they must occupy
            JJ=J+table12(I,2)-1;
            %find the correct time interval in which censored data must be
            %placed
            A=find(t1<=table12(I,1),1,'last');
            B=find(t1>table12(I,1),1,'first');
            %equally divide this interval
            int=linspace(table12(I,1),t1(B,1),table12(I,2)+2);
            %put all in the vectors of the plotting variables
            xcg(J:JJ)=int(2:end-1);
            ycg(J:JJ)=T1(A);
            %update the counter
            J=JJ+1;
        end
    else
        xcg=table1(table1(:,2)==0,1);
        ycg=T1(table1(:,2)==0);
    end
else
    if ~flag %if this function was called by LOGRANK function
        xcg=[]; ycg=[];
    end
end
%compute the hazard rate
c1=T1.*numel(x);
c2=-(diff(log(c1(1:end-1)))./diff(t1(1:end-1)));
lambda=mean(c2(c2~=0));

if flag %if this function was not called by LOGRANK function
    %compute the (1-alpha)*100% confidence interval curves
    cv=realsqrt(2)*erfcinv(alpha); %critical value
    %lower curve (remember that: the lower curve values can't be negative)
    lowc=max(0,T1-SE.*cv);
    %if the lower curve reaches the 0 earlier than survival function, trim the
    %data.
    if isequal(lowc(end-1:end),[0; 0])
        lowcend=find(lowc==0,1,'first');
    else
        lowcend=length(lowc);
    end
    %upper curve (remember that the upper curve values can't be >1)
    upc=min(1,T1+SE.*cv);
    %eventually, correct the data.
    if isequal(upc(end),1) 
        cupend=find(upc<1,1,'last');
        upc(cupend:end)=upc(cupend);
    end

    %compute the median survival time (if exist...)
    if isempty(T1(T1==0.5)) %if there is not a point where T=0.5...
        I=find(T1>0.5,1,'last'); %find the first point where T>0.5
        J=find(T1<0.5,1,'first'); %find the first point where T<0.5
        if isempty(J) %if all points are >0.5...
            mt=0; %...there is no median time
        else 
            %compute the median time by linear interpolation.
            p=polyfit([t1(I) t1(J)],[T1(I) T1(J)],1);
            mt=(0.5-p(2))/p(1);
            str2=['Median time ' num2str(mt)]; %string for LEGEND function
        end
    else
        mt=t1(T1==0.5);
        str2=['Median time ' num2str(mt)]; %string for LEGEND function
    end

    %plot all the data
    clf
    hold on
    S2=stairs(t1(1:lowcend),lowc(1:lowcend),'g--'); %lower confidence interval curve
    stairs(t1,upc,'g--'); %upper confidence interval curve
    S1=stairs(t1,T1,'b'); %Kaplan-Meier survival function
    if mt>0 %if exist a median time...
        S3=plot([0 mt mt],[0.5 0.5 0],'k:'); 
    end
    if ~isempty(table12) %if there are censored data...
        S4=plot(xcg,ycg,'r+');
    else
        S4=[];
    end
    hold off

    %set the axis properly
    xmax=max(t1)+1;
    axis([0 xmax 0 1.2]);
    axis square
    %add labels and legend
    txt=sprintf('Kaplan-Meier estimate of survival function (hazard rate: %0.4f)\n',lambda);
    title(txt,'FontName','Arial','FontSize',14,'FontWeight','Bold'); 
    ylabel('Estimated survival function','FontName','Arial','FontSize',14,'FontWeight','Bold'); 
    xlabel('Time','FontName','Arial','FontSize',14,'FontWeight','Bold'); 
    if mt
        if isempty(S4)
            legend([S1 S2 S3],'Data',str1,str2)
        else
            legend([S1 S2 S3 S4],'Data',str1,str2,'Censored')
        end
    else
        if isempty(S4)
            legend([S1 S2],'Data',str1)
        else
            legend([S1 S2 S4],'Data',str1,'Censored')
        end
    end
    disp('HAZARD RATE IS AN EXPERIMENTAL FUNCTION!!!!')
end
if nargout
    varargout(1)={table1};
    varargout(2)={table12};
    varargout(3)={t1};
    varargout(4)={T1};
    varargout(5)={xcg};
    varargout(6)={ycg};
    varargout(7)={lambda};
end

Contact us at files@mathworks.com