Code covered by the BSD License  

Highlights from
ROCout=roc(varargin)

image thumbnail

ROCout=roc(varargin)

by

 

16 May 2008 (Updated )

compute a ROC curve

ROCout=roc(varargin)
function ROCout=roc(varargin)
% ROC - Receiver Operating Characteristics.
% The ROC graphs are a useful tecnique for organizing classifiers and
% visualizing their performance. ROC graphs are commonly used in medical
% decision making.
%
% Syntax: ROCout=roc(x,thresholds,alpha,verbose)
%
% Input: x - This is a Nx2 data matrix. The first column is the column of the data value;
%            The second column is the column of the tag: unhealthy (1) and
%            healthy (0).
%        Thresholds - If you want to use all unique values in x(:,1) 
%            then set this variable to 0 or leave it empty; 
%            else set how many unique values you want to use (min=3);
%        alpha - significance level (default 0.05)
%        verbose - if you want to see all reports and plots (0-no; 1-yes by
%        default);
%
% Output: if verbose = 1
%         the ROCplots, the sensitivity and specificity at thresholds; the Area
%         under the curve with Standard error and Confidence interval and
%         comment.
%         if ROCout is declared, you will have a struct:
%         ROCout.AUC=Area under the curve (AUC);
%         ROCout.SE=Standard error of the area;
%         ROCout.ci=Confidence interval of the AUC
%         ROCout.co=Cut off points
%         ROCdata.xr and ROCdata.yr points for ROC plot
%
% USING roc WITHOUT ANY DATA, IT WILL RUN A DEMO
%
%           Created by Giuseppe Cardillo
%           giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:
% Cardillo G. (2008) ROC curve: compute a Receiver Operating Characteristics curve.
% http://www.mathworks.com/matlabcentral/fileexchange/19950

%Input Error handling
args=cell(varargin);
nu=numel(args);
if isempty(nu)
    error('Warning: almost the data matrix is required')
elseif nu>4
    error('Warning: Max four input data are required')
end
default.values = {[165 1;140 1;154 1;139 1;134 1;154 1;120 1;133 1;150 1;...
146 1;140 1;114 1;128 1;131 1;116 1;128 1;122 1;129 1;145 1;117 1;140 1;...
149 1;116 1;147 1;125 1;149 1;129 1;157 1;144 1;123 1;107 1;129 1;152 1;...
164 1;134 1;120 1;148 1;151 1;149 1;138 1;159 1;169 1;137 1;151 1;141 1;...
145 1;135 1;135 1;153 1;125 1;159 1;148 1;142 1;130 1;111 1;140 1;136 1;...
142 1;139 1;137 1;187 1;154 1;151 1;149 1;148 1;157 1;159 1;143 1;124 1;...
141 1;114 1;136 1;110 1;129 1;145 1;132 1;125 1;149 1;146 1;138 1;151 1;...
147 1;154 1;147 1;158 1;156 1;156 1;128 1;151 1;138 1;193 1;131 1;127 1;...
129 1;120 1;159 1;147 1;159 1;156 1;143 1;149 1;160 1;126 1;136 1;150 1;...
136 1;151 1;140 1;145 1;140 1;134 1;140 1;138 1;144 1;140 1;140 1;159 0;...
136 0;149 0;156 0;191 0;169 0;194 0;182 0;163 0;152 0;145 0;176 0;122 0;...
141 0;172 0;162 0;165 0;184 0;239 0;178 0;178 0;164 0;185 0;154 0;164 0;...
140 0;207 0;214 0;165 0;183 0;218 0;142 0;161 0;168 0;181 0;162 0;166 0;...
150 0;205 0;163 0;166 0;176 0;],0,0.05,1};
default.values(1:nu) = args;
[x threshold alpha verbose] = deal(default.values{:});
if isvector(x)
    error('Warning: X must be a matrix')
end
if ~all(isfinite(x(:))) || ~all(isnumeric(x(:)))
    error('Warning: all X values must be numeric and finite')
end
x(:,2)=logical(x(:,2));
if all(x(:,2)==0)
    error('Warning: there are only healthy subjects!')
end
if all(x(:,2)==1)
    error('Warning: there are only unhealthy subjects!')
end
if nu>=2
    if isempty(threshold)
        threshold=0;
    else
        if ~isscalar(threshold) || ~isnumeric(threshold) || ~isfinite(threshold)
            error('Warning: it is required a numeric, finite and scalar THRESHOLD value.');
        end
        if threshold ~= 0 && threshold <3
            error('Warning: Threshold must be 0 if you want to use all unique points or >=2.')
        end
    end
    if nu>=3
        if isempty(alpha)
            alpha=0.05;
        else
            if ~isscalar(alpha) || ~isnumeric(alpha) || ~isfinite(alpha)
                error('Warning: it is required a numeric, finite and scalar ALPHA value.');
            end
            if alpha <= 0 || alpha >= 1 %check if alpha is between 0 and 1
                error('Warning: ALPHA must be comprised between 0 and 1.')
            end
        end
    end
    if nu==4
        verbose=logical(verbose);
    end
end
clear args default nu

tr=repmat('-',1,100);
lu=length(x(x(:,2)==1)); %number of unhealthy subjects
lh=length(x(x(:,2)==0)); %number of healthy subjects
z=sortrows(x,1);
if threshold==0
    labels=unique(z(:,1));%find unique values in z
else
    K=linspace(0,1,threshold+1); K(1)=[];
    labels=quantile(unique(z(:,1)),K)';
end
labels(end+1)=labels(end)+1;
ll=length(labels); %count unique value
a=zeros(ll,2); b=a; c=zeros(ll,1);%array preallocation
ubar=mean(x(x(:,2)==1),1); %unhealthy mean value
hbar=mean(x(x(:,2)==0),1); %healthy mean value
for K=1:ll
    if hbar<ubar
        TP=length(x(x(:,2)==1 & x(:,1)>labels(K)));
        FP=length(x(x(:,2)==0 & x(:,1)>labels(K)));
        FN=length(x(x(:,2)==1 & x(:,1)<=labels(K)));
        TN=length(x(x(:,2)==0 & x(:,1)<=labels(K)));
    else
        TP=length(x(x(:,2)==1 & x(:,1)<labels(K)));
        FP=length(x(x(:,2)==0 & x(:,1)<labels(K)));
        FN=length(x(x(:,2)==1 & x(:,1)>=labels(K)));
        TN=length(x(x(:,2)==0 & x(:,1)>=labels(K)));
    end
    M=[TP FP;FN TN];
    a(K,:)=diag(M)'./sum(M); %Sensitivity and Specificity
    b(K,:)=[a(K,1)/(1-a(K,2)) (1-a(K,1))/a(K,2)]; %Positive and Negative likelihood ratio
    c(K)=trace(M)/sum(M(:)); %Efficiency
end
b(isnan(b))=Inf;

xroc=1-a(:,2); yroc=a(:,1); %ROC points
if hbar>ubar
    xroc=flipud(xroc); yroc=flipud(yroc); %ROC points
end

st=[1 mean(xroc) 1]; L=[0 0 0]; U=[Inf 1 Inf];
fo_ = fitoptions('method','NonlinearLeastSquares','Lower',L,'Upper',U,'Startpoint',st);
ft_ = fittype('1-1/((1+(x/C)^B)^E)',...
     'dependent',{'y'},'independent',{'x'},...
     'coefficients',{'B', 'C', 'E'});
cfit = fit(xroc,yroc,ft_,fo_);


xfit=linspace(0,1,500);
yfit=feval(cfit,xfit);
Area=trapz(xfit,yfit); %estimate the area under the curve
%standard error of area
Area2=Area^2; Q1=Area/(2-Area); Q2=2*Area2/(1+Area);
V=(Area*(1-Area)+(lu-1)*(Q1-Area2)+(lh-1)*(Q2-Area2))/(lu*lh);
Serror=realsqrt(V);
%confidence interval
ci=Area+[-1 1].*(realsqrt(2)*erfcinv(alpha)*Serror);
if ci(1)<0; ci(1)=0; end
if ci(2)>1; ci(2)=1; end
m=zeros(1,4); 
%z-test
SAUC=(Area-0.5)/Serror; %standardized area
p=1-0.5*erfc(-SAUC/realsqrt(2)); %p-value

if verbose
    %Performance of the classifier
    if Area==1
        str='Perfect test';
    elseif Area>=0.90 && Area<1
        str='Excellent test';
    elseif Area>=0.80 && Area<0.90
        str='Good test';
    elseif Area>=0.70 && Area<0.80
        str='Fair test';
    elseif Area>=0.60 && Area<0.70
        str='Poor test';
    elseif Area>=0.50 && Area<0.60
        str='Fail test';
    else
        str='Failed test - less than chance';
    end
    %display results
    disp('ROC CURVE ANALYSIS')
    disp(' ')
    disp(tr)
    str2=['AUC\t\t\tS.E.\t\t\t\t' num2str((1-alpha)*100) '%% C.I.\t\t\tComment\n'];
    fprintf(str2)
    disp(tr)
    fprintf('%0.5f\t\t\t%0.5f\t\t\t%0.5f\t\t%0.5f\t\t\t%s\n',Area,Serror,ci,str)
    disp(tr)
    fprintf('Standardized AUC\t\t1-tail p-value\n')
    if p<1e-4
        fprintf('%0.4f\t\t\t\t%0.4e',SAUC,p)
    else
        fprintf('%0.4f\t\t\t\t%0.4f',SAUC,p)
    end
    if p<=alpha
        fprintf('\t\tThe area is statistically greater than 0.5\n')
    else
        fprintf('\t\tThe area is not statistically greater than 0.5\n')
    end
    disp(' ')
    %display graph
    H=figure;
    set(H,'Position',[4 402 560 420])
    hold on
    plot([0 1],[0 1],'k');
    plot(xfit,yfit,'marker','none','linestyle','-','color','r','linewidth',2);
    H1=plot(xroc,yroc,'bo');
    set(H1,'markersize',6,'markeredgecolor','b','markerfacecolor','b')
    hold off
    xlabel('False positive rate (1-Specificity)')
    ylabel('True positive rate (Sensitivity)')
    title(sprintf('ROC curve (AUC=%0.4f)',Area))
    axis square
end

if p<=alpha
    ButtonName = questdlg('Do you want to input the true prevalence?', 'Prevalence Question', 'Yes', 'No', 'Yes');
    if strcmp(ButtonName,'Yes')
        ButtonName = questdlg('Do you want to input the true prevalence as:', 'Prevalence Question', 'Ratio', 'Probability', 'Ratio');
        switch ButtonName
            case 'Ratio'
                prompt={'Enter the Numerator or the prevalence ratio:','Enter the denominator or the prevalence ratio:'};
                name='Input for Ratio prevalence';
                Ratio=str2double(inputdlg(prompt,name));
                POD=Ratio(1)/diff(Ratio); %prior odds
            case 'Probability'
                prompt={'Enter the prevalence probability comprised between 0 and 1:'};
                name='Input for prevalence';
                pr=str2double(inputdlg(prompt,name));
                POD=pr/(1-pr); %prior odds
        end
        d=[1./(1+1./(b(:,1).*POD)) 1./(1+(b(:,2).*POD))];
        d((a(:,1)==0 & a(:,2)==1),1)=NaN;
        d((a(:,1)==1 & a(:,2)==0),2)=NaN;
        table=[labels'; a(:,1)'; a(:,2)';c';b(:,1)'; b(:,2)';d(:,1)'.*100; d(:,2)'.*100;]';
        if verbose
            disp('ROC CURVE DATA')
            disp(tr)
            fprintf('Cut-off \tSensitivity\tSpecificity\tEfficiency\tPLR\t\tNLR\tPos.Pred.\tNeg.Pred.\n')
            fprintf('%0.2f\t\t%0.4f\t\t%0.4f\t\t%0.4f\t\t%0.4f\t\t%0.4f\t\t%0.2f\t\t%0.2f\n',table')
            disp(tr)
            disp(' ')
        end
    else
        table=[labels'; a(:,1)'; a(:,2)';c';b(:,1)'; b(:,2)']';
        if verbose
            disp('ROC CURVE DATA')
            disp(tr)
            fprintf('Cut-off \tSensitivity\tSpecificity\tEfficiency\tPLR\t\tNLR\n')
            fprintf('%0.2f\t\t%0.4f\t\t%0.4f\t\t%0.4f\t\t%0.4f\t\t%0.4f\n',table')
            disp(tr)
            disp(' ')
        end
    end
    CSe=table(:,1)==min(table(table(:,2)==max(table(:,2)))); %Max sensitivity cut-off
    CSp=table(:,1)==min(table(table(:,3)==max(table(:,3)))); %Max specificity cut-off
    CEff=table(:,1)==min(table(table(:,4)==max(table(:,4)))); %Max efficiency cut-off
    d=realsqrt(xroc.^2+(1-yroc).^2); %apply the Pitagora's theorem
    CE=table(d==min(d),1); %Cost-effective cut-off
    xg=linspace(0,max(table(:,1)),500);
    st=[1 mean(table(:,1)) 1]; U=[Inf max(table(:,1)) Inf];
    fo_ = fitoptions('method','NonlinearLeastSquares','Lower',L,'Upper',U,'Startpoint',st);
    fitSe = fit(table(:,1),table(:,2),ft_,fo_);
    st=[-1 mean(table(:,1)) 1]; L=[-Inf 0 0]; U=[0 max(table(:,1)) Inf];
    fo_ = fitoptions('method','NonlinearLeastSquares','Lower',L,'Upper',U,'Startpoint',st);
    fitSp=fit(table(:,1),table(:,3),ft_,fo_);
    st=[min(table(:,4)) 1 mean(table(:,1)) max(table(:,4)) 1]; L=[0 0 0 0 0]; U=[1 Inf max(table(:,1)) 1 Inf];
    ft_ = fittype('D+(A-D)/((1+(x/C)^B)^E)','dependent',{'y'},'independent',{'x'},'coefficients',{'A', 'B', 'C', 'D', 'E'});
    fo_ = fitoptions('method','NonlinearLeastSquares','Lower',L,'Upper',U,'Startpoint',st);
    fitEff=fit(table(:,1),table(:,4),ft_,fo_);
    if verbose
        H2=figure;
        set(H2,'Position',[570 402 868 420])
        hold on
        HSE = plot(xg,feval(fitSe,xg),'marker','none','linestyle','-','color','r','linewidth',2);
        HCSe=plot([table(CSe,1) table(CSe,1)],[0 1],'marker','none','linestyle','--','color','r','linewidth',2);
        HSP = plot(xg,feval(fitSp,xg),'marker','none','linestyle','-','color','g','linewidth',2);
        HCSp=plot([table(CSp,1) table(CSp,1)],[0 1],'marker','none','linestyle','--','color','g','linewidth',2);
        HEFF = plot(xg,feval(fitEff,xg),'marker','none','linestyle','-','color','b','linewidth',2);
        HCEff=plot([table(CEff,1) table(CEff,1)],[0 1],'marker','none','linestyle','--','color','b','linewidth',2);
        HCO=plot([CE CE],[0 1],'marker','none','linestyle','--','color','m','linewidth',2);
        hold off
        legend([HSE HCSe HSP HCSp HCO HEFF HCEff],...
            'Sensitivity',sprintf('Max Sensitivity cutoff: %0.4f',table(CSe,1)),...
            'Specificity',sprintf('Max Specificity cutoff: %0.4f',table(CSp,1)),...
            sprintf('Cost effective cutoff: %0.4f',CE),...
            'Efficiency',sprintf('Max Efficiency cutoff: %0.4f',table(CEff,1)),...
            'Location','BestOutside')
        axis([xg(1) xg(end) 0 1.1])

        fprintf('1) Max Sensitivity Cut-off point= %0.2f\n',table(CSe,1))
        fprintf('2) Max Specificity Cut-off point= %0.2f\n',table(CSp,1))
        fprintf('3) Cost effective Cut-off point= %0.2f\n',CE), 
        fprintf('4) Max Efficiency Cut-off point= %0.2f\n',table(CEff,1))
        m=[table(CSe,1) table(CSp,1) CE table(CEff,1)];
    end
else
    table=NaN;
end


if nargout
    ROCout.AUC=Area; %Area under the curve
    ROCout.SE=Serror; %standard error of the area
    ROCout.ci=ci; % 95% Confidence interval
    ROCout.co=m; % cut off points
    ROCout.xr=xroc; %graphic x points
    ROCout.yr=yroc; %graphic y points
    ROCout.table=table;
end

Contact us