Code covered by the BSD License  

Highlights from
(AU)ROC(CH)

image thumbnail
from (AU)ROC(CH) by Ged Ridgway
Receiver Operating Characteristic curve with convex hull, plus areas under ROC and ROCCH.

...
function [auc auh acc0 accm thrm thrs acc sens spec hull] = ...
    rocplot(scores, classes, plottype, Nthr)
%rocplot: plot a Receiver Operating Characteristic (ROC) curve
% ROC curves illustrate the performance on a binary classification problem
% where classification is based on simply thresholding a set of scores at
% varying levels. Low thresholds give high sensitivity but low specificity,
% high thresholds give high specificity but low sensitivity; the ROC curve
% plots this trade-off over a range of thresholds.
%
% Examples:
%   rocplot(scores, classes);
%   rocplot(scores, classes, plottype);
%   rocplot(scores, classes, plottype, Nthr);
%   [AUC AUH] = rocplot(scores, classes);
%   [AUC AUH acc0 accM thrM thr acc sens spec hull] = rocplot(...);
%  
% classes is a Boolean vector of the same size as scores, which should
% be true where scores >= threshold should yield true, e.g. true for
% patients, and false for healthy control subjects, in medical diagnosis.
% If you have two vectors of scores, e.g. patient and control, first do:
%   scores  = [control(:); patient(:)];
%   classes = [false(numel(control), 1); true(numel(patient), 1)];
%
% plottype controls what is plotted, it defaults to 2, where:
%   0 gives no plot (useful to get AUC without creating a plot)
%   1 gives a standard ROC curve, with sensitivity vs (1 - specificity)
%   2 gives my preferred convention, with sensitivity vs specificity
% Nthr is an optional number of thresholds to consider (points in the ROC),
% if left off, all unique values in scores are considered, or, if there are
% more than 100 of those, then 100 values equally spaced from min to max of
% scores are used. Alternatively, Nthr can be a vector of thresholds.
%
% AUC is the area under the ROC curve, a measure of overall accuracy,
% which gives the probability that the classifier would rank a randomly
% chosen true instance (e.g. patient) higher than a random false one
% (e.g. control subject).
%
% AUH is the area under the convex hull of the ROC curve, which is of
% interest because it is theoretically possible to operate at any point on
% the convex hull of the points in an ROC curve (by using some proportional
% selection of classifiers that operate at two points defining the relevant
% section of the convex hull. This is just a more complex version of the
% logic that gives the null line for an ROC plot; if a threshold of -inf
% gives (spec=0,sens=1) and +inf gives (1,0), then using -inf for half of
% your data and +inf for the other half is expected to give (0.5,0.5) if
% you have equal numbers of true and false instances).
%
% Also recorded in the plot legend, and optionally returned, are acc0, the
% accuracy at a threshold of zero, which is of special importance
% in some algorithms, e.g. if your scores come from a linear classifier
% like a Support Vector Machine which can give scores(i) = w'*x(:, i) - b,
% as equivalent to testing for w'*x(:, i) > b, and accM, the max accuracy
% from the tested set of thresholds, which occurs at the threshold thrM.
%
% The function can also return a vector of all considered thresholds, along
% with the corresponding accuracies, sensitivities and specificities, in
% the variables thr, acc, sens, spec. The output hull contains the indices
% into sens and spec that give the convex hull. These arguments can then be
% used to plot multiple ROC curves and/or convex hulls on the same axis,
% e.g. after calling rocplot twice (with plottype 0), you could do:
%   plot(spec1,sens1,'b', spec2,sens2,'r', [0 1],[1 0],'g');
%
% See also: roc, prec_rec, get_concave_ROC, auroc, bookmaker
%
% These can be found in the MATLAB Central File Exchange:
% roc             http://www.mathworks.com/matlabcentral/fileexchange/19950
% roc             http://www.mathworks.com/matlabcentral/fileexchange/21318
% prec_rec        http://www.mathworks.com/matlabcentral/fileexchange/21528
% get_concave_ROC http://www.mathworks.com/matlabcentral/fileexchange/21382
% auroc           http://www.mathworks.com/matlabcentral/fileexchange/19468
% bookmaker       http://www.mathworks.com/matlabcentral/fileexchange/5648
% There is also a book "Ordinal Data Modeling", by Valen Johnson, with
% companion code  http://www.mathworks.com/matlabcentral/fileexchange/2264

% Copyright 2009 Ged Ridgway

scores = scores(:);
% need max thr greater than max(scores) so roc sens drops to zero
maxscoreplus = max(scores) + eps(max(scores));
classes = classes(:);
if any(double(logical(classes)) ~= double(classes))
    error('Classes vector must contain true (or 1) and false (or 0) only')
end

if ~exist('Nthr', 'var') || isempty(Nthr)
    if numel(scores) > 100
        Nthr = 100; % no need for more points in plot
    else
        thrs = unique([scores;maxscoreplus]);
        Nthr = numel(thrs);
    end
end
if ~exist('thrs', 'var')
    if ~isscalar(Nthr) % user-specified set of thrs to consider
        thrs = unique(Nthr); % (also sorts)
        Nthr = numel(thrs);
        % thrs(Nthr) = maxscoreplus; % commented out, assuming expert user.
    else
        thrs = linspace(min(scores), maxscoreplus, Nthr);
    end
end
thrs = thrs(:);

acc = zeros(Nthr, 1); sens = acc; spec = sens;
for n = 1:Nthr
    thr = thrs(n);
    result = scores >= thr;
    acc(n) = mean(result == classes);
    sens(n) = sum(result == 1 & classes == 1)/sum(classes == 1);
    spec(n) = sum(result == 0 & classes == 0)/sum(classes == 0);
end
% area under ROC curve
auc = trapz(spec, sens);
% test special case of 0 threshold, since training often assumes this
result0 = scores >= 0;
acc0 = mean(result0 == classes);
sens0 = sum(result0 == 1 & classes == 1)/sum(classes == 1);
spec0 = sum(result0 == 0 & classes == 0)/sum(classes == 0);
% threshold giving highest overall accuracy
[accm indm] = max(acc);
thrm = thrs(indm);
sensm = sens(indm);
specm = spec(indm);
% compute convex hull of ROC curve and area under hull
[hull auh] = convhull([spec;0], [sens;0]);
hull = unique(hull);
hull(end) = []; % don't close loop
% sanity check:
if abs(trapz(spec(hull), sens(hull)) - auh) > eps
    warning('rocplot:auh', 'Area under ROC convex hull may be incorrect');
end
if ~exist('plottype', 'var') || isempty(plottype)
    plottype = 2;
end
switch plottype
    case 0, return % no plot
    case 1 % standard TPR (= sens) vs FPR (= 1-spec)
        plot(1-spec,sens,'b', 1-spec(hull),sens(hull),'c', ...
            1-spec0,sens0,'ro', 1-specm,sensm,'ms', 1-[0 1],[1 0],'g');
        xlabel('False Positive Rate (1 - Specificity)');
        ylabel('True Positive Rate (Sensitivity)');
        legpos = 'SE';
        axis([0 1 0 1])
    case 2 % my convention, sens vs spec
        plot(spec,sens,'b', spec(hull),sens(hull),'c', ...
            spec0,sens0,'ro', specm,sensm,'ms', [0 1],[1 0],'g');
        xlabel('Specificity');
        ylabel('Sensitivity');
        legpos = 'SW';
        axis([0 1.05 0 1.05])
end
legend(sprintf('ROC Curve\nAUC = %.3g', auc), ...
    sprintf('Conv Hull\nAUC = %.3g', auh), ...
    sprintf('Acc(0) = %.3g', acc0), ...
    sprintf('Max Acc %.3g', accm), ... 'Chance', ...
    'Location', legpos)

if nargout == 0, clear auc, end % quieten rocplot(scores,classes) without ;

Contact us at files@mathworks.com