% A set of MATLAB functions for evaluating generalization performance in binary classification.
% acc_cmp1(targs,preds1,preds2)
% Tests the null hypothesis that two classifiers that were tested on
% the same dataset (paired sample) have the same prediction accuracy on
% unseen data.
%
% This function is to be used to compare two classifiers tested on the same
% dataset. In order to compare two classifiers that were tested on
% independent datasets, use ACC_CMP2 instead.
%
% Usage:
% p = acc_cmp1(targs,preds1,preds2)
%
% Arguments:
% targs - a vector of target labels
% preds1 - a vector of predictions by classifier 1
% preds2 - a vector of predictions by classifier 2
%
% targs, preds1, and preds2 must all have the same size and must not contain NaNs.
%
% See also:
% acc_cmp2
% Kay H. Brodersen, ETH Zurich, Switzerland
% http://people.inf.ethz.ch/bkay/
% $Id: acc_cmp1.m 8247 2010-10-22 13:34:28Z bkay $
% -------------------------------------------------------------------------
function p = acc_cmp1(targs,preds1,preds2)
% ACC_CMP1  Two-sided paired Wald test for equal classifier accuracy.
%
% p = acc_cmp1(targs,preds1,preds2) returns the p value of a two-sided
% Wald test of the null hypothesis that classifier 1 and classifier 2,
% evaluated on the same samples, have the same accuracy.
%
% Arguments:
%   targs  - target labels
%   preds1 - predictions by classifier 1 (same size as targs)
%   preds2 - predictions by classifier 2 (same size as targs)
%
% Returns:
%   p - two-sided p value. The result is symmetric in preds1/preds2:
%       swapping the two classifiers yields the same p. p is 1 when the
%       observed accuracies are identical and NaN for empty input.
%
% Raises an assertion error if any input contains NaN or if the input
% sizes differ.

% Check input. Flatten before testing so that matrix inputs yield a scalar
% condition, and compare sizes with isequal so that inputs with a different
% number of dimensions fail the assertion instead of erroring inside ==.
assert(~any(isnan(targs(:))));
assert(~any(isnan(preds1(:))));
assert(~any(isnan(preds2(:))));
assert(isequal(size(targs),size(preds1)));
assert(isequal(size(preds1),size(preds2)));
targs = targs(:);
preds1 = preds1(:);
preds2 = preds2(:);

% Per-sample difference in correctness: +1 where only classifier 1 is
% right, -1 where only classifier 2 is right, 0 where they agree.
n = length(targs);
X = double(preds1==targs);
Y = double(preds2==targs);
D = X-Y;
delta = mean(D);

% No observed accuracy difference means no evidence against the null
% hypothesis. Handling this case explicitly also avoids W = 0/0 = NaN when
% the classifiers are correct on exactly the same samples (se == 0).
if delta == 0
    p = 1;
    return;
end

% Wald statistic using the (biased, 1/n) sample variance of D.
S_2 = mean((D-delta).^2);
se = sqrt(S_2)/sqrt(n);
W = delta / se;

% Two-sided p value: 2*(1 - Phi(|W|)) == erfc(|W|/sqrt(2)). Using erfc
% keeps precision in the tail and needs no Statistics Toolbox.
p = erfc(abs(W)/sqrt(2));
end