function varargout = plotstats(X,Y,varargin)
%PLOTSTATS Format and plot data for basic statitical analysis.
%
% PLOTSTATS(XDATA,YDATA) generate a nice figure for the statistical
% visualisation of datasets. YDATA is a cell array of vectors, each vector
% being a group of samples to test. XDATA is a cell array of the same
% size of YDATA, each element being a single number representing the
% abscissa of the corresponding sample group in YDATA. Example:
% ydata = {
% [ 1 2 3 4 5 6 7 8 9 ]
% [ 2 3 4 5 ]
% 10*rand(10,1) };
% xdata = {
% 1
% 2
% 3 };
% ydata
% ydata =
% [ 1x9 double]
% [ 1x4 double]
% [10x1 double]
% xdata
% xdata =
% [1]
% [2]
% [3]
% plotstats(xdata,ydata)
%
% With this default syntax, the command generate a 2-panel figure. The
% first one containing the raw data plot, the second one the box plot. See
% below for details.
% This script will try to detect outliers using interquartile spacing, like
% in BOXPLOT. Data points that are farther than
% whiskers * (interquartile distance) from the 1st or 3rd quartile
% will be highlighted as outliers and discarded from further analysis. The
% value of the 'whisker' parameters is 1.5 by default, and can be
% customized, see below.
%
% It is possible to extensively customize this plot, using a field/value
% pair syntax, like
% PLOTSTATS(XDATA,YDATA,'field','value',...)
% Possible fields are the following:
%
% BASIC DISPLAY OPTIONS
%
% 'YLabel' - a string, default = ''
% Specify the label of the ordinate axis on each panel.
%
% 'FigureTitle' - a string, default = 'Plot stats'
% Specify the name of the figure.
%
% 'DisplayNumberFormat' - a string, default = '4.2e'
% Specify how to display numerical value on the figure each time it is
% required. Must be a format string understandable by num2str. See
% SPRINTF for help.
%
% 'SeriesLabel' - a cell of string, which size matches the YDATA cell.
% Specify the name of each sample group to be displayed as the abscissa
% label. By default, the value of xdata is displayed.
%
% 'PointLabels' - a cell of cell of string, with one element per element in
% the YDATA cell.
% Specify the 'DisplayName' of each individual data point, useful to
% track outliers. Empty by default.
%
% ANALYSIS OPTIONS
%
% 'PlotRawData' - logical, default = true
% If true, will plot the raw data set, as individual points of ordinate
% specified in YDATA and of abscissa XDATA + a random amount to
% highlight individual points.
% This panel can be customized using the 'PlotRawDataOptions' field, see
% below.
%
% 'PlotBox' - logical, default = true
% If true, will plot the bocplot of the whole dataset, as with the
% command BOXPLOT in the statistics toolbox.
% This panel can be customized using the 'PlotBoxOptions' field, see
% below.
%
% 'PlotHistogram' - logical, default = false
% If true, will plot the grouped histogram of the whole dataset.
% This panel can be customized using the 'PlotHistogramOptions' field,
% see below.
%
% 'PlotFit' - logical, default = false
% If true, will plot the errorbar plot (mean +/- standard error)
% of the dataset, and try to fit it.
% This panel can be customized using the 'PlotFitOptions' field,
% see below.
%
% PANELS OPTIONS
%
% Each of this 4 panels can be customized by giving in argument a struct
% containing the field and value for each parameters. They are detailed
% here:
%
% For 'PlotRawDataOptions', a struct with fields:
% 'AddMeanToPlot' - logical, default = false
% If true will add next to the datapoints the error plot of each
% sample group (mean +/- standard error).
% 'DataSpread' - a double, default = 0.2
% Specify the random amount to add to the abscissa of each data point
% to separate and highlight them.
% 'DisplayProperties' - a struct of plot options, understandable by plot
% (LineWidth, Color, etc...), that will be use to display data
% points.
% 'OutliersProperties' - the same, but used to highlight outliers.
%
% For 'PlotBoxOptions', a struct with fields:
% 'WhiskerFactor' - a double, default = 1.5
% Specify the whiskers length, also used to find outliers. Data
% points that are farther than whiskers * (interquartile distance)
% from the 1st or 3rd quartile will be highlighted as outliers and
% discarded from further analysis.
% 'PlotStudentTest' - logical, default = true
% If true, will perform a t-test on each pair of sample group, and
% will add the result to the panel. A black line will link sample
% group for which difference was found to be significant, a red line
% otherwise. T-tests are peformed with outliers excluded.
% 'PrintHelp' - logical, default = false
% If true, will add to the panel a small help of what is the box
% plot.
% 'Spacer' - a double, default = 0.03
% Percentage by which t-test bars will be spaced on the panel.
%
% For 'PlotHistogramOptions', a struct with fields:
% 'Nbins' - a whole number, default = 7
% Number of bins for the histogram.
% 'WidthFactor' - a double, default = 0.5
% Relative width of individual bars in the histogram.
% 'AlphaTransparency' - a double, default = 1.0
% Value for alpha transparency of each bar in the histogram.
% 'ColormapFunction' - a function handle, default = @(arg) summer(arg)
% A function handle that specify the color of each sample group
% histogram.
%
% For 'PlotFitOptions', a struct with fields:
% 'DoFit' - logical, default = false
% If true, will try to fit the mean values by a fit whose parameters
% are specified by the following fields:
% 'FitFunctionStr' - string, default = ' A * x + B'
% String describing the fitting function to perform the fit with.
% 'FitParam' - cell of string, default = {'A','B'}
% Specify what are the parameters in the FitFunction.
% 'FitIndep' - cell of string, default = 'x'
% Specify what are the independant variable in the FitFunction.
% 'FitStartPoint' - a vector, default = [-0.3 0.3]
% Specify the starting point for the fitting procedure.
% The fit is made through the fit function of the curve fitting toolbox,
% using the NonlinearLeastSquares method.
%
% OUTPUT
%
% S = PLOTSTATS(...) will output a struct S, with the following fields:
%
% 'SeriesLabel' - cell of string
% Contain the label given to each sample group.
%
% 'Outliers' - cell of logical vector
% Each element of these arrays specify if the corresponding data oint is
% an outlier (true) or not(false).
%
% 'Plot' - a sub-struct, with fields
% 'Figure' - handle to the figure
% 'AxesRawData' - handle to the axes of the raw data panel
% 'AxesBox' - handle to the axes of the boxplot panel
% 'AxesHistogram' - handle to the histogram panel
% 'AxesMeanStdErr' - handle to the axes of the mean/fit panel
%
% 'TTest' - a sub-struct with results from the t-tests/
% 'h' - cell of logical values. Difference significance.
% Logical value at index (i,j) is true if
% the difference between sample groups i and j
% was found to be statistically different, false
% otherwise.
% 'p' - cell of double. P-values.
% Value at index (i,j) is the p-value given by the t-test between
% sample groups i and j.
% 'ci' - cell of 2x1 double.
% Value at index (i,j) is the confidence intervals given by the
% t-test between sample groups i and j.
%
% 'Stats' - a sub-struct with fields:
% 'Mean' - array of double, mean of each sample group.
% 'Std' - array of double, standard deviation of each sample group.
% 'Err' - array of double, standard error of each sample group.
%
% --------
% EXAMPLES
% --------
%
% % Basic plot
% x = {1 2 4};
% y={ rand(50,1), rand(50,1), 0.7 + 0.5*rand(100,1)};
% plotstats(x,y)
%
% % With everything, outliers and fit (default is straight line)
% x = {10 5 20};
% y={ 2*rand(50,1), rand(50,1), [2.1 + 0.8*rand(100,1)' 0.2 0.25 0.1]};
% labels = {'medium' 'low' 'high'};
% fitopt.DoFit = true; % option structure for the fit panel
% plotstats(x,y,'SeriesLabels',labels,'PlotHistogram',true,'PlotFit',true,'PlotFitOptions',fitopt)
%
% % With no graphical output and a output argument, one can get only the result struct:
% x = {10 5 20};
% y={ 2*rand(50,1), rand(50,1), [2.1 + 0.8*rand(100,1)' 0.2 0.25 0.1]};
% labels = {'medium' 'low' 'high'};
% fitopt.DoFit = true; % option structure for the fit panel
% S = plotstats(x,y,'SeriesLabels',labels,'PlotRawData',false,'PlotBox',false,'PlotFitOptions',fitopt)
%% Varargin check
% Option defaults for the raw data plot
rawdataoptions = struct( ...
'AddMeanToPlot',false,...
'DataSpread',0.2, ...
'DisplayProperties', struct( ...
'Linestyle','none',...
'Marker','o',...
'MarkerFaceColor','w',...
'MarkerEdgeColor','k'), ...
'OutliersProperties', struct( ...
'Linestyle','none',...
'Marker','o',...
'MarkerFaceColor','w',...
'MarkerEdgeColor','r') ...
);
def{1} = rawdataoptions;
isval{1} = {
@islogical
@isreal
@isstruct
@isstruct
};
% Option defaults for the boxplot
boxoptions = struct( ...
'WhiskerFactor', 1.5, ... % exclude data further than whis * interquartile -> outliers
'Spacer', 0.03, ... % specify the spacing of the student t-test bars (in percent)
'PrintHelp', false, ... % print help about boxplot
'PlotStudentTest', true ...
);
def{2} = boxoptions;
isval{2} = {
@isreal
@isreal
@islogical
@islogical
};
% Option defaults for the histogram plot
histogramoptions = struct( ...
'Nbins',7,...
'WidthFactor',0.5 ,...
'AlphaTransparency',1.0,...
'ColormapFunction',@(arg) summer(arg) ...
);
def{3} = histogramoptions;
isval{3} = {
@(i) iswhole(i)
@isreal
@isreal
@(i) isa(i,'function_handle')
};
% Option defaults for the mean and stderr plot
meanstderroptions = struct( ...
'DoFit',false,...
'FitFunctionStr',' A * x + B', ...
'FitParam','', ...
'FitIndep','',...
'FitStartPoint',[-0.3 0.3] ...
);
isval{4} = {
@islogical
@ischar
@ischar
@ischar
@isnumeric
};
meanstderroptions.FitParam = {'A','B'};
meanstderroptions.FitIndep = 'x';
def{4} = meanstderroptions;
% Main option list
listfields = {
'SeriesLabels'
'PointLabels'
'YLabel'
'PlotRawData'
'PlotRawDataOptions'
'PlotBox'
'PlotBoxOptions'
'PlotHistogram'
'PlotHistogramOptions'
'PlotFit'
'PlotFitOptions'
'DisplayNumberFormat'
'FigureTitle'
};
testvalid = {
@ischar
@iscell
@ischar
@islogical
@isstruct
@islogical
@isstruct
@islogical
@isstruct
@islogical
@isstruct
@ischar
@ischar
};
acceptablevalues = {
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
};
default = struct( ...
'serieslabels',{''}, ...
'pointlabels',{ [] },...
'ylabel','',...
'plotrawdata',true,...
'plotrawdataoptions',rawdataoptions,...
'plotbox',true,...
'plotboxoptions',boxoptions,...
'plothistogram',false,...
'plothistogramoptions',histogramoptions,...
'plotfit',false,...
'plotfitoptions',meanstderroptions, ...
'displaynumberformat','%4.2g', ...
'figuretitle','Plot stats'...
);
mainoptions = checkfield(varargin,listfields,testvalid,acceptablevalues,default);
% Check for validity of suboptions
opt{1} = mainoptions.PlotRawDataOptions;
opt{2} = mainoptions.PlotBoxOptions;
opt{3} = mainoptions.PlotHistogramOptions;
opt{4} = mainoptions.PlotFitOptions;
for i = 1 : length(opt)
lfields = fieldnames(def{i});
accval = cell(length(lfields),1);
opt{i} = checkfield( struct2varargin(opt{i}), lfields, isval{i}, accval, def{i});
end
rawdataoptions = opt{1};
boxoptions = opt{2};
histogramoptions = opt{3};
meanstderroptions = opt{4};
%% Settings
% What to plot
doplotrawdata = mainoptions.PlotRawData;
doplotbox = mainoptions.PlotBox;
doplothistogram = mainoptions.PlotHistogram;
doplotmeanstderr = mainoptions.PlotFit;
% Plot raw data
addtorawdatamean = rawdataoptions.AddMeanToPlot;
plotdisp = rawdataoptions.DataSpread;
datasetopt = rawdataoptions.DisplayProperties;
outliersetopt = rawdataoptions.OutliersProperties;
% Boxplot and outliers exclusion
whis = boxoptions.WhiskerFactor; % exclude data further than whis * interquartile
doplotstudenttest = boxoptions.PlotStudentTest;
printstathelp = boxoptions.PrintHelp; % print help about boxplot
spacer = boxoptions.Spacer; % percent
% Histogran
nbins = histogramoptions.Nbins; % number of bins for histogram
wfactor = histogramoptions.WidthFactor; % Witdth of the bars of the histogram
histalpha = histogramoptions.AlphaTransparency;
cmap = histogramoptions.ColormapFunction;
% Fit
dofit = meanstderroptions.DoFit;
funcstr = meanstderroptions.FitFunctionStr;
param = meanstderroptions.FitParam;
indep = meanstderroptions.FitIndep;
spoint = meanstderroptions.FitStartPoint;
strn = mainoptions.DisplayNumberFormat; % How to display numbers?
labely = mainoptions.YLabel;
%% Loading - Real code starts here
if ~isempty(mainoptions.SeriesLabels)
textleg = mainoptions.SeriesLabels;
else
for i = 1 : length(X)
textleg{i} = num2str(X{i},strn);
end
end
S.SeriesLabels = textleg;
for i = 1 : length(Y)
if ~isempty(mainoptions.PointLabels)
T{i} = mainoptions.PointLabels{i};
else
temp = num2str( (1 : length(Y{i}))' );
[itemp jtemp] = size(temp);
T{i} = mat2cell(temp,ones(1,itemp),jtemp);
end
xpos(i) = X{i}(1);
end
[junk boxpos] = sort(xpos) ;
%% Determine outliers
Xo = X;
Yo = Y; % Yo, Xo : data with outliers excluded
for i =1 : length(Y)
pctiles = prctile(Y{i},[25;50;75]);
q1 = pctiles(1,:);
q3 = pctiles(3,:);
vhi = q3 + whis * (q3-q1);
upadj = max(Y{i}(Y{i}<=vhi));
if (isempty(upadj)), upadj = q3; end
vlo = q1 - whis * (q3-q1);
loadj = min(Y{i}(Y{i}>=vlo));
if (isempty(loadj)), loadj = q1; end
outlier{i} = Y{i} < loadj | Y{i} > upadj;
Yo{i}(outlier{i}) = [];
end
S.Outliers = outlier;
%% Compute stats
ymean = NaN(length(Xo),1);
ystd = NaN(length(Xo),1);
yerr = NaN(length(Xo),1);
for i = 1 : length(Xo)
ymean(i) = mean(Yo{i});
ystd(i) = std(Yo{i});
yerr(i) = ystd(i) / sqrt(length(Yo{i})) ;
end
S.Stats.Mean = ymean;
S.Stats.Std = ystd;
S.Stats.Err = yerr;
%% Concatenate data
dx = [];
dy = [];
for i = 1 : length(X)
dx = [dx; X{i}(1) * ones(length(Y{i}),1)];
dy = [dy; Y{i}(:)];
end
%% Make fit
if dofit
foptions = fitoptions( ...
'Method','NonlinearLeastSquares',...
'StartPoint',spoint);
fmodel = fittype(funcstr,...
'coefficients',param,...
'independent',indep,...
'options',foptions);
rfit = fit(dx,dy,fmodel);
rc = coeffvalues(rfit);
rconf = confint(rfit);
S.Fit.Fit = rfit;
S.Fit.CoeffValues = rc;
S.Fit.ConfInt = rconf;
end
%% Prepare figure
npanels = (doplotrawdata + doplotbox + doplothistogram + doplotmeanstderr);
if npanels > 0
figtitle = mainoptions.FigureTitle;
hf = figure;
set(hf,'Name',figtitle,'NumberTitle','off')
S.Plot.Figure = hf;
if npanels > 2
subplotind = [221 222 223 224];
elseif npanels > 1
subplotind = [121 122];
else
subplotind = 111;
end
spi = 0;
end
%% Plot raw data set
if doplotrawdata
spi =spi + 1;
subplot(subplotind(spi))
ha1 = cla;
S.Plot.AxesRawData = ha1;
hold all
xmin = X{1}(1);
xmax = X{1}(1);
for i = 1 : length(Y)
cx = Xo{i}(1);
cy = Yo{i};
ct = T{i};
for j = 1 : length(cy)
plot(cx + plotdisp*(rand-0.5),cy(j),datasetopt,...
'DisplayName',ct{j});
xmin = min([xmin cx]);
xmax = max([xmax cx]);
end
cx = X{i}(1);
cy = Y{i}(outlier{i});
ct = T{i}(outlier{i});
for j = 1 : length(cy)
plot(cx + plotdisp*(rand-0.5),cy(j),outliersetopt, ...
'DisplayName',ct{j});
end
end
set(gca,'XLim',[xmin-1 xmax+1]);
ylim = get(gca,'YLim');
set(gca,'YLim',[0,ylim(2)]);
ylabel(labely);
[sortedxpos isort] = sort(xpos);
set(gca,'XTick',sortedxpos, ...
'XTickLabel',textleg(isort))
if addtorawdatamean
errorbar(ha1,xpos+2*plotdisp,ymean,yerr,...
'LineStyle','none',...
'Color','b',...
'LineWidth',1,...
'MarkerFaceColor','w',...
'MarkerEdgeColor','b',...
'MarkerSize',10,...
'Marker','o');
ht = title( {'Raw data set' , ...
'\color[rgb]{1 0 0} Mean +/- StdErr'} );
else
title('Raw data set')
end
end
%% Plot histogram
if doplothistogram
N = NaN(nbins,length(X));
bins = linspace(min(dy),max(dy),nbins);
spi = spi + 1;
subplot(subplotind(spi))
ha2 = cla;
S.Plot.AxesHistogram = ha2;
hold all
cm = cmap(length(X));
histleg = {};
for i = 1 : length(X)
[N(:,i) B] = hist(Y{i},bins);
% recenter bars
B = B + (i-1) * (B(2)-B(1)) * wfactor/2;
hb = barh(B,N(:,i),wfactor,'FaceColor',cm(i,:));
% recolor patches
hpatch = get(hb,'Children');
set(hpatch,...
'FaceAlpha',histalpha, ...
'EdgeColor','k');
histleg{i} = textleg{i};
end
S.Histogram = N;
legend(textleg)
xlabel('Count')
ylabel(labely);
title({'Histograms'});
if doplotrawdata
set(gca,'YLim',get(ha1,'Ylim'));
end
legend(histleg)
end
%% Plot boxes
if doplotbox
spi =spi + 1;
subplot(subplotind(spi))
ha3 = cla;
S.Plot.AxesBox = ha3;
hold all
HB = boxplot(dy,dx,'Position',xpos(boxpos),...
'Whisker',whis,...
'Symbol','ro');
box off
xl = get(gca,'XLim');
yl = get(gca,'YLim');
set(gca,'YLim',[0 yl(2)]);
set(gca,'XTick',xpos(boxpos), ...
'XTickLabel',textleg(boxpos))
ylabel(labely);
title({ 'Data repartition'});
if printstathelp
text( xl(1) + 0.2*xl(2), 0.9 * yl(2), ...
{' - box: lower quartile, median, upper quartile', ...
[' - whiskers: data within ',num2str(whis,strn),' times interquartile range'],...
' - crosses: outliers'});
end
% Plot student test
yl = get(gca,'YLim');
yrange = yl(2) - yl(1);
spacefactor = length(X);
for i = 1 : length(X)
for j = i+1 : length(X)
hup1 = HB(1,i);
hup2 = HB(1,j);
y1 = get(hup1,'YData');
y2 = get(hup2,'YData');
ybar1 = max( [ y1(2) y2(2) ] ) + (spacefactor*(i-1)+(2*j)+1) * spacer * yrange;
ybar2 = max( [ y1(2) y2(2) ] ) + (spacefactor*(i-1)+(2*j)+2) * spacer * yrange;
xbar1 = xpos(i);
xbar2 = xpos(j);
[h{i,j},p{i,j},ci{i,j}] = ttest2(Yo{i},Yo{j},[],[],'unequal');
if doplotstudenttest
if h{i,j} == 1
barcol = 'k';
else
barcol = 'r';
end
xd = [xbar1 xbar1 xbar2 xbar2];
yd = [ybar1 ybar2 ybar2 ybar1];
line(xd,yd,'Color',barcol);
xtext = (xbar1 + xbar2 ) / 2;
ytext = ybar2 + spacer/1.5 * yrange;
ptext = ['P = ',sprintf(strn,p{i,j})];
text(xtext,ytext,ptext,'HorizontalAlignment','center');
end
end
end
if doplotstudenttest
ht = get(gca,'Title');
set(ht,'String',[get(ht,'String'),' & Two-sample t-test']);
end
S.TTest.h = h;
S.TTest.p = p;
S.TTest.ci = ci;
set(gca,'YLimMode','auto');
ylim = get(gca,'YLim');
set(gca,'Ylim',[0,ylim(2)]);
if doplotrawdata
set(ha1,'YLim',get(ha3,'YLim'));
end
if doplothistogram
set(ha2,'YLim',get(ha3,'YLim'));
end
end
%% Fit
if doplotmeanstderr
spi =spi + 1;
subplot(subplotind(spi))
ha4 = cla;
S.Plot.AxesMeanStdErr = ha4;
hold all
he = errorbar(xpos,ymean,yerr,'k');
set(he,datasetopt);
if dofit
plot(rfit)
xl = get(gca,'XLim');
yl = get(gca,'YLim');
for i = 1 : length(param)
fitinfo{i} = [param{i},' = ',num2str(rc(i),strn),...
' (',num2str(rconf(1,i),strn),...
',' num2str(rconf(2,i),strn),')'];
end
text(0.4*xl(2),0.9*yl(2), fitinfo);
title({['Fit by ',funcstr],'(outliers excluded)'});
legend( ...
{'mean +/- stderr', ...
['fit by ',funcstr] });
else
legend( ...
{'mean +/- stderr'});
end
if doplotrawdata
set(gca,'YLim',get(ha1,'YLim'));
end
ylabel(labely)
end
%% Prepare outputs
if nargout > 0
varargout{1} = S;
end
%% Subfunction
function out = struct2varargin(in)
% Convert a struct in a cell of char in the Matlab 'field/value'
% way, suitable to be a varargin cell
if ~isstruct(in)
errid = 'MATLAB:struct2varargin:BadInputType';
errmsg = ['Input argument must be a struct, got a ' ,class(in),'.'];
error(errid,errmsg);
end
names = fieldnames(in);
values = struct2cell(in);
n = length(names);
out = cell(n,1);
for i = 1 : n
out(2*i-1) = names(i);
out(2*i) = values(i);
end