Code covered by the BSD License  

Highlights from
plotstats

image thumbnail
from plotstats by Jean-Yves Tinevez
PLOTSTATS Format and plot data for basic statitical analysis.

plotstats(X,Y,varargin)
function varargout = plotstats(X,Y,varargin)
%PLOTSTATS Format and plot data for basic statitical analysis.
%
% PLOTSTATS(XDATA,YDATA) generate a nice figure for the statistical
% visualisation of datasets. YDATA is a cell array of vectors, each vector
% being a group of samples to test. XDATA is a cell array of the same
% size of YDATA, each element being a single number representing the
% abscissa of the corresponding sample group in YDATA. Example:
% ydata = { 
%   [ 1 2 3 4 5 6 7 8 9 ]
%   [ 2 3 4 5 ]
%   10*rand(10,1)        };
% xdata = {
%   1 
%   2
%   3                     };
% ydata
% ydata = 
%     [ 1x9 double]
%     [ 1x4 double]
%     [10x1 double]
% xdata
% xdata = 
%     [1]
%     [2]
%     [3]
% plotstats(xdata,ydata)
% 
% With this default syntax, the command generate a 2-panel figure. The
% first one containing the raw data plot, the second one the box plot. See
% below for details.
% This script will try to detect outliers using interquartile spacing, like
% in BOXPLOT. Data points that are farther than 
%       whiskers * (interquartile distance) from the 1st or 3rd quartile
% will be highlighted as outliers and discarded from further analysis. The
% value of the 'whisker' parameters is 1.5 by default, and can be
% customized, see below.
% 
% It is possible to extensively customize this plot, using a field/value
% pair syntax, like 
%   PLOTSTATS(XDATA,YDATA,'field','value',...)
% Possible fields are the following:
% 
% BASIC DISPLAY OPTIONS
% 
% 'YLabel' - a string, default = ''    
%   Specify the label of the ordinate axis on each panel.
% 
% 'FigureTitle' - a string, default = 'Plot stats'
%   Specify the name of the figure.
% 
% 'DisplayNumberFormat' - a string, default = '4.2e'
%   Specify how to display numerical value on the figure each time it is
%   required. Must be a format string understandable by num2str. See
%   SPRINTF for help.
% 
% 'SeriesLabel' - a cell of string, which size matches the YDATA cell.
%   Specify the name of each sample group to be displayed as the abscissa
%   label. By default, the value of xdata is displayed.
% 
% 'PointLabels' - a cell of cell of string, with one element per element in
%   the YDATA cell.
%   Specify the 'DisplayName' of each individual data point, useful to
%   track outliers. Empty by default.
% 
% ANALYSIS OPTIONS
% 
% 'PlotRawData' - logical, default = true
%   If true, will plot the raw data set, as individual points of ordinate
%   specified in YDATA and of abscissa XDATA + a random amount to
%   highlight individual points. 
%   This panel can be customized using the 'PlotRawDataOptions' field, see
%   below.
% 
% 'PlotBox' - logical, default = true
%   If true, will plot the bocplot of the whole dataset, as with the
%   command BOXPLOT in the statistics toolbox.
%   This panel can be customized using the 'PlotBoxOptions' field, see
%   below.
% 
% 'PlotHistogram' - logical, default = false
%   If true, will plot the grouped histogram of the whole dataset.
%   This panel can be customized using the 'PlotHistogramOptions' field,
%   see below.
% 
% 'PlotFit' - logical, default = false
%   If true, will plot the errorbar plot (mean +/- standard error) 
%   of the dataset, and try to fit it.
%   This panel can be customized using the 'PlotFitOptions' field,
%   see below.
% 
% PANELS OPTIONS
% 
% Each of this 4 panels can be customized by giving in argument a struct
% containing the field and value for each parameters. They are detailed
% here:
% 
% For 'PlotRawDataOptions', a struct with fields:
%   'AddMeanToPlot' - logical, default = false
%       If true will add next to the datapoints the error plot of each
%       sample group (mean +/- standard error).
%   'DataSpread' - a double, default = 0.2
%       Specify the random amount to add to the abscissa of each data point
%       to separate and highlight them.
%   'DisplayProperties' - a struct of plot options, understandable by plot
%       (LineWidth, Color, etc...), that will be use to display data
%       points.
%   'OutliersProperties' - the same, but used to highlight outliers.
% 
% For 'PlotBoxOptions', a struct with fields:
%   'WhiskerFactor' - a double, default = 1.5
%       Specify the whiskers length, also used to find outliers. Data
%       points that are farther than whiskers * (interquartile distance)
%       from the 1st or 3rd quartile will be highlighted as outliers and
%       discarded from further analysis.
%   'PlotStudentTest' - logical, default = true
%       If true, will perform a t-test on each pair of sample group, and
%       will add the result to the panel. A black line will link sample
%       group for which difference was found to be significant, a red line
%       otherwise. T-tests are peformed with outliers excluded.
%   'PrintHelp' - logical, default = false
%       If true, will add to the panel a small help of what is the box
%       plot.
%   'Spacer' - a double, default = 0.03
%       Percentage by which t-test bars will be spaced on the panel.
% 
% For 'PlotHistogramOptions', a struct with fields:
%   'Nbins' - a whole number, default = 7
%       Number of bins for the histogram.
%   'WidthFactor' - a double, default = 0.5
%       Relative width of individual bars in the histogram.
%   'AlphaTransparency' - a double, default = 1.0
%       Value for alpha transparency of each bar in the histogram.
%   'ColormapFunction' - a function handle, default = @(arg) summer(arg)
%       A function handle that specify the color of each sample group
%       histogram.
% 
% For 'PlotFitOptions', a struct with fields:
%   'DoFit' - logical, default = false
%       If true, will try to fit the mean values by a fit whose parameters
%       are specified by the following fields:
%   'FitFunctionStr' - string, default = ' A * x + B'
%       String describing the fitting function to perform the fit with.
%   'FitParam' - cell of string, default = {'A','B'}
%       Specify what are the parameters in the FitFunction.
%   'FitIndep' - cell of string, default = 'x'
%       Specify what are the independant variable in the FitFunction.
%   'FitStartPoint' - a vector, default = [-0.3 0.3]
%       Specify the starting point for the fitting procedure.
%   The fit is made through the fit function of the curve fitting toolbox,
%   using the NonlinearLeastSquares method.
% 
% OUTPUT
% 
% S = PLOTSTATS(...) will output a struct S, with the following fields:
% 
% 'SeriesLabel' - cell of string
%   Contain the label given to each sample group.
% 
% 'Outliers' - cell of logical vector
%   Each element of these arrays specify if the corresponding data oint is
%   an outlier (true) or not(false).
% 
% 'Plot' - a sub-struct, with fields
%   'Figure' - handle to the figure
%   'AxesRawData' - handle to the axes of the raw data panel
%   'AxesBox' - handle to the axes of the boxplot panel
%   'AxesHistogram' - handle to the histogram panel
%   'AxesMeanStdErr' - handle to the axes of the mean/fit panel
% 
% 'TTest' - a sub-struct with results from the t-tests/
%   'h' - cell of logical values. Difference significance.
%       Logical value at index (i,j) is true if
%       the difference between sample groups i and j 
%       was found to be statistically different, false
%       otherwise.
%   'p' - cell of double. P-values.
%       Value at index (i,j) is the p-value given by the t-test between
%       sample groups i and j.
%   'ci' - cell of 2x1 double.
%       Value at index (i,j) is the confidence intervals given by the 
%       t-test between sample groups i and j.
% 
% 'Stats' - a sub-struct with fields:
%   'Mean' - array of double, mean of each sample group.
%   'Std' - array of double, standard deviation of each sample group.
%   'Err' - array of double, standard error of each sample group.
% 
% --------
% EXAMPLES
% --------
% 
% % Basic plot
% x = {1 2 4};
% y={ rand(50,1), rand(50,1), 0.7 + 0.5*rand(100,1)};
% plotstats(x,y)
% 
% % With everything, outliers and fit (default is straight line)
% x = {10 5 20};
% y={ 2*rand(50,1), rand(50,1), [2.1 + 0.8*rand(100,1)' 0.2 0.25 0.1]};
% labels = {'medium' 'low' 'high'};
% fitopt.DoFit = true; % option structure for the fit panel
% plotstats(x,y,'SeriesLabels',labels,'PlotHistogram',true,'PlotFit',true,'PlotFitOptions',fitopt)
% 
% % With no graphical output and a output argument, one can get only the result struct:
% x = {10 5 20};
% y={ 2*rand(50,1), rand(50,1), [2.1 + 0.8*rand(100,1)' 0.2 0.25 0.1]};
% labels = {'medium' 'low' 'high'};
% fitopt.DoFit = true; % option structure for the fit panel
% S  = plotstats(x,y,'SeriesLabels',labels,'PlotRawData',false,'PlotBox',false,'PlotFitOptions',fitopt)

%% Varargin check


% Option defaults for the raw data plot

rawdataoptions = struct( ...
    'AddMeanToPlot',false,...
    'DataSpread',0.2, ...
    'DisplayProperties',    struct( ...
        'Linestyle','none',...
        'Marker','o',...
        'MarkerFaceColor','w',...
        'MarkerEdgeColor','k'), ...
    'OutliersProperties',   struct( ...
        'Linestyle','none',...
        'Marker','o',...
        'MarkerFaceColor','w',...
        'MarkerEdgeColor','r') ...
    );
def{1} = rawdataoptions;
isval{1} = {
    @islogical
    @isreal
    @isstruct
    @isstruct
    };
    

% Option defaults for the boxplot

boxoptions = struct( ...
    'WhiskerFactor', 1.5, ...   % exclude data further than whis * interquartile -> outliers
    'Spacer', 0.03, ...         % specify the spacing of the student t-test bars (in percent)
    'PrintHelp', false, ...     % print help about boxplot
    'PlotStudentTest', true ...
    );
def{2} = boxoptions;
isval{2} = {
    @isreal
    @isreal
    @islogical
    @islogical
    };

% Option defaults for the histogram plot

histogramoptions = struct( ...
    'Nbins',7,...
    'WidthFactor',0.5 ,...
    'AlphaTransparency',1.0,...
    'ColormapFunction',@(arg) summer(arg) ...
    );
def{3} = histogramoptions;
isval{3} = {
    @(i) iswhole(i)
    @isreal
    @isreal
    @(i) isa(i,'function_handle')
    };

% Option defaults for the mean and stderr plot

meanstderroptions = struct( ...
    'DoFit',false,...
    'FitFunctionStr',' A * x + B', ...
    'FitParam','', ...
    'FitIndep','',...
    'FitStartPoint',[-0.3 0.3] ...
    );
isval{4} = {
    @islogical
    @ischar
    @ischar
    @ischar
    @isnumeric
    };
meanstderroptions.FitParam = {'A','B'};
meanstderroptions.FitIndep = 'x';
def{4} = meanstderroptions;

% Main option list

listfields = {
    'SeriesLabels'
    'PointLabels'
    'YLabel'
    'PlotRawData'
    'PlotRawDataOptions'
    'PlotBox'
    'PlotBoxOptions'
    'PlotHistogram'
    'PlotHistogramOptions'
    'PlotFit'
    'PlotFitOptions'
    'DisplayNumberFormat'
    'FigureTitle'
    };

testvalid = {
    @ischar
    @iscell
    @ischar
    @islogical
    @isstruct
    @islogical
    @isstruct
    @islogical
    @isstruct
    @islogical
    @isstruct
    @ischar
    @ischar
    };

acceptablevalues = {
    {}
    {}
    {}
    {}
    {}
    {}
    {}
    {}
    {}
    {}
    {}
    {}
    {}
    };

default = struct( ...
    'serieslabels',{''}, ...
    'pointlabels',{ [] },...
    'ylabel','',...
    'plotrawdata',true,...
    'plotrawdataoptions',rawdataoptions,...
    'plotbox',true,...
    'plotboxoptions',boxoptions,...
    'plothistogram',false,...
    'plothistogramoptions',histogramoptions,...
    'plotfit',false,...
    'plotfitoptions',meanstderroptions, ...
    'displaynumberformat','%4.2g', ...
    'figuretitle','Plot stats'...
    );
    
mainoptions = checkfield(varargin,listfields,testvalid,acceptablevalues,default);

% Check for validity of suboptions

opt{1} = mainoptions.PlotRawDataOptions;
opt{2} = mainoptions.PlotBoxOptions;
opt{3} = mainoptions.PlotHistogramOptions;
opt{4} = mainoptions.PlotFitOptions;

for i = 1 : length(opt)
   
    lfields = fieldnames(def{i});
    accval = cell(length(lfields),1);
    opt{i} = checkfield( struct2varargin(opt{i}), lfields, isval{i}, accval, def{i});
    
end

rawdataoptions = opt{1};
boxoptions = opt{2};
histogramoptions = opt{3};
meanstderroptions = opt{4};


%% Settings

% What to plot
doplotrawdata = mainoptions.PlotRawData;
doplotbox = mainoptions.PlotBox;
doplothistogram = mainoptions.PlotHistogram;
doplotmeanstderr = mainoptions.PlotFit;

% Plot raw data
addtorawdatamean = rawdataoptions.AddMeanToPlot;
plotdisp = rawdataoptions.DataSpread;
datasetopt = rawdataoptions.DisplayProperties;
outliersetopt = rawdataoptions.OutliersProperties;

% Boxplot and outliers exclusion
whis = boxoptions.WhiskerFactor; % exclude data further than whis * interquartile
doplotstudenttest = boxoptions.PlotStudentTest;
printstathelp = boxoptions.PrintHelp; % print help about boxplot
spacer = boxoptions.Spacer; % percent

% Histogran
nbins = histogramoptions.Nbins; % number of bins for histogram
wfactor = histogramoptions.WidthFactor; % Witdth of the bars of the histogram
histalpha = histogramoptions.AlphaTransparency;
cmap = histogramoptions.ColormapFunction;

% Fit
dofit = meanstderroptions.DoFit;
funcstr = meanstderroptions.FitFunctionStr;
param = meanstderroptions.FitParam;
indep = meanstderroptions.FitIndep;
spoint = meanstderroptions.FitStartPoint;

strn = mainoptions.DisplayNumberFormat; % How to display numbers?
labely = mainoptions.YLabel;


%% Loading - Real code starts here

if ~isempty(mainoptions.SeriesLabels)
    textleg = mainoptions.SeriesLabels;
else 
    for i = 1 : length(X)
        textleg{i} = num2str(X{i},strn);
    end
end
S.SeriesLabels = textleg;

for i = 1 : length(Y)
    
    if ~isempty(mainoptions.PointLabels)
        T{i} = mainoptions.PointLabels{i};
    else
        temp = num2str( (1 : length(Y{i}))' );
        [itemp jtemp] = size(temp);
        T{i} = mat2cell(temp,ones(1,itemp),jtemp);
    end
    xpos(i) = X{i}(1);
end

[junk boxpos] = sort(xpos) ;


%% Determine outliers

Xo = X;
Yo = Y; % Yo, Xo : data with outliers excluded

for i =1 : length(Y)

    pctiles = prctile(Y{i},[25;50;75]);
    q1 = pctiles(1,:);
    q3 = pctiles(3,:);

    vhi = q3 + whis * (q3-q1);
    upadj = max(Y{i}(Y{i}<=vhi));
    if (isempty(upadj)), upadj = q3; end

    vlo = q1 - whis * (q3-q1);
    loadj = min(Y{i}(Y{i}>=vlo));
    if (isempty(loadj)), loadj = q1; end

    outlier{i} = Y{i} < loadj | Y{i} > upadj;

    Yo{i}(outlier{i}) = [];

end

S.Outliers = outlier;

%% Compute stats

ymean = NaN(length(Xo),1);
ystd = NaN(length(Xo),1);
yerr = NaN(length(Xo),1);

for i = 1 : length(Xo)
    ymean(i) = mean(Yo{i});
    ystd(i) = std(Yo{i});
    yerr(i) = ystd(i) / sqrt(length(Yo{i})) ;
end

S.Stats.Mean = ymean;
S.Stats.Std = ystd;
S.Stats.Err = yerr;

%% Concatenate data

dx = [];
dy = [];
for i = 1 : length(X)
        dx = [dx; X{i}(1) * ones(length(Y{i}),1)];
    dy = [dy; Y{i}(:)];
end

%% Make fit 

if dofit

    foptions = fitoptions( ...
        'Method','NonlinearLeastSquares',...
        'StartPoint',spoint);

    fmodel = fittype(funcstr,...
        'coefficients',param,...
        'independent',indep,...
        'options',foptions);

    rfit = fit(dx,dy,fmodel);
    rc = coeffvalues(rfit);
    rconf = confint(rfit);

    S.Fit.Fit = rfit;
    S.Fit.CoeffValues = rc;
    S.Fit.ConfInt = rconf;

end

%% Prepare figure

npanels = (doplotrawdata + doplotbox + doplothistogram + doplotmeanstderr);

if npanels > 0
    figtitle = mainoptions.FigureTitle;
    hf = figure;
    set(hf,'Name',figtitle,'NumberTitle','off')
    S.Plot.Figure = hf;
    
    if  npanels > 2
        subplotind = [221 222 223 224];
    elseif npanels > 1
        subplotind = [121 122];
    else
        subplotind = 111;
    end
    spi = 0;
end

%% Plot raw data set

if doplotrawdata
    
    spi =spi + 1;    
    subplot(subplotind(spi))
    ha1 = cla;
    S.Plot.AxesRawData = ha1;
    hold all

    xmin = X{1}(1);
    xmax = X{1}(1);
    for i = 1 : length(Y)

        cx = Xo{i}(1);
        cy = Yo{i};
        ct = T{i};
        for j = 1 : length(cy)
            plot(cx + plotdisp*(rand-0.5),cy(j),datasetopt,...
                'DisplayName',ct{j});
            xmin = min([xmin cx]);
            xmax = max([xmax cx]);
        end
        
        cx = X{i}(1);
        cy = Y{i}(outlier{i});
        ct = T{i}(outlier{i});
        for j = 1 : length(cy)
            plot(cx + plotdisp*(rand-0.5),cy(j),outliersetopt, ...
                'DisplayName',ct{j});
        end

    end

    set(gca,'XLim',[xmin-1 xmax+1]);
    ylim = get(gca,'YLim');
    set(gca,'YLim',[0,ylim(2)]);
    ylabel(labely);
    [sortedxpos isort] = sort(xpos); 
    set(gca,'XTick',sortedxpos, ...
        'XTickLabel',textleg(isort))    
    
    if addtorawdatamean
                
        errorbar(ha1,xpos+2*plotdisp,ymean,yerr,...
            'LineStyle','none',...
            'Color','b',...
            'LineWidth',1,...
            'MarkerFaceColor','w',...
            'MarkerEdgeColor','b',...
            'MarkerSize',10,...
            'Marker','o');
        ht = title( {'Raw data set' , ...
            '\color[rgb]{1 0 0} Mean +/- StdErr'} );
        
    else

        title('Raw data set')
        
    end
    
end

%% Plot histogram

if doplothistogram


    N = NaN(nbins,length(X));
    bins = linspace(min(dy),max(dy),nbins);

    spi = spi + 1;
    subplot(subplotind(spi))
    ha2 = cla;
    S.Plot.AxesHistogram = ha2;
    hold all
    cm = cmap(length(X));
    histleg = {};
    for i = 1 : length(X)
        [N(:,i) B] = hist(Y{i},bins);
        % recenter bars
        B = B + (i-1) * (B(2)-B(1)) * wfactor/2;
        hb = barh(B,N(:,i),wfactor,'FaceColor',cm(i,:));
        % recolor patches
        hpatch = get(hb,'Children');
        set(hpatch,...
            'FaceAlpha',histalpha, ...
            'EdgeColor','k');
        histleg{i} =  textleg{i};
    end

    S.Histogram = N;

    legend(textleg)
    xlabel('Count')
    ylabel(labely);
    title({'Histograms'});
    if doplotrawdata
        set(gca,'YLim',get(ha1,'Ylim'));
    end
    legend(histleg)
    
end


%% Plot boxes

if doplotbox
    
    spi =spi + 1;
    subplot(subplotind(spi))
    ha3 = cla;
    S.Plot.AxesBox = ha3;
    hold all
    HB = boxplot(dy,dx,'Position',xpos(boxpos),...
        'Whisker',whis,...
        'Symbol','ro');
    box off
    xl = get(gca,'XLim');
    yl = get(gca,'YLim');
    set(gca,'YLim',[0 yl(2)]);
    set(gca,'XTick',xpos(boxpos), ...
        'XTickLabel',textleg(boxpos))
    ylabel(labely);

    title({ 'Data repartition'});
    if printstathelp
        text( xl(1) + 0.2*xl(2), 0.9 * yl(2), ...
            {' - box: lower quartile, median, upper quartile', ...
            [' - whiskers: data within ',num2str(whis,strn),' times interquartile range'],...
            ' - crosses: outliers'});
    end

    % Plot student test
    yl = get(gca,'YLim');
    yrange = yl(2) - yl(1);
    spacefactor = length(X);

    for i = 1 : length(X)

        for j = i+1 : length(X)

            hup1 = HB(1,i);
            hup2 = HB(1,j);
            y1 = get(hup1,'YData');
            y2 = get(hup2,'YData');

            ybar1 = max( [ y1(2) y2(2) ] ) + (spacefactor*(i-1)+(2*j)+1) * spacer * yrange;
            ybar2 = max( [ y1(2) y2(2) ] ) + (spacefactor*(i-1)+(2*j)+2) * spacer * yrange;
            xbar1 = xpos(i);
            xbar2 = xpos(j);            
            
            [h{i,j},p{i,j},ci{i,j}] = ttest2(Yo{i},Yo{j},[],[],'unequal');

            if doplotstudenttest

                if h{i,j} == 1
                    barcol = 'k';
                else
                    barcol = 'r';
                end
                xd = [xbar1 xbar1 xbar2 xbar2];
                yd = [ybar1 ybar2 ybar2 ybar1];
                line(xd,yd,'Color',barcol);

                xtext = (xbar1 + xbar2 ) / 2;
                ytext = ybar2 + spacer/1.5 * yrange;
                ptext = ['P = ',sprintf(strn,p{i,j})];
                text(xtext,ytext,ptext,'HorizontalAlignment','center');                
                
            end

        end
    end

    if doplotstudenttest
        ht = get(gca,'Title');
        set(ht,'String',[get(ht,'String'),' & Two-sample t-test']);
    end
    
    S.TTest.h = h;
    S.TTest.p = p;
    S.TTest.ci = ci;
    
    set(gca,'YLimMode','auto');
    ylim = get(gca,'YLim');
    set(gca,'Ylim',[0,ylim(2)]);
    
    if doplotrawdata
        set(ha1,'YLim',get(ha3,'YLim'));
    end
    if doplothistogram
        set(ha2,'YLim',get(ha3,'YLim'));
    end

end




%% Fit

if doplotmeanstderr

    spi =spi + 1;
    subplot(subplotind(spi))
    ha4 = cla;
    S.Plot.AxesMeanStdErr = ha4;
    hold all
    he = errorbar(xpos,ymean,yerr,'k');
    set(he,datasetopt);
    
    if dofit
        
        plot(rfit)

        xl = get(gca,'XLim');
        yl = get(gca,'YLim');
        for i = 1 : length(param)
            fitinfo{i} = [param{i},' = ',num2str(rc(i),strn),...
                ' (',num2str(rconf(1,i),strn),...
                ',' num2str(rconf(2,i),strn),')'];
        end
        text(0.4*xl(2),0.9*yl(2), fitinfo);
        title({['Fit by ',funcstr],'(outliers excluded)'});
        legend( ...
            {'mean +/- stderr', ...
            ['fit by ',funcstr] });
    else
        legend( ...
            {'mean +/- stderr'});
    end
    
    if doplotrawdata
        set(gca,'YLim',get(ha1,'YLim'));
    end
    ylabel(labely)

end


%% Prepare outputs

if nargout > 0 
    varargout{1} = S;
end



%% Subfunction

    function out = struct2varargin(in)
        % Convert a struct in a cell of char in the Matlab 'field/value'
        % way, suitable to be a varargin cell

        if ~isstruct(in)
            errid = 'MATLAB:struct2varargin:BadInputType';
            errmsg = ['Input argument must be a struct, got a ' ,class(in),'.'];
            error(errid,errmsg);
        end

        names = fieldnames(in);
        values = struct2cell(in);
        n = length(names);
        out = cell(n,1);

        for i = 1 : n
            out(2*i-1) = names(i);
            out(2*i) = values(i);
        end

Contact us at files@mathworks.com