Code covered by the BSD License

# Violin Plots for plotting multiple distributions (distributionPlot.m)

by

### Jonas (view profile)

13 Apr 2009 (Updated )

Function for plotting multiple histograms side-by-side in 2D - better than boxplot.

### Editor's Notes:

This file was selected as MATLAB Central Pick of the Week

histogram(varargin)
```function [N,X,sp] = histogram(varargin)
% HISTOGRAM generates a histogram using the "optimal" number of bins
%
% If called with no output argument, histogram plots into the current axes
%
% SYNOPSIS [N,X,sp] = histogram(data,factor,normalize)
%          [...] = histogram(data,'smooth')
%          [...] = histogram(axesHandle,...)
%
% INPUT    data: vector of input data
%          factor: (opt) factor by which the bin-widths are multiplied
%                   if 'smooth' (or 's'), a smooth histogram will be formed.
%                   (requires the spline toolbox). For an alternative
%                   approach to a smooth histogram, see ksdensity.m
%                   if 'discrete' (or 'd'), the data is assumed to be a discrete
%                   collection of values. Note that if every data point is,
%                   on average, repeated at least 3 times, histogram will
%                   consider it a discrete distribution automatically.
%                   if 'continuous' (or 'c'), histogram is not automatically
%                   checking for discreteness.
%          normalize : if 1 (default), integral of histogram equals number
%                       data points. If 0, height of bins equals counts.
%                       This option is exclusive to non-"smooth" histograms
%          axesHandle: (opt) if given, histogram will be plotted into these
%                       axes, even if output arguments are requested
%
% OUTPUT   N   : number of points per bin (value of spline)
%          X   : center position of bins (sorted input data)
%          sp  : definition of the smooth spline
%
% REMARKS: The smooth histogram is formed by calculating the cumulative
%           histogram, fitting it with a smoothening spline and then taking
%           the analytical derivative. If the number of data points is
%           markedly above 1000, the spline is fitting the curve too
%           locally, so that the derivative can have huge peaks. Therefore,
%           only 1000-1999 points are used for estimation.
%           Note that the integral of the spline is almost exactly the
%           total number of data points. For a standard histogram, the sum
%           of the hights of the bins (but not their integral) equals the
%           total number of data points. Therefore, the counts might seem
%           off.
%
%           WARNING: If there are multiples of the minimum value, the
%           smooth histogram might get very steep at the beginning and
%           produce an unwanted peak. In such a case, remove the
%           multiple small values first (for example, using isApproxEqual)
%
%
% c: 2/05 jonas
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% test input
if nargin < 1
error('not enough input arguments for histogram')
end

% check for axes handle
if length(varargin{1}) == 1 && ishandle(varargin{1});
axesHandle = varargin{1};
varargin(1) = [];
else
% ensure compatibility to when axesHandle was given as last input
if nargin == 3 && ishandle(varargin{end}) && varargin{end} ~= 0
axesHandle = varargin{end};
varargin(end) = [];
else
axesHandle = 0;
end
end

% assign data
numArgIn = length(varargin);
data = varargin{1};
data = data(:);

% check for non-finite data points
data(~isfinite(data)) = [];

% check for "factor"
if numArgIn < 2 || isempty(varargin{2})
factor = 1;
else
factor = varargin{2};
end
if ischar(factor)
switch factor
case {'smooth','s'}
factor = -1;
case {'discrete','d'}
factor = -2;
case {'continuous','c'}
factor = -3;
otherwise
error('The only string inputs permitted for histogram.m are ''smooth'',''discrete'', or ''continuous''')
end
else
% check for normalize, but do so only if there is no "smooth". Note
% that numArgIn is not necessarily equal to nargin
if numArgIn < 3 || isempty(varargin{3})
normalize = true;
else
normalize = varargin{3};
end
end

% doPlot is set to 1 for now. We change it to 0 below if necessary.
doPlot = 1;

nData = length(data);
% check whether we do a standard or a smooth histogram
if factor ~= -1
% check for discrete distribution
[xx,nn] = countEntries(data);
% consider the distribution discrete if there are, on average, 3
% entries per bin
nBins = length(xx);
if factor == -2 || (factor ~= -3 && nBins*3 < nData)
% discrete distribution.
nn = nn';
xx = xx';
else
% not a discrete distribution
if nData < 20
warning('HISTOGRAM:notEnoughDataPoints','Less than 20 data points!')
nBins = ceil(nData/4);
else

% create bins with the optimal bin width
% W = 2*(IQD)*N^(-1/3)
interQuartileDist = diff(prctile(data,[25,75]));
binLength = 2*interQuartileDist*length(data)^(-1/3)*factor;

% number of bins: divide data range by binLength
nBins = round((max(data)-min(data))/binLength);

if ~isfinite(nBins)
nBins = length(unique(data));
end

end

% histogram
[nn,xx] = hist(data,nBins);
% adjust the height of the histogram
if normalize
Z = trapz(xx,nn);
nn = nn * nData/Z;
end

end
if nargout > 0
N = nn;
X = xx;
doPlot = axesHandle;
end
if doPlot
if axesHandle
bar(axesHandle,xx,nn,1);
else
bar(xx,nn,1);
end
end

else
% make cdf, smooth with spline, then take the derivative of the spline

% cdf
xData = sort(data);
yData = 1:nData;

% when using too many data points, the spline fits very locally, and
% the derivatives can still be huge. Good results can be obtained with
% 500-1000 points. Use 1000 for now
step = max(floor(nData/1000),1);
xData2 = xData(1:step:end);
yData2 = yData(1:step:end);

% spline. Use strong smoothing
cdfSpline = csaps(xData2,yData2,1./(1+mean(diff(xData2))^3/0.0006));

% pdf is the derivative of the cdf
pdfSpline = fnder(cdfSpline);

% histogram
if nargout > 0
xDataU = unique(xData);
N = fnval(pdfSpline,xDataU);
X = xDataU;
% adjust the height of the histogram
Z = trapz(X,N);
N = N * nData/Z;
sp = pdfSpline;
% set doPlot. If there is an axesHandle, we will plot
doPlot = axesHandle;
end
% check if we have to plot. If we assigned an output, there will only
% be plotting if there is an axesHandle.
if doPlot
if axesHandle
plot(axesHandle,xData,fnval(pdfSpline,xData));
else
plot(xData,fnval(pdfSpline,xData));
end
end
end```