No BSD License  

Highlights from
DistributionFit

image thumbnail
from DistributionFit by J. David Cogdell
Fits normal, lognormal, and weibull distributions to data.

distributionfit(data,distribution,nbins)
function [pdffit,offset,A,B,resnorm,h] = distributionfit(data,distribution,nbins)
%function [pdffit,offset,A,B,resnorm,h] = distributionfit(data,distribution,nbins)
%PURPOSE                                                             jdc  rev. 06-jun-05
%   Fit one of three probability distributions (normal, lognormal, weibull)
%   to input data vector. If the distribution is specified as 'best' the dis-
%   tribution that best fits the data is selected automatically.
%INPUT
%   If nargin==1, "distribution" is prompted for and entered interactively
%
%   data         - n x 1 or 1 x n  input data vector 
%   distribution - probability distribution to fit to "data". Can
%                  be 'normal', 'lognormal', 'weibull', or 'best' ... default: 'best'
%   nbins        - number of bar-chart bins ......................... default: sqrt(length(data))
%OUTPUT
%   pdffit       - fitted probability density function - n x 2 matrix with column 1 the
%                  x-values, column 2 the y values
%   offset       - amount by which the data was offset for lognormal or weibull fits 
%                  (to satisfy the positive-definite requirements for these distributions).
%                  Note: this is roughly equivalent to fitting a 3- rather than a 2-parameter
%                  distribution.
%   A,B          - distribution parameters - mu and sigma for normal and lognormal distributions,
%                  scale and shape parameters for weibull distribution
%   h            - handles to the bar chart and probability density curve
%
%TYPICAL FUNCTION CALLS
%  distributionfit(randn(10000,1));
%  distributionfit(wblrnd(2,3,10000,1));
%  distributionfit(wblrnd(2,3,10000,1),'weibull');
%  distributionfit(lognrnd(1.5,.5,10000,1),'lognormal');
%  distributionfit(lognrnd(1.5,.5,10000,1),'best');
%  distributionfit(lognrnd(1.5,.5,10000,1),'lognormal');
%
%REFERENCE
%  Statistics Toolbox Version 3.0.2, function HISTFIT.M 

data = data(~isnan(data));
data = data(:);
ndata = length(data);
if nargin<3 | isempty(nbins), nbins = ceil(sqrt(ndata)); end
if nargin==1,
   distID = menu('Choose a Distribution','Normal','Lognormal','Weibull','best');
else
   if     strfind(lower(distribution),'lognormal'), distID = 2;
   elseif strfind(lower(distribution),'normal'   ), distID = 1;
   elseif strfind(lower(distribution),'weibull'  ), distID = 3;
   elseif strfind(lower(distribution),'best'     ), distID = 4;
   elseif isempty(distribution),                    distID = 4;
   end
end   
switch distID
   case 1, distribution = 'Normal';
   case 2, distribution = 'Lognormal';
   case 3, distribution = 'Weibull';
   case 4
      data = sort(data);
      cdfe = (1:ndata)'/ndata;                      % experimental cdf
      phat = mle(data,'distribution','Normal');
      A = phat(1);   % for normal & lognormal, phat = [mu std], for weibull, = [A B]
      B = phat(2);
      cdft = cdf('Normal',data,A,B);             % best-fit cdf
      residuals = cdfe-cdft;
      resnormNormal = residuals'*residuals;
      %-------------------------------------      
      offset = -min(data)+10*eps;
      offsetdata = data+offset; % zero-shift data so smallest value is 0+
      phat = mle(offsetdata,'distribution','Lognormal');
      A = phat(1);   % for normal & lognormal, phat = [mu std], for weibull, = [A B]
      B = phat(2);
      cdft = cdf('Lognormal',offsetdata,A,B);             % best-fit cdf
      residuals = cdfe-cdft;
      resnormLognormal = residuals'*residuals;
      %-------------------------------------      
      phat = mle(offsetdata,'distribution','Weibull');
      A = phat(1);   % for normal & lognormal, phat = [mu std], for weibull, = [A B]
      B = phat(2);
      cdft = cdf('Weibull',offsetdata,A,B);             % best-fit cdf
      residuals = cdfe-cdft;
      resnormWeibull = residuals'*residuals;
      %-------------------------------------    
      resnorms = [resnormNormal resnormLognormal resnormWeibull];
      distID = find(resnorms==min(resnorms));
      switch distID
         case 1, distribution = 'Normal';
         case 2, distribution = 'Lognormal';
         case 3, distribution = 'Weibull';
      end      
end 
if distID==2 | distID==3, 
   offset = -min(data)+10*eps;
   data = data+offset; % zero-shift data for lognormal and weibull fits so smallest value is 0+
elseif distID==1,
   offset = 0;   
end   
%----------------------------------------------------------------------
figure
[n,xbin]=hist(data,nbins);
hh = bar(xbin,n,1); % get number of counts per bin and bin width
xd = get(hh,'Xdata'); % retrieve the x-coordinates of the bins.
rangex = max(xd(:)) - min(xd(:)); % find the bin range
binwidth = rangex/nbins;    % find the width of each bin.
close(gcf);   % close figure (will replot on probability scale)
%----------------------------------------------------------------------
ch = get(0,'children');
if isempty(ch),
   figure(1);
else   
   figure(max(ch)+1);
end
set(0,'Units','inches');
ss = get(0,'ScreenSize');
set(0,'Units','pixels');
width = 3;
height = 3;
edge = 0.25;
set(gcf,'Units','inches','Position',[ss(3)-width-edge ss(4)-height-edge width height],...
        'Color',[.8 .8 .8],'InvertHardCopy','off','PaperPosition',[1 1 width height]);
nscaled = n/(ndata*binwidth);   % convert bin counts to probabilities
hh = bar(xbin,nscaled,1);       % draw the probability-scaled bars
set(hh,'EdgeColor',[.6 .6 .6],'FaceColor',[.9 .9 .9]);
set(gca,'FontSize',8);
xlabel('Data');
ylabel('Probability Density');
%----------------------------------------------------------------------
phat = mle(data,'distribution',distribution); % probability distribution parameter estimation
A = phat(1);   % for normal & lognormal, phat = [mu std], for weibull, = [A B]
B = phat(2);
switch distID  % get limits for plotting the best-fit pdf curve
   case 1,
      lolim = norminv(0.0001,A,B);
      hilim = norminv(0.9999,A,B); 
   case 2,
      lolim = logninv(0.0001,A,B);
      hilim = logninv(0.9999,A,B);
   case 3,
      lolim = wblinv(0.0001,A,B);
      hilim = wblinv(0.9999,A,B);
end      
xpdf = (lolim:(hilim-lolim)/100:hilim); % construct the x-vector for the pdf curve
ypdf = pdf(distribution,xpdf,A,B);    
hh1 = line(xpdf,ypdf,'Color','r','LineWidth',2); % overplot the histogram with the best-fit pdf curve
pdffit = [xpdf(:) ypdf(:)];
%----------------------------------------------------------------------
data = sort(data);                           % compute resnorm
cdfe = (1:ndata)'/ndata;                      % experimental cdf
cdft = cdf(distribution,data,A,B);             % best-fit cdf
residuals = cdfe-cdft;
resnorm = residuals'*residuals;
%----------------------------------------------------------------------
xlim = get(gca,'Xlim');
ylim = get(gca,'Ylim');
trd = text(xlim(2),ylim(2),distribution,'FontSize',8,'HorizontalAlignment','right','VerticalAlignment','bottom'); 
ext = extent(trd);
tld = text(ext(1),ylim(2),'Distribution: ',       'FontSize',8,'HorizontalAlignment','right','VerticalAlignment','bottom');

trr = text(xlim(2),ylim(2)-ext(4),sprintf('%8.1e ',resnorm),'FontSize',8,'HorizontalAlignment','right','VerticalAlignment','middle');
ext = extent(trr);
tll = text(ext(1),ylim(2)-ext(4),'Resnorm: ','FontSize',8,'HorizontalAlignment','right','VerticalAlignment','middle');

tt =    text(xlim(2),ylim(2)-ext(4),sprintf('%4.2f   ',A),'FontSize',8,'HorizontalAlignment','right','Visible','off');
ext = extent(tt);

i = 1;
switch distID
   case {1,2} 
      if distID==2,
         tl(i) = text(ext(1),ylim(2)-(i+1)*ext(4),'Zero Shift: ');
         tr(i) = text(ext(1),ylim(2)-(i+1)*ext(4),sprintf('%+5.2f',offset));
         i = i+1;
      end   
      tl(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),'Sigma: ');
      tr(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),sprintf('%4.2f',B));
      i = i+1;
      tl(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),'Mu: ');
      tr(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),sprintf('%4.2f',A));
   case 3   
      tl(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),'Zero Shift: ');
      tr(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),sprintf('%+5.2f',offset));
      i = i+1;
      tl(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),'scale: ');
      tr(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),sprintf('%4.2f',A));
      i = i+1;
      tl(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),'shape: ');
      tr(i) =    text(ext(1),ylim(2)-(i+1)*ext(4),sprintf('%4.2f',B));
end
set(tl,'Color','k','FontSize',8,'HorizontalAlignment','right','VerticalAlignment','middle'); 
set(tr,'Color','k','FontSize',8,'HorizontalAlignment','left', 'VerticalAlignment','middle'); 
%----------------------------------------------------------------------
h = [hh; hh1];

Contact us at files@mathworks.com