function BF= fitmethis(data,varargin)
% FITMETHIS finds best-fitting distribution
% F= fitmethis(X) finds the distibution that best fits data in vector X
% among all distributions available in MATLAB's function MLE. Either
% continuous or discrete distributions are used based on user input
% or the type of data supplied (see below)
%
% The function returns a structure array with fields:
% name: name of the distribution (see HELP MLE for a list)
% par: vector of parameter estimates (1, 2 or 3 values)
% ci: matrix of confidence limits, one column per parameter
% LL: Log-Likelihood of the data
% aic: Akaike Information Criterion
%
% Optional arguments can be specified as name/value pairs. Argument
% names are case insensitive and partial matches are allowed.
%
% Name Value
% 'dtype' character string that specifies if the data are continuous
% ('cont') or discrete ('disc'). If missing, the function
% decides the data are discrete if all values of X are
% natural numbers.
%
% 'ntrials' Specifies number of trials for the binomial distribution.
% NTRIALS must be either a scalar or a vector of the same
% size as X. If missing the binomial is not fitted.
%
% 'figure' Either 'on' (default), or 'off'. If 'on' a plot of the
% data and the fitting is produced (scaled to match the data).
% Requires aditional function 'plotfitdist'.
%
% 'alpha' A value between 0 and 1 specifying a confidence level
% for CI of 100*(1-alpha) (default is 0.05).
%
% 'delta' Range of Log-Likelihoods to select 'good' fits (default is 2)
%
% If X contains negative values, only the Normal distribution is fitted.
% Also, if X contains values > 1 the Beta distribution is not fitted.
%
% The function can be easily modified to choose 'good' fits based
% on AIC instead of LogLikelihood (see line 188) if that's what
% you want.
%
% Requires Statistics Toolbox
%
% Example:
% x= gamrnd(5,0.5,1000,1);
% F= fitmethis(x);
%
% Name Par1 Par2 Par3 LogL
% gamma 4.947e+00 5.034e-01 -1.461e+03
% gev -1.126e-02 8.965e-01 1.982e+00 -1.463e+03
warning off
% Defaults & Storage
dtype= {};
ntrials= [];
fig= 'on';
alpha= 0.05;
delta= 2;
F= struct('name',{},'par',[],'ci',[],'LL',[],'aic',[]);
% Arguments
for j= 1:2:length(varargin)
string= lower(varargin{j});
switch string(1:min(3,length(string)))
case 'dty'
dtype= varargin{j+1};
case 'ntr'
ntrials= varargin{j+1};
case 'fig'
fig= varargin{j+1};
case 'alp'
alpha= varargin{j+1};
case 'del'
delta= varargin{j+1};
otherwise
error('Unknown argument name');
end
end
% Distributions
Cdist= {'normal'; 'lognormal'; 'exponential'; 'gamma'; 'logistic';...
'loglogistic'; 'uniform'; 'weibull'; 'ev'; ...
'gev'; 'gp'; 'inversegaussian'; 'birnbaumsaunders';...
'beta'; 'nakagami'; 'rayleigh';'rician'};
Ddist= {'binomial'; 'nbin'; 'unid';'geometric';'poisson'};
% Try determine data type: Discrete or Continuous (avoid 0)
if isempty(dtype)
if isempty(find(1- (data+1)./(fix(data)+1), 1))
dtype= 'disc';
else
dtype= 'cont';
end
end
% Fit all...
switch dtype(1:4)
% Continuous
case 'cont'
for j= 1:numel(Cdist)
% If negative values, only fit normal
if min(data) < 0
[phat,pci]= mle(data,'distribution','normal','alpha',alpha);
F(j).name= Cdist{j};
F(j).par= phat;
F(j).ci= pci;
F(j).LL= sum(log(pdf('normal',data,F(j).par(1),F(j).par(2))));
F(j).aic= 2*2- 2*F(j).LL;
break
% Check if values > 1 for Beta
elseif max(data) > 1 && strcmp('beta',Cdist{j})
F(j).name= 'beta';
F(j).par= [];
F(j).ci= [];
F(j).LL= -Inf;
F(j).aic= Inf;
% Any other case ...
else
[phat,pci]= mle(data,'distribution',Cdist{j},'alpha',alpha);
F(j).name= Cdist{j};
F(j).par= phat;
F(j).ci= pci;
if numel(F(j).par) == 1
F(j).LL= sum(log(pdf(F(j).name,data,F(j).par(1))));
elseif numel(F(j).par) == 2
F(j).LL= sum(log(pdf(F(j).name,data,F(j).par(1),F(j).par(2))));
else
F(j).LL= sum(log(pdf(F(j).name,data,F(j).par(1),F(j).par(2),F(j).par(3))));
end
F(j).aic= 2*numel(F(j).par)- 2*F(j).LL;
end
end
% Discrete
case 'disc'
for j= 1:numel(Ddist)
% Binomial needs number of trials
if strcmp('binomial',Ddist{j})
F(j).name= 'binomial';
if isempty(ntrials) || (numel(ntrials) > 1 && numel(data) ~= numel(ntrials))
F(j).par= [];
F(j).ci= [];
F(j).LL= -Inf;
F(j).aic= Inf;
else
[phat,pci]= mle(data,'ntrials',ntrials,'distribution','binomial','alpha',alpha);
F(j).par= phat;
F(j).ci= pci;
F(j).LL= sum(log(pdf('bino',data,ntrials,F(j).par(1))));
end
else
[phat,pci]= mle(data,'distribution',Ddist{j},'alpha',alpha);
F(j).name= Ddist{j};
F(j).par= phat;
F(j).ci= pci;
if numel(F(j).par) == 1
F(j).LL= sum(log(pdf(F(j).name,data,F(j).par(1))));
elseif numel(F(j).par) == 2
F(j).LL= sum(log(pdf(F(j).name,data,F(j).par(1),F(j).par(2))));
else
F(j).LL= sum(log(pdf(F(j).name,data,F(j).par(1),F(j).par(2),F(j).par(3))));
end
F(j).aic= 2*numel(F(j).par)- 2*F(j).LL;
end
end
end
% Choose best ones (DeltaLL <= 2)
% Alternatively we can use F.aic (change good & index)
good= abs( max([F.LL])-[F.LL] ) <= delta;
BF= F(good);
index= sortrows([(1:size(BF,2))',[BF.LL]'],-2);
BF= BF(index(:,1));
% Nice output
fprintf('\n\t\t\t\tName\t\tPar1\t\tPar2\t\tPar3\t\tLogL\n')
for j= 1:size(BF,2)
switch numel(BF(j).par)
case 1
fprintf('%20s \t%10.3e \t\t\t\t\t\t\t%.3e\n',BF(j).name,BF(j).par,BF(j).LL)
case 2
fprintf('%20s \t%10.3e \t%10.3e \t\t\t\t%10.3e\n',BF(j).name,BF(j).par,BF(j).LL)
case 3
fprintf('%20s \t%10.3e \t%10.3e \t%10.3e \t%10.3e\n',BF(j).name,BF(j).par,BF(j).LL)
end
end
% Plot data histogram & best fit
if strcmp('on',fig)
if strcmp('binomial',BF(1).name)
plotfitdist(data,BF(1).name,BF(1).par,dtype,'ntrials',ntrials(1));
else
plotfitdist(data,BF(1).name,BF(1).par,dtype);
end
end