Code covered by the BSD License  

Highlights from
fitmethis

fitmethis

by

 

04 Feb 2013 (Updated )

FITMETHIS finds best-fitting distribution to data vector

fitmethis(data,varargin)
function BF= fitmethis(data,varargin)
% FITMETHIS finds best-fitting distribution
%	F= fitmethis(X) finds the distibution that best fits data in vector X
%	among all distributions available in MATLAB's function MLE. Either 
%	continuous or discrete distributions are used based on user input 
%	or the type of data supplied (see below)
%
%	The function returns a structure array with fields: 
%	name: name of the distribution (see HELP MLE for a list)
%	par:  vector of parameter estimates (1, 2 or 3 values)
%	ci:   matrix of confidence limits, one column per parameter
%	LL:   Log-Likelihood of the data
%	aic:  Akaike Information Criterion
%
%	Optional arguments can be specified as name/value pairs. Argument 
%	names are case insensitive and partial matches are allowed.
%
%	Name			Value
%	'dtype'		character string that specifies if the data are continuous 
% 				('cont') or discrete ('disc'). If missing, the function 
% 				decides the data are discrete if all values of X are 
% 				natural numbers.
%
%	'ntrials'	Specifies number of trials for the binomial distribution. 
% 				NTRIALS must be either a scalar or a vector of the same 
% 				size as X. If missing the binomial is not fitted.
%
%	'figure'	Either 'on' (default), or 'off'. If 'on' a plot of the 
% 				data and the fitting is produced (scaled to match the data).
%				Requires aditional function 'plotfitdist'.
%
%	'alpha'		A value between 0 and 1 specifying a confidence level
%				for CI of 100*(1-alpha) (default is 0.05).
%
%	'delta'		Range of Log-Likelihoods to select 'good' fits (default is 2)
% 
%	If X contains negative values, only the Normal distribution is fitted.
%	Also, if X contains values > 1 the Beta distribution is not fitted.
%
%	The function can be easily modified to choose 'good' fits based
%	on AIC instead of LogLikelihood (see line 188) if that's what
%	you want.
%
%	Requires Statistics Toolbox
%
%	Example:
%	x= gamrnd(5,0.5,1000,1);
%	F= fitmethis(x);
%
%   Name      Par1       Par2        Par3         LogL
%  gamma   4.947e+00   5.034e-01               -1.461e+03
%    gev  -1.126e-02   8.965e-01   1.982e+00   -1.463e+03


warning off


% Defaults & Storage
dtype= {};
ntrials= [];
fig= 'on';
alpha= 0.05;
delta= 2;
F= struct('name',{},'par',[],'ci',[],'LL',[],'aic',[]);


% Arguments
for j= 1:2:length(varargin)
	string= lower(varargin{j});
	switch string(1:min(3,length(string)))
		case 'dty'
			dtype= varargin{j+1};
		case 'ntr'
			ntrials= varargin{j+1};
		case 'fig'
			fig= varargin{j+1};
		case 'alp'
			alpha= varargin{j+1};
		case 'del'
			delta= varargin{j+1};
		otherwise
			error('Unknown argument name');
	end
end



% Distributions
Cdist= {'normal'; 'lognormal'; 'exponential'; 'gamma'; 'logistic';...
			'loglogistic'; 'uniform'; 'weibull'; 'ev'; ...
			'gev'; 'gp'; 'inversegaussian'; 'birnbaumsaunders';...
			'beta'; 'nakagami'; 'rayleigh';'rician'};

Ddist= {'binomial'; 'nbin'; 'unid';'geometric';'poisson'};



% Try determine data type: Discrete or Continuous (avoid 0)
if isempty(dtype)
	if isempty(find(1- (data+1)./(fix(data)+1), 1)) 
		dtype= 'disc';
	else
		dtype= 'cont';
	end
end



% Fit all... 
switch dtype(1:4)

	% Continuous
	case 'cont'
	for j= 1:numel(Cdist)
		% If negative values, only fit normal
		if min(data) < 0
			[phat,pci]= mle(data,'distribution','normal','alpha',alpha);
			F(j).name= Cdist{j};
			F(j).par= phat;
			F(j).ci=  pci;
			F(j).LL=  sum(log(pdf('normal',data,F(j).par(1),F(j).par(2))));
			F(j).aic= 2*2- 2*F(j).LL;
			break

		% Check if values > 1 for Beta
		elseif max(data) > 1 && strcmp('beta',Cdist{j})
			F(j).name= 'beta';
			F(j).par= [];
			F(j).ci=  [];
			F(j).LL=  -Inf;
			F(j).aic= Inf;

		% Any other case ...
		else
			[phat,pci]= mle(data,'distribution',Cdist{j},'alpha',alpha);
			F(j).name= Cdist{j};
			F(j).par= phat;
			F(j).ci=  pci;
			if numel(F(j).par) == 1
				F(j).LL=  sum(log(pdf(F(j).name,data,F(j).par(1))));
			elseif numel(F(j).par) == 2
				F(j).LL=  sum(log(pdf(F(j).name,data,F(j).par(1),F(j).par(2))));
			else
				F(j).LL=  sum(log(pdf(F(j).name,data,F(j).par(1),F(j).par(2),F(j).par(3))));
			end
			F(j).aic= 2*numel(F(j).par)- 2*F(j).LL;
		end
	end


	% Discrete
	case 'disc'
	for j= 1:numel(Ddist)

		% Binomial needs number of trials
		if strcmp('binomial',Ddist{j}) 
			F(j).name= 'binomial';
			if isempty(ntrials) || (numel(ntrials) > 1 && numel(data) ~= numel(ntrials))
				F(j).par= [];
				F(j).ci=  [];
				F(j).LL= -Inf;
				F(j).aic= Inf;
			else
				[phat,pci]= mle(data,'ntrials',ntrials,'distribution','binomial','alpha',alpha);
				F(j).par= phat;
				F(j).ci=  pci;
				F(j).LL=  sum(log(pdf('bino',data,ntrials,F(j).par(1))));
			end
		else
			[phat,pci]= mle(data,'distribution',Ddist{j},'alpha',alpha);
			F(j).name= Ddist{j};
			F(j).par= phat;
			F(j).ci=  pci;
			if numel(F(j).par) == 1
				F(j).LL=  sum(log(pdf(F(j).name,data,F(j).par(1))));
			elseif numel(F(j).par) == 2
				F(j).LL=  sum(log(pdf(F(j).name,data,F(j).par(1),F(j).par(2))));
			else
				F(j).LL=  sum(log(pdf(F(j).name,data,F(j).par(1),F(j).par(2),F(j).par(3))));
			end
			F(j).aic= 2*numel(F(j).par)- 2*F(j).LL;
		end
	end

end


% Choose best ones (DeltaLL <= 2)
% Alternatively we can use F.aic (change good & index)
good= abs( max([F.LL])-[F.LL] ) <= delta;
BF= F(good);
index= sortrows([(1:size(BF,2))',[BF.LL]'],-2);
BF= BF(index(:,1));



% Nice output
fprintf('\n\t\t\t\tName\t\tPar1\t\tPar2\t\tPar3\t\tLogL\n')
for j= 1:size(BF,2)
	switch numel(BF(j).par)
		case 1
			fprintf('%20s \t%10.3e \t\t\t\t\t\t\t%.3e\n',BF(j).name,BF(j).par,BF(j).LL)
		case 2
			fprintf('%20s \t%10.3e \t%10.3e \t\t\t\t%10.3e\n',BF(j).name,BF(j).par,BF(j).LL)
		case 3
			fprintf('%20s \t%10.3e \t%10.3e \t%10.3e \t%10.3e\n',BF(j).name,BF(j).par,BF(j).LL)
	end
end



% Plot data histogram & best fit
if strcmp('on',fig)
	if strcmp('binomial',BF(1).name)
		plotfitdist(data,BF(1).name,BF(1).par,dtype,'ntrials',ntrials(1)); 
	else
		plotfitdist(data,BF(1).name,BF(1).par,dtype);
	end
end






Contact us