Code covered by the BSD License

genfit

12 Nov 2012 (Updated )

This is a data fitting program tailored for use in physics or other physical sciences.

fitresult=genfit(X,Y,fhandle,params,varargin)
function fitresult=genfit(X,Y,fhandle,params,varargin)
% genfit: Linear and non-linear data fitting. Provides uncertainties in the fit parameters.
%------------------------------------------------------------------------------------------------------------
% PROGRAM: genfit
% MODIFIED: 9/2014
%
% SYNTAX:  fitresult=genfit(X,Y,fhandle,params <,fittype,outputoptions,nonlincontrolstring>);
%                        <...> indicates optional parameters.
%
% This routine does a least squares fit to the data and provides the errors in the
% parameters. It accepts two matrixes, x (Nx1) and [y dy] (Nx2), and the handle of a function
% f([a1,...,aM],x) : Real->Real ,which may be nonlinear in the parameters a1,..aN.
% The routine then fits f([a1,...,aM],x) to the data y, by varying [a1,...aM]. Various
% options can be specified to control the fit type, output, tolerances etc.  The routine
% returns the best fit parameters [a1,...,aM], the uncertainty in the parameters, [da1,...,daM],
% the reduced Chi-squared around the solution, etc.  When the fit function f is linear in the
% parameters either a linear or a nonlinear fit may be chosen.  If the function is not linear in
% the parameters, chose a nonlinear fit to obtain meaningful results.  The linear method is
% deterministic but may encounter machine-precision problems.  (The routine attempts to warn the
% user when this is the case.)  Also, for large datasets (more than ~1000 data points) the
% linear method may be slow. In that case, use the nonlinear method. If the uncertainty in the
% data points is not provided (both X and Y variables have only a single column) then the uncertainty
% is taken as the residual around the fit. In that case, the reduced Chi-squared value will
% automatically be near unity, and does not provide a meaningful test of the model.
%
% NOTE:  If X uncertainties are supplied as the second column in the x data
% vector, the fit will be done twice. The first pass is done without using the x
% uncertainties in order to get an initial fit which is used to convert the x-uncertainties
% to y-uncertainties via the slope of the best fit function. Genfit is then it is called
% again and supplied with the full uncertainties in the y-points: namely, the original
% y-uncertainties added in quadrature with the calculated contribution from the x-uncertainties.
%
%
% INPUT ARGUMENTS:
%
% X         = Nx1 matrix.  Independent varaible data values in column 1. OR
%             Nx2 matrix.  Independent variable data values in column 1 and uncertainties in column 2.
% Y         = Nx1 matrix.  Dependent varaible data values in column 1. OR
%             Nx2 matrix.  Dependent variable data values in column 1 and uncertainties in column 2.
% fhandle   = Handle of a function to fit to, e.g. if we wish to fit to the function
%             straighline.m in the current matlab path, then fhandle should be @straightline.
%             The function definintion must have the form y=fun_name(a,x), where 'a' is a
%             vector of parameters with nparams elements and x is the the dependent variable
%             (a vector of any length).
% params    = Initial values for the parameters.  For linear fits, the values are not used but the
%             number of fit parameters is obtained from the length of this vector.  Thus, for
%             linear fits params=[1:nparams] is just as good as actual guesses for the initial values.
%             For the nonlinear fit however, good guesses for the initial parameters are crucial.
%             If the initial values are too far off the fit may find a local minimum that isn't
%             the best fit.
% fittype   = 'lin' for a linear fit, 'linreg' for a linear fit using the "regress" function from
%             the statistics toolbox (not to be used when uncertainties are specified), and 'nonlin'
%             for a nonlinear fit with or without uncertainties.
%             Default is 'lin'.
% outputoptions = Vector of flags. Set the desired flag(s) to activate the corresponding option(s).
%             Setting a flag to zero deactivates the corresponding option. Default is 2
%             Flag number   Option
%             1             Set to 1 to suppress plotting the data and resulting fit. Set to 2 to also
%                           plot the fits corresponding to the one-sigma error in the parameters.
%             2             Set to 2 to completely suppress calculation of uncertainty in
%                           the fit parameters. (Value 1 is not currently used.)
%             3             Set to 1 to produce no workspace output (run silently).
%             Default is outputoptions=[0 0 0].
% nonlincontrolstring= Either a string in the form 'options=optimset(...)' to be evaluated inside
%             the function to allow direct control of the nonlinear fit, or alternatively (and more simply)
%             the structure "options" generated by the command options=optimset(...).
%
% OUTPUT ARGUMENTS:
%
% The single output variable output is a structure with the following fields:
% fitresult.solution    = [a1,...,aM] giving the best fit.
% fitresult.uncertainty = [da1,...,daM] the 1-sigma uncertainty in the parameters about the best fit.
% fitresult.fitdomain   = The domain x over which the fit was actually performed (may be modified by NaNs in the data set, etc.)
% fitresult.fitlocus    = Locus of the solution on the domain x.
% fitresult.residual    = Locus of the residual over the domain x.
% fitresult.method      = Method used for fit ('linlsq','linreg','nonlinlsq')
% fitresult.rxs0        = The reduced Chi squared at the best fit values [a1,...,aM]
% fitresult.probrxs0 = The probability of obtaining rxs0 or greater for the reduced Chisqr
%                         if the errors supplied were truly random errors of the correct size.
% fitresult.xerrorsupplied  = "1" if x-error bars were supplied with the data, "0" otherwise.
% fitresult.yerrorsupplied  = "1" if y-error bars were supplied with the data, "0" otherwise.
% fitresult.yfromxerrors    = The y-error bar contributions that were generated from the
%                             x-error bars that were supplied. Note that to get the
%                             complete y-error, this must be added to the y-error supplied
%
% NOTE: When fitresult.probrxs0 << 1, the value (1-fitresult.probrxs0) is essentially the likelihood
% that the model is inconsistent with the data. But fitresult.probrxs0 is not meaningful unless error
% bars were supplied with the data (fitresult.errorsupplied=1). When error bars are not supplied,
% the uncertainties are assumed to be uniform and set equal to be the rms residual. In
% other words, rxs0 will be very close to unity and probrxs0 should be near 0.5.
%
% EXAMPLES:
%           X=[0.8; 2.0; 3.0; 3.8; 5.1; 6.1];
%           Y=[[4.3,1.5];[5.6,2.5];[12.3,1.0];[13.0,0.5];[18.7,1.7];[19.1,1.5]];
%           options=optimset('TolFun',1e-3);
%           result=genfit(X,Y,'straightline',[1 1],'nonlin',[],options);
%
%------------------------------------------------------------------------------------------------------------
% SYNTAX:  fitresult=genfit(X,Y,fhandle,params <,fittype,outputoptions,nonlincontrolstring>)
%------------------------------------------------------------------------------------------------------------

%% BEGIN parameter processing

if nargin>=5, fittype=varargin{1}; else fittype='lin'; end;
defaultoptions=[0,0,0];
if nargin>=6, outputoptions=varargin{2}; else outputoptions=defaultoptions; end;
if nargin>=7, nonlincontrolstring=varargin{3}; else nonlincontrolstring=''; end;
if length(outputoptions)<3
outputoptions=[outputoptions(1:end),defaultoptions(length(outputoptions)+1:length(defaultoptions))];
end
showfit=(outputoptions(1)>1 || outputoptions(1)==0);
showatend=false;                                                                % Will be set to true if we are in the first pass of an x->y error transformation
suppressErrorCalc=(outputoptions(2)==2);
silentrun=(outputoptions(3)==1);
nparams=length(params);
NextPlot_current=get(gca,'NextPlot');                                           % Get the original 'hold' state of the current figure

if size(X,1)<size(X,2)                                                          % If X data is provided as a row vector or 2xN matrix, transpose it.
X = X.';
disp('GENFIT: X vector was transposed to provide a column vector (or a pair of columns if X-errors were supplied).');
end;                                                                            % If Y data is provided as a row vector or 2xN matrix, transpose it.
if size(Y,1)<size(Y,2)
Y = Y.';
disp('GENFIT: Y vector was transposed to provide a column vector (or a pair of columns if Y-errors were supplied).');
end;

x=X(:,1);
yraw=Y(:,1);
unity=diag(ones(nparams,1));

notNaNs = imag(yraw)==0 & ~isnan(yraw) & ~isinf(yraw);                          % Whenever the y-value is a NaN, Inf or Complex remove the
numNaNs=sum(~notNaNs);                                                          % corresponding points from the data set
if numNaNs>=1
disp(['GENFIT: Ignoring NaN(s), Inf(s) and Complex values in Y(:,1). ',num2str(numNaNs),' such points removed from data set before fit attempt.']);
end
x=x(notNaNs);
yraw=yraw(notNaNs);

if size(Y,2)==2                                                                 % Check whether y error bars have been supplied
yerr=Y(:,2);
yerr=yerr(notNaNs);
yerrset=(1==1);                                                             % Set a flag if y error bars are supplied
else
yerrset=(0==1);
end;

if size(X,2)==2                                                                 % Check whether x error bars have been supplied
xerr=X(:,2);
xerr=xerr(notNaNs);
xerrset=(1==1);
else
xerrset=(0==1);
end;
if xerrset                                                                      % going to have to run again to make xerrors into yerrors, so this is just the initial pass. xerrset will be false on the second pass.
if silentrun
silentrunatend = true;
else
silentrunatend = false;                                                 % don't want to display the results from the preliminary fit (so silentrun set to true) but still want to display the results after both passes through genfit
end
silentrun=true;
if showfit
showatend = true;                                                       % still want to showm the fit at the end, but only after the second pass has been executed
showfit = false;
end
end
N=length(x);

if length(x)~=length(yraw), error('X and Y must be the same length!'); end;
if length(x) < nparams, error('Number of data points is too small to determine model parameters'); end;

%% ==BEGIN linear least squares fit ====================================================================
if strcmp(fittype,'lin')
f1=zeros(nparams,N);
if yerrset==0                                                               % If there are no error bars we'll get the fit and use the residual
yerr1=sqrt(mean((diff(yraw)).^2))*ones(size(yraw));                     % Get the initial fit using a fabricated uncertainty
y1=yraw./yerr1;
clear f1;
for l=1:nparams                                                         % Each row corresponds to a term in the fit function
for i=1:N                                                           % Each column corresponds to a value of x
f1(l,i)=feval(fhandle,unity(l,:),x(i))/yerr1(i);                % The matrix element is the value of the term in the
end                                                                 % fit function for a given x, where the parameter has been set to
end;                                                                    % unity, all normalized by the y measurement error.

beta1=f1*y1;                                                            % f*y is similar to y.^2 but with the contributions of each term in the fit function separated
alpha1=f1*f1';                                                          % nparmams*nparams size matrix, this shows the average size^2 of the contributions of each term
% inv(alpha1) is the error matrix
errmat1 = inv(alpha1);
unitytest = errmat1*alpha1;                                             %#ok<MINV>
unitytol=0.1;
if ~inrange(unitytest,...
diag(ones(1,nparams))-unitytol*ones(nparams,nparams),...
diag(ones(1,nparams))+unitytol*ones(nparams,nparams))
disp('GENFIT: Error matrix is close to being singular. Try using fit method ''nonlin'' instead of ''lin''.');
end
a1=alpha1\beta1;                                                        % Best fit values of the coefficients, same as (error matrix)*beta1
residual1=yraw-feval(fhandle,a1,x);                                     % data - fit.
yerr=sqrt(mean(residual1.^2))*ones(size(yraw));                         % Uncertainty taken as the rms residual
end

y=yraw./yerr;
f=zeros(nparams,N);
for l=1:nparams                                                             % Each row corresponds to a term in the fit function
for i=1:N                                                               % Each column corresponds to a value of x
f(l,i)=feval(fhandle,unity(l,:),x(i))/yerr(i);                      % The matrix element is the value of the term in the
end                                                                     % fit function for a given x, where the parameter has been set to
end;                                                                        % unity, all normalized by the y measurement error.

beta=f*y;                                                                   % f*y is similar to y.^2 but with the contributions from each term in the fit function separated
alpha=f*f';                                                                 % nparmams*nparams size matrix, this shows the average size^2 of the contributions of each term
% in some sense the inverse of the average of each term
errmat=alpha\diag(ones(size(alpha,1),1));
unitytest = inv(alpha)*alpha;                                               %#ok<MINV>
unitytol=0.1;
if ~inrange(unitytest,...
diag(ones(1,nparams))-unitytol*ones(nparams,nparams),...
diag(ones(1,nparams))+unitytol*ones(nparams,nparams))
disp('GENFIT: Error matrix is close to being singular. Try using fit method ''nonlin'' instead of ''lin''.');
end

a=errmat*beta;                                                              % Best fit values of the coefficients
aerr=1./sqrt(diag(alpha));                                                  % Uncertainty in the best fit value of the components

residual=yraw-feval(fhandle,a,x);                                           % data - fit.
chisqr=sum(residual.^2./yerr.^2 );                                          % chi-squared
rxs0=1/(N-nparams)*chisqr;                                                  % reduced chisquared at the solution
if ~silentrun
fprintf('%s\n','The fit parameters and the uncertainties in their fitted values are listed in the rows below:');
fprintf('%s\n',matrow);
end
end

if showfit
if yerrset
if length(yerr)<=300
errorbar(x,yraw,yerr,'b.','MarkerSize',3); hold on;
else
plot(x,yraw,'bo','MarkerSize',3); hold on;
if ~silentrun
disp(' ');
disp('Not showing error bars due to the large number of points');
disp(' ');
end
end
else
plot(x,yraw,'bo','MarkerSize',3); hold on;
end
plot(x,feval(fhandle,a',x),'g-','LineWidth',1);
hold off;
end

fitresult.solution=a';
fitresult.uncertainty=aerr';
fitresult.fitdomain=x;
fitresult.fitlocus=feval(fhandle,fitresult.solution,x);
fitresult.residual=residual;
fitresult.method='linlsq';
fitresult.rxs0=rxs0;
fitresult.probrxs0=gammainc(rxs0*(N-nparams)/2,(N-nparams)/2,'upper');      % See comment in corresponding line in the nonlinear fit section below
fitresult.xerrorsupplied = xerrset;                                         % 1 if the errors were provided
fitresult.yerrorsupplied = yerrset;

elseif strcmp(fittype,'nonlin')
%% == BEGIN nonlinear least squares fit ================================================================

y=yraw;
if ~silentrun
options=optimset('Display','Notify');                                   %,'TolX',1e-12,'TolFun',1e-12,'MaxFunEval',1000,'MaxIter',1000);
else
options=optimset('Display','Off');
end

if  ischar(nonlincontrolstring)
if ~isempty(nonlincontrolstring)
eval(nonlincontrolstring);
end
else
options=nonlincontrolstring;                                            % In this case "nonlincontrolstring" is not a string but an options structure created with "optimset"
end

if ~yerrset
afminsearch=fminsearch(@rmsfitresidual,params,options,fhandle,x,y);   	% fminsearch is more robust than lsqnonlin in that it finds the correct
if ~suppressErrorCalc
[a,~,residual,~,~,~,jacobian] =...                                  % lsqnonlin is then used to generate the jacobian at
lsqnonlin(@fitresidual,afminsearch,[],[],options,...           	% the solution which is needed for the uncertainty estimtes
fhandle,x,y);                                                   % The parameters sent to lsqnonlin are from the results of fminsearch
J=zeros(1,length(a)); da=zeros(1,length(a));
for r=1:length(a)
J(r)=sqrt(sum( (jacobian(:,r)).^2 ));                       	% Uncertainty is estimated from the lowest order expansion
da(r)=abs(rmss(residual)/J(r));                               	% of the residual about the solution: (Delta Res)^2=J(r)^2*(Delta a)^2
end
end
else
if ~suppressErrorCalc
afminsearch=fminsearch(@rmschisqr,params,options,fhandle,x,y,yerr);
[a,~,residual,~,~,~,jacobian] =...
lsqnonlin(@weightedresidual,afminsearch,[],[],options,...
fhandle,x,y,yerr);
J=zeros(1,length(a)); da=zeros(1,length(a));
for r=1:length(a)
J(r)=sqrt(sum( (jacobian(:,r)).^2 ));
da(r)=abs(rmss(residual)/J(r));
end
end
end

if ~silentrun                                                               % Print the best fit values of the parameters to the workspace
disp(['Parameters:    ',num2str(a)])
disp(['Uncertainties: ',num2str(da)])
end

if showfit                                                                  % Display the fit in a plot
if yerrset
if length(yerr)<=300
errorbar(x,yraw,yerr,'b.','MarkerSize',3); hold on;
else
plot(x,yraw,'bo','MarkerSize',3); hold on;
if ~silentrun
disp(' ');
disp('Not showing error bars due to the large number of points');
disp(' ');
end
end
else
plot(x,yraw,'bo','MarkerSize',3); hold on;
end
plot(x,feval(fhandle,a',x),'r-','LineWidth',1);
hold off;
end

if yerrset
rxs0=1/(N-nparams) * sum((y-feval(fhandle,a,x)).^2./yerr.^2);           % Reduced X-sqared at the solution
else
rxs0=1/(N-nparams) * sum((y-feval(fhandle,a,x)).^2)/rmss(residual).^2;
end

fitresult.solution=a;
fitresult.uncertainty=da;
fitresult.fitdomain = x;
fitresult.fitlocus=feval(fhandle,fitresult.solution,x);
fitresult.residual=-residual;                                               % fminsearch appears to define the residual as fit minus data rather than data minus fit.
fitresult.probrxs0=gammainc(rxs0*(N-nparams)/2,(N-nparams)/2,'upper');
fitresult.xerrorsupplied = xerrset;                                         % 1 if the relevant errors were provided
fitresult.yerrorsupplied = yerrset;
fitresult.method='nonlin';
fitresult.rxs0=rxs0;                                                        % fitresult.probrxs0 is the integral of the tail of the chi-squared distribution (see e.g. "Data Reduction and Error Analysis for the Physical Sciences" by
% P.R.Bevington and D.K.Robinson, Appendix C.4.). This is the probability of obtaining the reduced chi-squared value that was found or a greater value, if the
% uncertainties were given correctly and if the model (fit function) is correct.
else
error('GENFit:InvalidFitType',['GENFIT: Fit type specification string ''',fittype,''' not recognized.']);
end

%% == BEGIN x-errorbar handling ===========================================================================
if xerrset
if ~silentrunatend
disp('GENFIT: Using the x-uncertainties to generate y-uncertainties from the initial fit.');
end
precision = eps(class(Y));                                              	% Machine precision of the data variables (Y)
epsilon = abs(sort(diff(sort(x)))); epsilon = epsilon(1)/10;            	%  one tenth of the minimum domain distance;
if epsilon <= 5e3*precision,                                               	%  if zero or approx zero try the next smallest x-distance
epsilon = abs(sort(diff(sort(x)))); epsilon = epsilon(2)/10;
end
if epsilon <= 5e3*precision                                               	% if still very small, just use 5000*(precision of y variable class);
epsilon = 5e3*precision;
disp('GENFIT: At least some x-values are very closely spaced. This may limit the accuracy of the x->y uncertainty transformation.');
end
yyleft = feval(fhandle,fitresult.solution,x-epsilon);                     	%  Get the values of the fit slightly to the left of the data points
yy = feval(fhandle,fitresult.solution,x-epsilon);                         	%  values of the fit at the x data locations.
yyright = feval(fhandle,fitresult.solution,x+epsilon);                     	%  Get the values of the fit slightly to the left of the data points
leftslope   = (yy-yyleft)./epsilon;                                        	%  Slope of fit segment between preceding point and current point
rightslope  = (yyright-yy)./epsilon;                                       	%  Slope of fit segment between current point and following point
yyslope = ( leftslope+rightslope )/2;
yyerr = abs(xerr.*yyslope);
if yerrset
totyerr = sqrt(yyerr.^2 + yerr.^2);                                  	% new unc.in y is quad. sum of x-unc. projected onto y and original y-unc.
else
totyerr = yyerr;
end
fitresult=genfit(x,[y,totyerr],fhandle,params,fittype,[1 0 outputoptions(3)],nonlincontrolstring);
fitresult.yfromxerrors = yyerr;
fitresult.xerrorsupplied = 1;
if yerrset
fitresult.yerrorsupplied = 1;
else
fitresult.yerrorsupplied = 0;
end

if ~silentrun
disp(['Parameters:    ',num2str(a)])
disp(['Uncertainties: ',num2str(da)])
end
%% Display fit
if showatend
if length(x)<=300
if yerrset
for ss=1:length(x)                                              % plot all the points with x and y error bars
plot([x(ss)-xerr(ss) x(ss)+xerr(ss)], [y(ss) y(ss)],'b');
plot([x(ss) x(ss)], [y(ss)-yerr(ss) y(ss)+yerr(ss)],'b');
hold on;
end
else
for ss=1:length(x)                                              % plot all the points with x and y error bars
plot([x(ss)-xerr(ss) x(ss)+xerr(ss)], [y(ss) y(ss)],'b');
hold on;
end
end
plot(x,y,'bo','markersize',3);
plot(x,feval(fhandle,a',x),'r-','LineWidth',1);
else
plot(x,yraw,'bo','MarkerSize',3); hold on;
if ~silentrunatend
disp(' ');
disp('Not showing error bars due to the large number of points');
disp(' ');
end
plot(x,feval(fhandle,a',x),'r-','LineWidth',1);
end
hold off;
end
end

if showfit || showatend
set(gca,'NextPlot',NextPlot_current);                                       % Reset original figure hold state
end

function res=rmsfitresidual(a,fhandle,x,y)                                    	% This form used for fminsearch
res=sqrt(sum( (feval(fhandle,a,x)-y).^2 ));

function res=rmschisqr(a,fhandle,x,y,yerr)                                   	% This form used for fminsearch
res=sqrt(sum( (feval(fhandle,a,x)-y).^2./yerr.^2 ));

function res=fitresidual(a,fhandle,x,y)                                       	% This form used for lsqnonlin
res=(feval(fhandle,a,x)-y);

function res=weightedresidual(a,fhandle,x,y,yerr)                            	% This form used for lsqnonlin
res=(feval(fhandle,a,x)-y)./yerr;

function result=rmss(v)
result = sqrt( mean( (v).^2 ) );