function fitresult=genfit(X,Y,fhandle,params,varargin)
% genfit: Front end to least squares fitting - linear and nonlinear routines. Provides errors in the param's.
%------------------------------------------------------------------------------------------------------------
% PROGRAM: genfit
% AUTHOR: Andri M. Gretarsson
% DATE: 05/27/03
% MODIFIED: 11/2012
%
% SYNTAX: fitresult=genfit(X,Y,fhandle,initparams <,fittype,outputoptions,nonlincontrolstring>);
% <...> indicates optional parameters.
%
% This routine does a least squares fit to the data and provides the errors in the
% parameters. It accepts two matrixes, x (Nx1) and [y dy] (Nx2), and the handle of a function
% f([a1,...,aM],x) : Real->Real ,which may be nonlinear in the parameters a1,..aN.
% The routine then fits f([a1,...,aM],x) to the data y, by varying [a1,...aM]. Various
% options can be specified to control the fit type, output, tolerances etc. The routine
% returns the best fit parameters [a1,...,aM], the uncertainty in the parameters, [da1,...,daM],
% the reduced Chi-squared around the solution, etc. When the fit function f is linear in the
% parameters either a linear or a nonlinear fit may be chosen. If the function is not linear in
% the parameters, chose a nonlinear fit to obtain meaningful results. The linear methods are
% deterministic but may encounter machine-precision problems. (The routine attempts to warn the
% user when this is the case.) Also, for large datasets (more than ~1000 data points) the
% linear methods may be slow. In that case, use the nonlinear method. If the uncertainty in the
% data points is not provided (the y-matrix has only a single column) then the uncertainty is
% taken as the resudual around the fit. In that case, the reduced Chi-squared value will
% automatically be near unity, and does not provide a meaningful test of the model. There are
% two types of linear fit possible 'lin' and 'linreg'. The former uses deterministic code
% directly in this function, the second uses the linear regression function "regress" from
% the statistics toolbox. Only the former can handle uncertainties but the latter may be
% more robust for large or otherwise problemmatic data sets.
% If no uncertainties are speciefied, the uncertainty is
% taken as the rms residual of the data from the fit. The value of Chi-squared will be
% automatically close to unity in this case and can not be used as a meaningful
% test of the model.
%
% NOTE: If your data has x and y uncertainties, the x uncertainties should be converted to
% y uncertainties via the slope of the model function. (Adding the calculated additional
% y uncertainties in quadrature to the already present, inherent y uncertainties.)
% This is not implemented in the routine and should be done by hand prior to using the
% routine. However, most often the dominant source of uncertainty is in the y coordinate
% (the dependent coordinate) and no dx->dy conversion is necessary.
%
%
% INPUT ARGUMENTS:
%
% X = Nx1 matrix. Independent varaible data values in column 1, uncertaintly in column 2.
% Y = Nx2 matrix. Dependent varaible data values in column 1, uncertaintly in column 2.
% initparams= Initial values for the parameters. For linear fits, the values are not used but the
% number of fit parameters is obtained from the length of this vector. Thus, for
% linear fits params=[1:nparams] is just as good as actual guesses for the initial values.
% For the nonlinear fit however, good guesses for the initial parameters are crucial.
% If the initial values are too far off the fit may find a local minimum that isn't
% the best fit.
% fhandle = Handle of a function to fit to, e.g. if we wish to fit to the function
% straighline.m in the current matlab path, then fhandle should be @straightline.
% The function definintion must have the form y=fun_name(a,x), where 'a' is a
% vector of parameters with nparams elements and x is the the dependent variable
% (a vector of any length).
% fittype = 'lin' for a linear fit, 'linreg' for a linear fit using the "regress" function from
% the statistics toolbox (not to be used when uncertainties are specified), and 'nonlin'
% for a nonlinear fit with either uniform uncertainties or no uncertainties.
% Default is 'lin'.
% outputoptions = Vector of flags. Set the desired flag(s) to activate the corresponding option(s).
% Setting a flag to zero deactivates the corresponding option. Default is 2
% Flag number Option
% 1 Set to 1 to suppress plotting the data and resulting fit. Set to 2 to also
% plot the fits corresponding to the one-sigma error in the parameters.
% 2 Set to 1 to suppress calculation of the Xsqr cross sections. (Useful
% for fitting large data sets where Xsqr calculation adds
% significant time.)
% Set to 2 to completely suppress calculation of uncertainty in the fit parameters
% 3 Set to 1 to produce no workspace output (run silently).
% Default is outputoptions=[0 0 0].
% nonlincontrolstring= Either a string in the form 'options=optimset(...)' to be evaluated inside
% the function to allow direct control of the nonlinear fit, or alternatively (and more simply)
% the structure "options" generated by the command options=optimset(...).
%
% OUTPUT ARGUMENTS:
%
% The single output variable output is a structure with the following fields:
% fitresult.solution = [a1,...,aM] giving the best fit.
% fitresult.uncertainty = [da1,...,daM] the 1-sigma uncertainty in the parameters about the best fit.
% fitresult.fitlocus = Locus of the solution on the domain X.
% fitresult.residual = Locus of the residual over the domain X.
% fitresult.method = Method used for fit ('linlsq','linreg','nonlinlsq')
% fitresult.rxs0 = The reduced Chi squared at the best fit values [a1,...,aM]
% fitresult.probrxs0 = The probability of obtaining rxs0 or greater for the reduced Chisqr
% if the errors supplied were truly random errors of the correct size.
% fitresult.errorsupplied = "1" if error bars were supplied with the data, "0" otherwise.
%
% NOTE: When fitresult.probrxs0 << 1, the value (1-fitresult.probrxs0) is essentially the likelihood
% that the model is inconsistent with the data. But fitresult.probrxs0 is not meaningful unless error
% bars were supplied with the data (fitresult.errorsupplied=1). When error bars are not supplied,
% the uncertainties are assumed to be uniform and set equal to be the rms residual. In
% other words, rxs0 will be very close to unity and probrxs0 should be near 0.5.
%
% EXAMPLES:
% X=[0.8; 2.0; 3.0; 3.8; 5.1; 6.1];
% Y=[[4.3,1.5];[5.6,2.5];[12.3,1.0];[13.0,0.5];[18.7,1.7];[19.1,1.5]];
% options=optimset('TolFun',1e-3);
% result=genfit(X,Y,'straightline',[1 1],'nonlin',[],options);
%
%------------------------------------------------------------------------------------------------------------
% SYNTAX: fitresult=genfit(X,Y,fhandle,initparams <,fittype,outputoptions,nonlincontrolstring>)
%------------------------------------------------------------------------------------------------------------
% -- BEGIN parameter processing ---------------------------------------------------------------------
if nargin>=5, fittype=varargin{1}; else fittype='lin'; end;
defaultoptions=[0,0,0];
if nargin>=6, outputoptions=varargin{2}; else outputoptions=defaultoptions; end;
if nargin>=7, nonlincontrolstring=varargin{3}; else nonlincontrolstring=''; end;
if length(outputoptions)<3
outputoptions=[outputoptions(1:end),defaultoptions(length(outputoptions)+1:length(defaultoptions))];
end
showfit=(outputoptions(1)>1 || outputoptions(1)==0);
plotXsqr=(outputoptions(2)==0);
suppressXsqr=(outputoptions(2)>=1);
suppressErrorCalc=(outputoptions(2)==2);
silentrun=(outputoptions(3)==1);
nparams=length(params);
x=X(:,1);
yraw=Y(:,1);
N=length(x);
unity=diag(ones(nparams,1));
if size(Y,2)==2 % Check whether y error bars have been supplied
yerr=Y(:,2);
yerrset=(1==1); % Set a flag if y error bars are supplied
else
yerrset=(0==1);
end;
if length(x)~=length(yraw), error('X and Y must be the same length!'); end;
if length(x) < nparams, error('Number of data points is too small to determine model parameters'); end;
if strcmp(fittype,'linreg') && ~chkbx('statistics')
warning(0,'The statistics toolbox must be present to use fit type ''linreg''. Using ''lin'' instead.');
fittype='lin';
end
%--END parameter processing -------------------------------------------------------------------------
%==BEGIN linear least squares fit ====================================================================
if strcmp(fittype,'lin')
f1=zeros(nparams,N);
if yerrset==0 % If there are no error bars we'll get the fit and use the residual
yerr1=sqrt(mean((diff(yraw)).^2))*ones(size(yraw)); % Get the initial fit using a fabricated uncertainty
y1=yraw./yerr1;
clear f1;
for l=1:nparams % Each row corresponds to a term in the fit function
for i=1:N % Each column corresponds to a value of x
f1(l,i)=feval(fhandle,unity(l,:),x(i))/yerr1(i); % The matrix element is the value of the term in the
end % fit function for a given x, where the parameter has been set to
end; % unity, all normalized by the y measurement error.
beta1=f1*y1; % f*y is similar to y.^2 but with the contributions of each term in the fit function separated
alpha1=f1*f1'; % nparmams*nparams size matrix, this shows the average size^2 of the contributions of each term
errmat1=inv(alpha1); % in some sense the inverse of the average of each term
unitytest1=alpha1*errmat1; % Checking the inversion for errors.
unitytol=0.1;
if ~inrange(unitytest1,...
diag(ones(1,nparams))-unitytol*ones(nparams,nparams),...
diag(ones(1,nparams))+unitytol*ones(nparams,nparams))
warning('GENFIT:SingularMatrix','Fit did not succeed. Error-matrix inversion not accurate.');
end
a1=errmat1*beta1; % Best fit values of the coefficients
residual1=yraw-feval(fhandle,a1,x); % data - fit.
yerr=sqrt(mean(residual1.^2))*ones(size(yraw)); % Uncertainty taken as the rms residual
end
clear f;
y=yraw./yerr;
f=zeros(nparams,N);
for l=1:nparams % Each row corresponds to a term in the fit function
for i=1:N % Each column corresponds to a value of x
f(l,i)=feval(fhandle,unity(l,:),x(i))/yerr(i); % The matrix element is the value of the term in the
end % fit function for a given x, where the parameter has been set to
end; % unity, all normalized by the y measurement error.
beta=f*y; % f*y is similar to y.^2 but with the contributions from each term in the fit function separated
alpha=f*f'; % nparmams*nparams size matrix, this shows the average size^2 of the contributions of each term
% errmat=inv(alpha); % in some sense the inverse of the average of each term
errmat=alpha\diag(ones(size(alpha,1),1));
unitytest=alpha*errmat; % Checking the inversion for errors.
if ~inrange(unitytest,...
diag(ones(1,nparams))-0.1*ones(nparams,nparams),...
diag(ones(1,nparams))+0.1*ones(nparams,nparams))
warning('GENFIT:SingularMatrix','Fit did not succeed. Error-matrix inversion not accurate.');
end
a=errmat*beta; % Best fit values of the coefficients
%aerr=diag(errmat)
aerr=1./sqrt(diag(alpha)); % Uncertainty in the best fit value of the components
residual=yraw-feval(fhandle,a,X(:,1)); % data - fit.
chisqr=sum(residual.^2./yerr.^2 ); % chi-squared
rxs0=1/(N-nparams)*chisqr; % reduced chisquared at the solution
if ~silentrun
disp(num2str(a'))
disp(num2str(aerr'))
end
if showfit>=1;
figure(1)
if yerrset
if length(yerr)<=100
errorbar(x,Y(:,1),Y(:,2),'b.','MarkerSize',3); hold on;
else
plot(x,Y(:,1),'bo','MarkerSize',3); hold on;
disp(' ');
disp('Not showing error bars due to the large number of points');
disp(' ');
end
else
plot(x,Y(:,1),'bo','MarkerSize',3); hold on;
end
plot(x,feval(fhandle,a',x),'g-','LineWidth',1);
hold off;
end
fitresult.solution=a';
fitresult.uncertainty=aerr';
fitresult.fitlocus=feval(fhandle,fitresult.solution,x);
fitresult.residual=residual;
fitresult.method='linlsq';
fitresult.rxs0=rxs0;
fitresult.probrxs0=gammainc(rxs0*(N-nparams)/2,(N-nparams)/2,'upper');
fitresult.errorsupplied = yerrset;
%== END linear least squares fit =====================================================================
%== BEGIN linear regression ==========================================================================
elseif strcmp(fittype,'linreg') % Requires statistics toolbox
clear f;
y=yraw;
f=zeros(nparams,N);
for l=1:nparams % Each row corresponds to a term in the fit function
for i=1:N % Each column corresponds to a value of x
f(l,i)=feval(fhandle,unity(l,:),x(i)); % The matrix element is the value of the term in the fit function
end % for a given x, where the parameter has been set to unity,
end;
f=f';
if ~yerrset
[b bint r]=regress(y,f,0.32); % This is where the regression is done
berr=((abs(bint(:,1)-b)+abs(bint(:,2)-b))/4); % The uncertainty in the parameters
else
error('GENFIT: Fit type ''linreg'' is not intended for use with uncertainties. Use fit type ''lin'' instead.')
end
if ~silentrun
disp(num2str(b'))
disp(num2str(berr'))
end
fitresult.solution=b';
fitresult.uncertainty=berr';
fitresult.fitlocus=feval(fhandle,fitresult.solution,x);
fitresult.residual=r;
fitresult.method='linreg';
fitresult.rxs0=[];
fitresult.probrxs0=[];
fitresult.errorsupplied = yerrset;
if showfit>=1; % displaying the fit
figure(1)
plot(x,y,'bo','MarkerSize',3); hold on;
plot(x,feval(fhandle,b,x),'k-','LineWidth',1);
if showfit==2
for r=1:nparams
bposextreme=b;
bnegextreme=b;
bposextreme(r)=b(r)+berr(r);
plot(x,feval(fhandle,bposextreme,x),'m-');
plot(x,feval(fhandle,bnegextreme,x),'c-');
end
end
hold off;
end
%== END linear regression =====================================================================================
elseif strcmp(fittype,'nonlin')
%== BEGIN nonlinear least squares fit =========================================================================
% Fitting works correctly with non-uniform error bars now.
y=yraw;
if ~silentrun
options=optimset('Display','Notify'); %,'TolX',1e-12,'TolFun',1e-12,'MaxFunEval',1000,'MaxIter',1000);
else
options=optimset('Display','Off');
end
if ischar(nonlincontrolstring)
if ~isempty(nonlincontrolstring)
eval(nonlincontrolstring);
end
else
options=nonlincontrolstring; % In this case "nonlincontrolstring" is not a string but an options structure created with "optimset"
end
if ~yerrset
afminsearch=fminsearch(@rmsfitresidual,params,options,fhandle,x,y); % fminsearch is more robust than lsqnonlin in that it finds the correct
if ~suppressErrorCalc
[a,~,residual,~,~,~,jacobian] =... % lsqnonlin is then used to generate the jacobian at
lsqnonlin(@fitresidual,afminsearch,[],[],options,... % the solution which is needed for the uncertainty estimtes
fhandle,x,y); % The parameters sent to lsqnonlin are from the results of fminsearch
J=zeros(1,length(a)); da=zeros(1,length(a));
for r=1:length(a)
J(r)=sqrt(sum( (jacobian(:,r)).^2 )); % Uncertainty is estimated from the lowest order expansion
da(r)=abs(rmss(residual)/J(r)); % of the residual about the solution: (Delta Res)^2=J(r)^2*(Delta a)^2
end
end
else
if ~suppressErrorCalc
afminsearch=fminsearch(@rmschisqr,params,options,fhandle,x,y,yerr);
[a,~,residual,~,~,~,jacobian] =...
lsqnonlin(@weightedresidual,afminsearch,[],[],options,...
fhandle,x,y,yerr); %
J=zeros(1,length(a)); da=zeros(1,length(a));
for r=1:length(a)
J(r)=sqrt(sum( (jacobian(:,r)).^2 ));
da(r)=abs(rmss(residual)/J(r));
end
end
end
% Print the best fit values of the parameters to the workspace
if ~silentrun
disp(['Parameters: ',num2str(a)])
disp(['Uncertainties: ',num2str(da)])
end
% Display the fit in a plot
if showfit>=1;
figure(1)
if yerrset
if length(yerr)<=100
errorbar(x,Y(:,1),Y(:,2),'b.','MarkerSize',3); hold on;
else
plot(x,Y(:,1),'bo','MarkerSize',3); hold on;
disp(' ');
disp('Not showing error bars due to the large number of points');
disp(' ');
end
else
plot(x,Y(:,1),'bo','MarkerSize',3); hold on;
end
plot(x,feval(fhandle,a',x),'r-','LineWidth',1);
hold off;
end
if yerrset
rxs0=1/(N-nparams) * sum((y-feval(fhandle,a,x)).^2./Y(:,2).^2); % Reduced X-sqared at the solution
else
rxs0=1/(N-nparams) * sum((y-feval(fhandle,a,x)).^2)/rmss(residual).^2;
end
fitresult.solution=a;
fitresult.uncertainty=da;
fitresult.fitlocus=feval(fhandle,fitresult.solution,x);
fitresult.residual=-residual; % fminsearch appears to define the residual as fit minus data rather than data minus fit.
fitresult.method='nonlin';
fitresult.rxs0=rxs0;
% fitresult.probability is the integral of the tail of the chi-squared distribution
% (see e.g. "Data Reduction and Error Analysis for the Physical Sciences" by
% P.R.Bevington and D.K.Robinson, Appendix C.4.)
fitresult.probrxs0=gammainc(rxs0*(N-nparams)/2,(N-nparams)/2,'upper');
fitresult.errorsupplied = yerrset; % 1 if the errors were provided
%== END nonlinear least squared fit ==================================================================================
else
error('GENFit:InvalidFitType',['GENFIT: Fit type specification string ''',fittype,''' not recognized.']);
end
function res=rmsfitresidual(a,fhandle,x,y) % This form used for fminsearch
res=sqrt(sum( (feval(fhandle,a,x)-y).^2 ));
function res=rmschisqr(a,fhandle,x,y,yerr) % This form used for fminsearch
res=sqrt(sum( (feval(fhandle,a,x)-y).^2./yerr.^2 ));
function res=fitresidual(a,fhandle,x,y) % This form used for lsqnonlin
res=(feval(fhandle,a,x)-y);
function res=weightedresidual(a,fhandle,x,y,yerr) % This form used for lsqnonlin
res=(feval(fhandle,a,x)-y)./yerr;
function result=rmss(v)
result = sqrt( mean( (v).^2 ) );
function boolres=chkbx(toolboxnames)
a=struct2cell(ver);
b=a(1,1,:);
c=b(:);
if size(toolboxnames,2)>size(toolboxnames,1)
c=transpose(c);
end
searchedfor=regexprep(regexprep(lower(toolboxnames),'\W',''),'toolbox','');
present=regexprep(regexprep(lower(c),'\W',''),'toolbox','');
boolres=ismember(searchedfor,present);