% Copyright (c) 2017, Domenico L. Gatti
% All rights reserved.
% 
% Redistribution and use in source and binary forms, with or without 
% modification, are permitted provided that the following conditions are 
% met:
% 
%     * Redistributions of source code must retain the above copyright 
%       notice, this list of conditions and the following disclaimer.
%     * Redistributions in binary form must reproduce the above copyright 
%       notice, this list of conditions and the following disclaimer in 
%       the documentation and/or other materials provided with the 
%       distribution
%       
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
% IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
% CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
% EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
%% General dependencies
% We always start from the CODE directory and we add to the path
% subdirectories containing various tools described in the book chapters.
addpath(genpath('../GENERAL_SCRIPTS_FUNCTIONS'));
addpath(genpath('../DATABASE'));
% addpath(genpath('../TOOLBOXES'));

%% CHAPTER 8: Non-linear least squares.

%% Problem 1. We have carried out an experiment in which we followed the
% fluorescence changes produced by the binding of ligand L to 0.1 microM
% receptor R. The X-ray structure of the complex L:R in the presence of 200
% microM X shows clearly that there are 2 distinct binding sites both
% apparently equally occupied. We have collected 30 experimental points
% with L concentrations logarithmically spaced between 1 microM and 200
% microM. We have also collected an additional point with 3 mM L, at which
% concentration we are completely confident all the binding sites of R were
% completely saturated. Finally we have expressed the fluorescence values
% obtained in the concentration range xvec = 1-200 microM as a fraction of
% the fluorescence value at 3 mM. Our final experimental data is thus
% expressed as 'fractional saturation', yvec = [LR]/[Rtot]. The following
% is our binding data:
% Start from an empty workspace so that nothing left over from a previous
% run can leak into this problem.
clear
% clear U u* g* dg* g sos* trust deltaU

% Ligand concentrations [L]: 30 values from 1 to 200, stored as a column.
xvec = [  1.0000   1.2005   1.4411   1.7300   2.0767   2.4930 ...
          2.9928   3.5927   4.3129   5.1774   6.2153   7.4611 ...
          8.9567  10.7522  12.9075  15.4949  18.6009  22.3295 ...
         26.8056  32.1789  38.6293  46.3728  55.6684  66.8274 ...
         80.2233  96.3045 115.6092 138.7836 166.6035 200.0000 ].';

% Fractional saturation measured at each concentration in xvec (column).
yvec = [ 0.2121 0.2411 0.2724 0.3056 0.3407 0.3772 ...
         0.4149 0.4534 0.4923 0.5312 0.5699 0.6080 ...
         0.6450 0.6806 0.7147 0.7467 0.7766 0.8042 ...
         0.8294 0.8522 0.8726 0.8906 0.9065 0.9204 ...
         0.9324 0.9428 0.9517 0.9593 0.9658 0.9712 ].';

% Knowing that the fractional saturation of a receptor by a ligand can be
% expressed as the hyperbolic binding function:
%
% xvec = [L]
% yvec = [LR]/[Rtot] = [L]/(Kd+[L])
%
% a. Determine the Kd of both binding sites of R for L, assuming that both
% binding sites contribute equally to the fluorescence of the complex if
% they are equally saturated. Plot your global fit and the individual
% components of the fit corresponding to the two binding sites.

% In this specific example the function to minimize is the residual between
% the 'experimental' data and the sum of two hyperbolas calculated with our 
% guess for the hyperbolas coefficients:
%
% yvec --> experimental data
%
% u2*(xvec./(u1 + xvec)) + u4*(xvec./(u3 + xvec)) --> two hyperbolas
%
% g(u) = u2*(xvec./(u1 + xvec)) + u4*(xvec./(u3 + xvec)) - yvec
%

% Initial guess for the two dissociation constants (echoed to the console).
U = [4.0; 15.0]

% Curve predicted by the initial guess: two hyperbolas, each weighted 0.5.
guess_yvec1 = 0.5*(xvec./(xvec + U(1)));
guess_yvec2 = 0.5*(xvec./(xvec + U(2)));
guess_yvec = guess_yvec1 + guess_yvec2;

% Fresh figure showing the data (blue circles) with the initial-guess curve
% (dashed red) on top, inside fixed axis limits.
close all
figure
plot(xvec,yvec,'ob')
hold on
axis([-5 205 0 1.05])
plot(xvec,guess_yvec,'--r')
hold off

%% Solution using an 'ad hoc' script 
% Gauss-Newton minimisation of the sum of squares of the residual
%
%   g(U) = u2*xvec./(U(1) + xvec) + u4*xvec./(U(2) + xvec) - yvec
%
% with respect to the two dissociation constants U(1) and U(2). The
% amplitudes u2 and u4 are held fixed at 0.5.

% Label the axes of the current figure and re-enable 'hold' so that the
% fitted curves plotted below are added to it. yvec is the fractional
% saturation [LR]/[Rtot].
xlabel('[L] (microM)')
ylabel('[LR]/[Rtot]')
hold on

tolerance = 1E-8;   % convergence threshold on the change in sum of squares
max_iter = 100;     % safety cap: a non-converging fit must not loop forever

Uo = U;             % copy of the starting point

nobs = length(xvec);
nvars = size(U,1);
u1 = U(1);
u2 = 0.5;
u3 = U(2);
u4 = 0.5;

% Residual vector as a function of the parameter vector p = [Kd1; Kd2].
gfun = @(p) u2*(xvec./(p(1) + xvec)) + u4*(xvec./(p(2) + xvec)) - yvec;

% Central-difference Jacobian of the residual, one column per parameter,
% using a step proportional to the parameter value (factor eps^(1/3)).
fd_step = eps^(1/3);
J_col1 = @(p) (gfun([p(1) + p(1)*fd_step; p(2)]) - ...
               gfun([p(1) - p(1)*fd_step; p(2)]))/(2*p(1)*fd_step);
J_col2 = @(p) (gfun([p(1); p(2) + p(2)*fd_step]) - ...
               gfun([p(1); p(2) - p(2)*fd_step]))/(2*p(2)*fd_step);
Jfun = @(p) [J_col1(p) J_col2(p)];

g = gfun(U);

delta_sos = tolerance*10;   % guarantees that the loop is entered
niter = 0;

while delta_sos > tolerance && niter < max_iter

    niter = niter + 1;

    sos_old = g'*g;

    J = Jfun(U);

    % Gauss-Newton step: least-squares solution of J*deltaU = -g obtained
    % from the economy-size QR factorisation of J. The parentheses matter:
    % '\' and '*' are evaluated left to right, so without them the
    % nvars-by-nobs matrix R\Q' would be formed first.
    [Q,R] = qr(J,0);
    deltaU = R\(Q'*(-g));

    U = U + deltaU;
    g = gfun(U);

    sos = g'*g;
    delta_sos = abs(sos - sos_old);

end

% Also true when delta_sos is NaN, i.e. when the iteration broke down.
if ~(delta_sos <= tolerance)
    warning('Gauss-Newton stopped after %d iterations without converging (delta_sos = %g).', ...
        niter, delta_sos);
end

u1 = U(1);
u3 = U(2);

% Jacobian at the final parameters; the statistics below must refer to the
% solution that is reported, not to the iterate preceding the last update.
J = Jfun(U);

U1 = U';

% Global fit and the contribution of each binding site.
sim_yvec_1 = u2*(xvec./(u1 + xvec));
sim_yvec_2 = u4*(xvec./(u3 + xvec));
sim_yvec = sim_yvec_1 + sim_yvec_2;
plot(xvec,yvec,'or',xvec,sim_yvec,'-b',xvec,sim_yvec_1,'--k',...
    xvec,sim_yvec_2,'--m')

% We calculate the Mean Square Error and the Root Mean Square Error (RMSE).
MSE = sos/(nobs-nvars);
RMSE = sqrt(MSE)

% Covariance matrix of the parameters, their correlation matrix and
% standard deviations.
U_cov = inv(J'*J).*MSE;

[Corr,sigma] = corrcov(U_cov);

% Inverse of the covariance matrix, written directly from J'*J.
% cov_inv = inv(U_cov);
cov_inv = (J'*J)./MSE;

% Rho: inverse covariance matrix normalised by its own diagonal; the
% diagonal itself is set to NaN.
Rho = zeros(nvars,nvars);

for i = 1:nvars
    for j = i:nvars
        Rho(i,j) = cov_inv(i,j)/sqrt(cov_inv(i,i)*cov_inv(j,j));
        Rho(j,i) = Rho(i,j);
    end
end

for i = 1:nvars
    Rho(i,i) = NaN;
end

Corr
Rho

% 95% confidence interval using the normal quantile 1.96.
Conf_95 = [U-1.96*sigma U+1.96*sigma]
sse = sos

% Rsquared 
c_yvec = yvec - mean(yvec);
sst = c_yvec'*c_yvec;
rsquare = 1-sse/sst
niter

our_U = U

%% Solution using MATLAB functions

%% Solution using 'fit'
% Two-site model with both amplitudes fixed at 0.5; the coefficients a and b
% are the two dissociation constants. The fit starts from the current U.
% A bounded variant of the options, kept for reference:
% fo = ...
% fitoptions('Method','NonlinearLeastSquares','Lower',[U(1)-3,U(2)-6],...
%     'Upper',[U(1)+3,Uo(2)+6],'StartPoint',U);
fo = fitoptions( ...
    'Method','NonlinearLeastSquares', ...
    'StartPoint',U, ...
    'Algorithm','Trust-Region', ...
    'Display','iter', ...
    'TolFun',1e-8, ...
    'TolX',1e-8);
f = fittype('0.5*(x/(a + x)) + 0.5*(x/(b + x))','options',fo);
[Hyperb,GOF,output] = fit(xvec,yvec,f);

% Fitted coefficients as a column vector (echoed to the console).
matlab_U = coeffvalues(Hyperb).'

% Model curve rebuilt from the fitted coefficients.
ls_yvec1 = 0.5*(xvec./(xvec + matlab_U(1)));
ls_yvec2 = 0.5*(xvec./(xvec + matlab_U(2)));
ls_yvec = ls_yvec1 + ls_yvec2;

% Data (blue circles) and fitted curve (dashed cyan) in a fresh figure.
close all
figure
plot(xvec,yvec,'ob')
hold on
axis([-5 205 0 1.05])
plot(xvec,ls_yvec,'--c')
hold off


%% Solution using 'lsqnonlin'
% lsqnonlin receives the residual vector (model minus data) and minimises
% its sum of squares, starting from the current U with no bounds.
options = optimoptions('lsqnonlin', ...
    'Display','iter', ...
    'FinDiffType','central', ...
    'TolFun',1e-8, ...
    'TolX',1e-8);
two_site_residual = @(p) 0.5*(xvec./(p(1) + xvec)) + 0.5*(xvec./(p(2) + xvec)) - yvec;
[u,sos,res,flag,output,lambda,J] = lsqnonlin(two_site_residual,U,[],[],options); 

% Solution returned by lsqnonlin (echoed to the console).
matlab_U = u

% Model curve rebuilt from that solution.
ls_yvec1 = 0.5*(xvec./(xvec + matlab_U(1)));
ls_yvec2 = 0.5*(xvec./(xvec + matlab_U(2)));
ls_yvec = ls_yvec1 + ls_yvec2;

% Data (blue circles) and fitted curve (dashed cyan) in a fresh figure.
close all
figure
plot(xvec,yvec,'ob')
hold on
axis([-5 205 0 1.05])
plot(xvec,ls_yvec,'--c')
hold off


%% Solution using 'lsqcurvefit'
% lsqcurvefit receives the model function itself (not the residual) together
% with the x and y data, and starts from the current U with no bounds.
options = ...
    optimoptions('lsqcurvefit','Display','iter','FinDiffType','central',...
    'TolFun',1e-8,'TolX',1e-8);
[u,sos,res,flag,output,lambda,J] = ...
    lsqcurvefit(@(U,xvec) 0.5*(xvec./(U(1) + xvec)) + 0.5*(xvec./(U(2) + xvec)),U,xvec,yvec,[],[],options);

close all
figure
plot(xvec,yvec,'ob');hold on
xlim([-5 205]);
ylim([0 1.05]);

% Plot the solution returned by lsqcurvefit above. Using coeffvalues(Hyperb)
% here would silently re-plot the result of the earlier 'fit' call instead.
matlab_U = u
ls_yvec1 = 0.5*xvec./(matlab_U(1)+xvec);
ls_yvec2 = 0.5*xvec./(matlab_U(2)+xvec);
ls_yvec = ls_yvec1 + ls_yvec2;
plot(xvec,ls_yvec,'--c')
hold off


%% Solution using 'nlinfit'
% Two-site model as a function of the parameters p = [Kd1; Kd2] and of the
% ligand concentration L.
modelfun = @(p,L) 0.5*(L./(p(1) + L)) + 0.5*(L./(p(2) + L));
options = statset( ...
    'Display','iter', ...
    'TolTypeFun','abs', ...
    'TolFun',1e-8, ...
    'TolTypeX','abs', ...
    'TolX',1e-8);
[u,R,J,CovB,MSE,ErrorModelInfo] = nlinfit(xvec,yvec,modelfun,U,options);    

% 95% confidence intervals of the parameters (ci) and predicted curve with
% its confidence half-width (ls_yvec, delta), from the residuals R and the
% Jacobian J returned by nlinfit.
ci = nlparci(u,R,'Jacobian',J,'alpha',0.05);
[ls_yvec,delta] = nlpredci(modelfun,xvec,u,R,'Jacobian',J, 'alpha',0.05);

% Data (blue circles) and predicted curve (dashed cyan) in a fresh figure.
close all
figure
plot(xvec,yvec,'ob')
hold on
axis([-5 205 0 1.05])
plot(xvec,ls_yvec,'--c')
hold off

% Solution using 'nlintool'
% nlintool(xvec,yvec_n,modelfun,U,0.05,'[L]','[Theta]');    
    
%% Solution using 'fminsearch'
% fminsearch minimises a scalar, so the objective is the sum of squared
% residuals of the two-site model. Note that 'modelfun' is reused here for
% that scalar objective.
modelfun = @(p) sum((0.5*(xvec./(p(1) + xvec)) + 0.5*(xvec./(p(2) + xvec)) - yvec).^2);
options = optimset( ...
    'Display','iter', ...
    'TolFun',1e-8, ...
    'TolX',1e-8);
[u,fval,exitflag,output] = fminsearch(modelfun,[U(1),U(2)],options);

% Minimiser returned by fminsearch (echoed to the console).
matlab_U = u

% Model curve rebuilt from that minimiser.
ls_yvec1 = 0.5*(xvec./(xvec + matlab_U(1)));
ls_yvec2 = 0.5*(xvec./(xvec + matlab_U(2)));
ls_yvec = ls_yvec1 + ls_yvec2;

% Data (blue circles) and fitted curve (dashed cyan) in a fresh figure.
close all
figure
plot(xvec,yvec,'ob')
hold on
axis([-5 205 0 1.05])
plot(xvec,ls_yvec,'--c')
hold off

%% First we display again our own fit:
% Both two-site fits are overlaid on the data in one figure: the result of
% the 'ad hoc' script (our_U) and the result of MATLAB 'fit' (Hyperb).

close all
figure
plot(xvec,yvec,'ob');hold on
xlim([-5 205]);
ylim([0 1.05]);

% First we display the fit with our local script. The parameters echoed
% here are our own (our_U), matching the curve that is plotted.
our_U

ls_yvec1 = 0.5*xvec./(our_U(1)+xvec);
ls_yvec2 = 0.5*xvec./(our_U(2)+xvec);
ls_yvec = ls_yvec1 + ls_yvec2;
plot(xvec,ls_yvec,'--sb'); hold on

% Next we display the MATLAB fit
matlab_U = coeffvalues(Hyperb)'

ls_yvec1 = 0.5*xvec./(matlab_U(1)+xvec);
ls_yvec2 = 0.5*xvec./(matlab_U(2)+xvec);
ls_yvec = ls_yvec1 + ls_yvec2;
plot(xvec,ls_yvec,'--c'); hold on

% b. Can you distinguish your model from one in which there is only 1
% binding site?

% One-site model y = x/(a + x), fitted with 'fit' starting from the mean of
% the two values held in U. Its goodness-of-fit structure is echoed so that
% it can be compared with the two-site result.
fo = fitoptions( ...
    'Method','NonlinearLeastSquares', ...
    'StartPoint',mean(U));
f = fittype('(x/(a + x))','options',fo);
[Hyperb_2,GOF_2,output_2] = fit(xvec,yvec,f);
GOF_2

% Fitted single-site coefficient and the corresponding curve (solid yellow),
% drawn into the current figure.
matlab_U_2 = coeffvalues(Hyperb_2).'
ls_yvec_2 = xvec./(xvec + matlab_U_2);
plot(xvec,ls_yvec_2,'-y')


%% c. Compare your results with the results you would obtain with a
% traditional Scatchard plot. A Scatchard plot is a plot of Bound/Unbound
% vs Bound. With this plot the slope is -1/Kd and the intercept on the X
% axis is the concentration of the receptor. If there are two binding sites
% we should be able to recognize a break in the line going through the
% points of the plot. Since slope = -1/Kd, if the Kd is small (high
% affinity) the slope is going to be steeper: this is the left-hand side of
% the Scatchard plot. The site with lower affinity corresponds to the
% right-hand side of the Scatchard plot. Use linear least-squares (either
% writing your own function or using a MATLAB built-in function) to fit the
% points in the Scatchard plot with either a single line (one site) or two
% separate lines (low and high affinity sites).

% Scatchard coordinates. X is the fractional saturation scaled by 0.1 (the
% receptor concentration quoted in the problem statement); Y divides X by
% the ligand concentration in xvec, which is used here as the free ligand.
figure
X = 0.1*yvec;
Y = X./xvec;
plot(X,Y,'s', ...
    'MarkerEdgeColor','k', ...
    'MarkerFaceColor','g', ...
    'MarkerSize',7);

xlabel('[Bound Ligand]');
ylabel('[Bound Ligand]/[Free Ligand]');
title('Scatchard Plot');
hold on

% --- High affinity site -------------------------------------------------
% In the Scatchard plot slope = -1/Kd, so a small Kd (high affinity) gives
% the steeper slope found on the left-hand side of the plot. A straight
% line a*x + b is fitted by least squares through the first 10 points.

X1 = X(1:10);
Y1 = Y(1:10);

f = fittype('a*x + b');
[Scatchard_1,GOF_1] = fit(X1,Y1,f,'StartPoint',[-0.5 0.02]);
plot(Scatchard_1,'-b');
% The upper y limit is the intercept of the fitted line at x = 0.
ylim([0,Scatchard_1(0)]);
xlabel('[Bound Ligand]');
ylabel('[Bound Ligand]/[Free Ligand]');

% Echo the fit object and its goodness of fit.
Scatchard_1
GOF_1
% The coefficients can also be read directly; they are ordered [a b].
Scatchard_1_params = coeffvalues(Scatchard_1);
Kd_1 = -1/Scatchard_1_params(1)

% The line crosses the X axis where a*x + b = 0, i.e. at x = -b/a.
Receptor_conc_1 = -Scatchard_1_params(2)/Scatchard_1_params(1)

% --- Low affinity site --------------------------------------------------
% Same procedure on the points from the 16th to the last one.

X2 = X(16:end);
Y2 = Y(16:end);

f = fittype('a*x + b');
[Scatchard_2,GOF_2] = fit(X2,Y2,f,'StartPoint',[-0.1 0.015]);
plot(Scatchard_2,'-r');
xlabel('[Bound Ligand]');
ylabel('[Bound Ligand]/[Free Ligand]');

% Echo the fit object, then derive Kd and the X-axis intercept.
Scatchard_2
Scatchard_2_params = coeffvalues(Scatchard_2);
Kd_2 = -1/Scatchard_2_params(1)
Receptor_conc_2 = -Scatchard_2_params(2)/Scatchard_2_params(1)

% Finally, using only 1 binding site: a single straight line is fitted
% through all the Scatchard points, reusing the X and Y computed earlier.
figure
% X = yvec_n*.1;
% Y = X./xvec;
plot(X,Y,'s', ...
    'MarkerEdgeColor','k', ...
    'MarkerFaceColor','g', ...
    'MarkerSize',7);

xlabel('[Bound Ligand]');
ylabel('[Bound Ligand]/[Free Ligand]');
title('Scatchard Plot');
hold on

f = fittype('a*x + b');
[Scatchard_0,GOF_0] = fit(X,Y,f,'StartPoint',[-0.5 0.02]);
plot(Scatchard_0,'-b');
% The upper y limit is the intercept of the fitted line at x = 0.
ylim([0,Scatchard_0(0)]);
xlabel('[Bound Ligand]');
ylabel('[Bound Ligand]/[Free Ligand]');

% Echo the fit object and its goodness of fit.
Scatchard_0
GOF_0
% The coefficients can also be read directly; they are ordered [a b].
Scatchard_0_params = coeffvalues(Scatchard_0);
Kd_0 = -1/Scatchard_0_params(1)

% The line crosses the X axis where a*x + b = 0, i.e. at x = -b/a.
Receptor_conc_0 = -Scatchard_0_params(2)/Scatchard_0_params(1)


%% Problem 2.
% Calculate the 1st derivative of vector b (representing an absorbance
% spectrum) to 2nd order and 4th order accuracy using the finite difference
% method.

% Load the spectrum. importfile is a helper defined outside this file;
% presumably it creates the workspace variable Absorbance_spectrum, which
% is read on the next line - TODO confirm.
importfile('../DATABASE/Absorbance_spectrum.txt');
b = Absorbance_spectrum;
npts = size(b,1);   % number of points in the spectrum

% Open a new figure: the previous one still has 'hold on' active, so
% without this call the spectrum would be drawn into the Scatchard plot.
figure
plot(b(:,1),b(:,2),'-r')
hold on

% Central-difference matrix for the 1st derivative (2nd order accuracy):
% +1 on the first superdiagonal, -1 on the first subdiagonal, divided by 2.
% Its size follows the length of the spectrum.
A = toeplitz([0 1 zeros(1,npts-2)]);
openvar A
A = triu(A);
A = A-A';
A_O2 = A/2;
openvar A_O2
 
% Here we calculate the 1st derivative of the spectrum with 2nd Order
% accuracy. The first and last rows of A_O2 are one-sided and are not
% plotted. The factor 30 only rescales the derivative for display.
% NOTE(review): no division by the abscissa spacing is applied, so unit
% spacing of b(:,1) is assumed - confirm against the data file.
b2 = b(:,2);
b_1st_der_O2 = A_O2*b2;
plot(b(2:end-1,1),b_1st_der_O2(2:end-1,1)*30,'-b')
 
% Alternatively we could have used a direct expression on the interior
% points (output suppressed; the vector has npts-2*dx elements):
dx = 1;
interior = (1+dx):(npts-dx);
dfdx = (b2(interior+dx)-b2(interior-dx))/(2*dx);
plot(b(interior,1),dfdx*30,'-c')
 
% If we use the MATLAB 'gradient' function we don't lose the 1st and last point:
dfdx = gradient(b2);
plot(b(:,1),dfdx*30,'-c')

% Here we calculate the 1st derivative of the spectrum with (O4):
% central-difference matrix with +8 and -1 on the first and second
% superdiagonals, -8 and +1 on the first and second subdiagonals, divided
% by 12. The matrix is sized from the data vector b2.

npts_O4 = numel(b2);
A = toeplitz([0 8 -1 zeros(1,npts_O4-3)]);
openvar A
A = triu(A);
A = A-A';
A_O4 = A/12;
openvar A_O4
b_1st_der_O4 = A_O4*b2;
% The first two and last two rows lack part of the stencil and are not
% plotted. The factor 30 only rescales the derivative for display.
plot(b(3:end-2,1),b_1st_der_O4(3:end-2,1)*30,'-g')

% Do you recognize what is the problem in calculating directly the
% derivative of an experimental spectrum?
% 
% The data is very 'noisy': it is important to first get a 'smoothed' model
% of the data and then to calculate the derivative. For higher order
% derivatives it is best to first smooth the lower order derivative and
% then to calculate the higher order one using the same finite difference
% matrix.
% 
% a. Do a search inside MATLAB help library or on the web to find functions
% that can be used to smooth your spectrum.

% b. Calculate both 1st and 2nd derivative of the spectrum doing a
% smoothing step before each derivative calculation.

% Smooth the spectrum with 'loess' (span 0.13) and show it in a new figure.
smoothed_b = smooth(b(:,1),b(:,2),0.13,'loess');
figure
plot(b(:,1),smoothed_b(:),'-g')
hold on

% 1st derivative of the smoothed spectrum (scaled by 30 for display).
b_1st_der = gradient(smoothed_b);
plot(b(:,1),b_1st_der*30,'-b')

% Smooth the 1st derivative with the same settings before differentiating
% again; the 2nd derivative is scaled by 200 for display.
smoothed_b_1st_der = smooth(b(:,1),b_1st_der,0.13,'loess');
b_2nd_der = gradient(smoothed_b_1st_der);
plot(b(:,1),b_2nd_der*200,'-m')

%% Problem 3. 
% Calculate the 1st and 2nd derivative of the function F(x) shown below
% using anonymous functions and complex step differentiation (CSD):

close all
% Function to differentiate. Its denominator cos(x)^3 + sin(x)^3 vanishes
% at x = -pi/4, the left end of the first plotting interval. The handle is
% echoed to the console (no semicolon).
F = @(x) exp(x)./((cos(x)).^3 + (sin(x)).^3)

% Plot F in blue on [-pi/4, pi/2], restrict the view, put ticks at
% multiples of pi/4 and mark the point (pi/4, F(pi/4)) with a dot.
h0 = ezplot(F,[-pi/4,pi/2])    
set(h0, 'Color','b');
axis([-pi/4,pi/2,0,6])    
set(gca,'xtick',[-pi/4,0,pi/4,pi/2])    
line([pi/4,pi/4],[F(pi/4),F(pi/4)],'marker','.','markersize',18)
hold on

% a. Compare the CSD algorithm to the centered difference

% An anonymous function stores the value that h has when the function is
% created. Fp (complex step, 1st derivative) therefore keeps h = eps, while
% Fp2 (centered difference, 1st derivative) and Fdp (2nd derivative from
% the real part of the complex step) keep h = eps^(1/3).
h = eps
Fp = @(x) imag(F(x+1i*h))/h;
h = eps^(1/3)
Fp2 = @(x) (F(x+h) - F(x-h))/(2*h);
Fdp = @(x) 2*(F(x) - real(F(x+1i*h)))/h^2;

% Add the three derivative estimates on [0, pi/2]: complex step in red,
% centered difference in dashed yellow, second derivative in green.
h1 = ezplot(Fp,[0,pi/2])
set(h1, 'Color','r');
h2 = ezplot(Fp2,[0,pi/2])
set(h2, 'Color','y','LineStyle','--');
h3 = ezplot(Fdp,[0,pi/2])
set(h3, 'Color','g');

% Final view for the comparison.
axis([0,pi/2,-10,20]) 

% b. Recalling how we derived the central finite difference matrix for the
% 1st derivative, derive the central finite difference matrix for the 2nd
% derivative.

% Central finite difference matrix for the 2nd derivative (101-by-101):
% -2 on the main diagonal and 1 on the first super- and subdiagonal.
Adp_O2 = diag(-2*ones(1,101)) + diag(ones(1,100),1) + diag(ones(1,100),-1);
openvar Adp_O2

