% Copyright (c) 2017, Domenico L. Gatti
% All rights reserved.
% 
% Redistribution and use in source and binary forms, with or without 
% modification, are permitted provided that the following conditions are 
% met:
% 
%     * Redistributions of source code must retain the above copyright 
%       notice, this list of conditions and the following disclaimer.
%     * Redistributions in binary form must reproduce the above copyright 
%       notice, this list of conditions and the following disclaimer in 
%       the documentation and/or other materials provided with the 
%       distribution
%       
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
% IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
% CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
% EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
%% General dependencies
% We always start from the CODE directory and we add to the path
% subdirectories containing various tools described in the book chapters.
addpath(genpath('../GENERAL_SCRIPTS_FUNCTIONS'));
% addpath(genpath('../DATABASE'));
% addpath(genpath('../TOOLBOXES'));

%% CHAPTER 6: Least squares 

%% Problem 1.
% Least-squares fit of a polynomial to nine (x,y) data points. The
% coefficients are obtained with the backslash operator, with the normal
% equations and with polyfit; the section then computes the residuals, the
% standard deviation of the fit and R-squared, and plots the results.
clear
close all

xvec = [1 2 3 4 5 6 7 8 9]';
yvec = [1 3 2.5 3.5 3 2.5 2.5 2 4]';
polyn_fit = figure; plot(xvec,yvec,'or')
xlim([0 10])
ylim([0 5])
hold on

% Design matrix. vander() returns the powers in decreasing order, so the
% columns are flipped to get 1, x, x^2, ... and the last 'less' columns are
% dropped: the fitted polynomial has n = npoints-less coefficients.
npoints = length(xvec);
less = 3;
A = vander(xvec);
A = A(:,end:-1:1);
A = A(:,1:npoints-less);
[m,n] = size(A);
% Here we get the coefficients
pol = A\yvec
% Transparent least squares solution (normal equations). The right-hand
% side is parenthesized so that the system is solved against the vector
% A'*yvec rather than against the whole matrix A'.
pol = (A'*A)\(A'*yvec)

% Evaluate the fitted polynomial on a fine grid. The monomial basis is
% built with the same column order as A (1, x, x^2, ...) and with as many
% columns as coefficients were retained, so it is not tied to a fixed
% number of data points.
fine_xvec = [0:0.1:max(xvec)+xvec(1)];
nfx = length(fine_xvec);
fine_A = bsxfun(@power,fine_xvec(:),0:n-1);
fine_yvec = fine_A*pol;
plot(fine_xvec,fine_yvec,'-b')
xlim([0.85*min(xvec) 1.02*max(xvec)])
xlabel('[AM] (microM)')
ylabel('% stimulation')

% Here we find the residual by direct subtraction. The fitted values at the
% data points are A*pol; computing them directly avoids looking the data
% abscissas up in the fine grid with a floating-point equality test.
resid_vec = yvec - A*pol

% Here we find the error in the least squares procedure: the projection of
% yvec onto the left null space of A.
NA = null(A');
Pna = NA/(NA'*NA)*NA';
e = Pna*yvec

% Now we can add the error bars to our plot.
for i = 1:npoints
    x = [xvec(i),xvec(i)];
    y = [yvec(i),yvec(i)-e(i)];    
    line(x,y,'LineWidth',1,'Color','g');
end
legend('data','polynomial fit','errors','Location','best')

% Standard deviation ('less' = m-n)
sigma = sqrt((e'*e)/(m-n))    

% Rsquared: first we calculate the sum of squares about the mean for the
% experimental data.
c_yvec = yvec - mean(yvec);
sst = c_yvec'*c_yvec;
sse = (resid_vec'*resid_vec);
rsquare = 1-sse/sst

% Same fit with polyfit; the degree is n-1 so that it always matches the
% number of columns kept in A.
[p,S] = polyfit(xvec,yvec,n-1)

% delta is an estimate of the standard deviation of the error in predicting
% a future observation at x by p(x). If the coefficients in p are least
% squares estimates computed by polyfit, and the errors in the data input
% to polyfit are independent, normal, and have constant variance, then
% y +/- delta contains at least 50% of the predictions of future
% observations at x.
[fine_yvec_2, delta] = polyval(p,fine_xvec,S) 

figure
plot(fine_xvec,fine_yvec_2,'-b',xvec,yvec,'or')
xlim([0.85*min(xvec) 1.02*max(xvec)])
xlabel('[AM] (microM)')
ylabel('% stimulation')
hold on

% Now we can add the error bars to our plot.
[yvec_2, delta_2] = polyval(p,xvec,S) 

for i = 1:npoints
    x = [xvec(i),xvec(i)];
    y = [yvec_2(i)+delta_2(i),yvec_2(i)-delta_2(i)];    
    line(x,y,'LineWidth',1,'Color','g');
end
legend('polynomial fit','data','deltas','Location','best')


% [p,S] = polyfit(xvec,zscore(yvec),5)
% [p,S] = polyfit(zscore(xvec),yvec,5)
% [p,S,mu] = polyfit(xvec,yvec,5)

%% Problem 2
% Exponential decay of compound X: the model y = y0*exp(k*t) is linearized
% by taking the logarithm of the concentrations and fitted by ordinary
% least squares. The fit is then extrapolated back to the time of the
% inoculum to estimate the volume in which the compound distributed.
clear, clc 
close all

% Molecular Weight
mw = 300;
% Inoculum in mg
inj = 90

% Our xvec is the different times
t = [0 5 10 15 20 25 30 35 40 45 50 55 60]'
nt = length(t)

yvec = [0.9669 0.5627 0.4608 0.2979 0.3493 0.4414 0.2387 0.2586 ...
           0.0988 0.0896 0.1247 0.0378 0.03031]';
       
OLS_plot = figure;       
set(gcf,'Units','Normalized','Position',[0 0.2 0.5 0.8])
plot(t,yvec,'sb','MarkerSize',30)
xlim([-1 1.02*max(t)])
xlabel('Time (minutes)')
ylabel('[compound X] (microM)')

hold on

% Linearized model: log(y) = log(y0) + k*t, i.e. A*u = b with
% u = [log(y0); k].
A = [ones(nt,1) t]
[m,n] = size(A)
b = log(yvec)
u = A\b
% Normal equations; the right-hand side is parenthesized so the system is
% solved against the vector A'*b.
u = (A'*A)\(A'*b)

% Since we took the log, we have to exponentiate to recover the intercept
% NOTE: the name 'int' hides any function called int (e.g. symbolic
% integration) while it is in the workspace; y0_fit below holds the same
% value.
int = exp(u(1))

% Here we plot the solution
yvec_l = exp(A*u);
plot(t,yvec_l,'-r','LineWidth',3)

% Here we find the residual by direct subtraction
resid_vec = yvec - yvec_l

% Here we find the error in the least squares procedure: Pna projects onto
% the left null space of A.
NA = null(A');
Pna = NA/(NA'*NA)*NA';

% No relationship between the real residual and the projection of either
% the original Y concentrations or their logarithms onto the Left Null
% Space of A.
e = Pna*yvec
e = Pna*b
exp(e)

% Now we can add the error bars to our plot.
for i = 1:nt
    x = [t(i),t(i)];
    y = [yvec(i),yvec(i)-resid_vec(i)];    
    line(x,y,'LineWidth',3,'Color','g');
end
legend('data','exponential fit','errors','Location','best')

% Standard deviation (denominator = m-n)
sigma = sqrt((resid_vec'*resid_vec)/(nt-length(u))) 
y0_fit = exp(u(1))
decay_rate = u(2)

% Rsquared: first we calculate the sum of squares about the mean for the
% experimental data.
c_yvec = yvec - mean(yvec);
sst = c_yvec'*c_yvec;
sse = (resid_vec'*resid_vec);
rsquare = 1-sse/sst

% Back up for comparison
linear_decay_rate = decay_rate;
linear_sigma = sigma;
linear_rsquare = rsquare;

string1 = 'decay rate = ';
string2 = num2str(decay_rate,'%6.4f\n');
string3 = '/min';
string4 = 'sigma = ';
string5 = num2str(sigma,'%6.4f\n');
string6 = '    microM';
string7 = 'Rsquared = ';
string8 = [(num2str(rsquare,'%6.4f\n')) '       ']; 

% Create textbox. The three rows are stacked with char(), which pads the
% shorter rows with blanks; stacking them with [row1;row2;row3] only works
% when the formatted numbers happen to give rows of identical length.
annotation(OLS_plot,'textbox',...
    [0.4 0.6 0.27 0.15],...
    'String',{char([string1 string2 string3],[string4 string5 string6],...
                   [string7 string8])},...
    'FontWeight','bold','FontSize',32,...
    'FitBoxToText','on',...
    'BackgroundColor',[1 1 0.8],...
    'Color',[1 0 0]);
title('Exponential Fit');


% Knowing the intercept at time 0 (= 60 minutes) we can calculate back the
% concentration of compound X at the moment of inoculum:
y0_real = y0_fit*exp(-decay_rate*60)
% Concentration in mg/liter
y0_real = y0_real*mw/1000
% Extracellular fluid: X_conc_time0 = inj/extr_fluid
ef_vol = inj/y0_real

%% Simulation of multiple experiments
% Generates n_exp noisy replicates of an exponential decay sampled at the
% times in t and plots them on the current axes. t and nt are taken from
% the workspace (they are defined in the Problem 2 section).
n_exp = 5;
ymat_n = zeros(n_exp,nt);

% Noise-free decay curve; it is the same for every replicate, so it is
% computed once outside the loop.
k = -0.05;
y0 = 1;
yvec = y0*exp(k*t);

for j = 1:n_exp
    % Normally distributed noise (standard deviation 0.3), scaled by the
    % mean of the noise-free curve. The variable is called 'noise' rather
    % than 'error' so that the built-in error() function is not shadowed.
    noise = normrnd(zeros(nt,1),0.3);
    ymat_n(j,:) = (yvec+noise*mean(yvec))';
end
std_yvec_n = std(ymat_n);
plot(t,ymat_n,'o');
% plot(t,m_yvec_n,'om');
% std_yvec_n = std(ymat_n);
% b2 = log(m_yvec_n);

%% Weighted least squares
% Fits the linearized exponential decay log(y) = log(y0) + k*t to the mean
% of the log-concentrations of several replicate experiments, weighting
% each time point by the inverse of the standard deviation of its
% log-concentrations. Missing measurements are entered as NaN.
% close all
clc

% Molecular Weight
mw = 300;
% Inoculum in mg
inj = 90;

% Our xvec is the different times
t = [0 5 10 15 20 25 30 35 40 45 50 55 60]'
nt = length(t)

% Design matrix of the linearized model. It is built here so that this
% section does not depend on an A left in the workspace by another section.
A = [ones(nt,1) t];

% One row per experiment, one column per time point.
ymat_n = ... 
[0.849,0.734,0.591,0.5,0.342,0.331,0.262,0.0484,0.0403,0.0311,0.0312,0.0318,0.051;
0.696,0.733,0.731,0.365,0.462,0.322,0.22,0.192,NaN,0.0969,0.243,0.0738,0.0539;
0.926,0.776,0.63,0.515,0.33,0.263,0.426,NaN,0.359,0.139,0.182,NaN,NaN;
0.972,0.821,0.439,0.52,0.246,0.293,0.289,0.207,0.244,0.206,0.0168,0.0897,NaN;
0.867,0.872,0.607,0.467,0.459,0.346,0.258,0.299,0.229,0.129,0.0129,NaN,0.169]
m_ymat_n = nanmean(ymat_n);

WLS_plot = figure;
plot(t,m_ymat_n,'om');hold on
plot(t,ymat_n,'x');

% Right-hand side and its spread, both computed on the log-concentrations
% and ignoring the NaN entries.
b2 = (nanmean(log(ymat_n)))';
std_b2 = nanstd(log(ymat_n))';
% nanind = isnan(ymat_n);
% ymat_n(nanind) = 0;
% b2 = log(nanmean(ymat_n))';
% std_b2 = log(nanstd(ymat_n))';
% W holds 1/std on its diagonal, so C = W'*W holds 1/variance.
W = 1./std_b2;
W = diag(W);
% W = inv(nancov(ymat_n))
C = W'*W;
% C = W

% Standard least squares
% u2 = A\b2;

% Weighted least squares
% u2 = inv(A'*C*A)*A'*C*b2;

% Using the backslash operator (weighted normal equations). The right-hand
% side is parenthesized so the system is solved against a vector.
u2 = (A'*C*A)\(A'*C*b2)
inv(A'*C*A) % covariance matrix of the unknown

% Using QR factorization of the weighted system W*A*u2 = W*b2
WA = W*A;
Wb = W*b2;
[Q,R] = qr(WA,0);
u2 = R\(Q'*Wb)

% NOTE: the name 'int' hides any function called int while it is in the
% workspace; y0_fit2 below holds the same value.
int = exp(u2(1));

fine_t = [0:0.1:60]';
fine_A = [ones(length(fine_t),1) fine_t];
fine_yvec2 = exp(fine_A*u2);

plot(fine_t,fine_yvec2,'-r')

% Here we find the residual by direct subtraction
yvec2 = exp(A*u2);
resid_vec2 = m_ymat_n' - yvec2

% Now we can add the error bars to our plot.
for i = 1:nt
    x2 = [t(i),t(i)];
    y2 = [m_ymat_n(i),m_ymat_n(i)-resid_vec2(i)];    
    line(x2,y2,'LineWidth',2,'Color','g');
end

% Standard deviation (denominator = m-n). The number of fitted parameters
% is taken from u2, the solution computed in this section.
sigma2 = sqrt((resid_vec2'*resid_vec2)/(nt-length(u2))) 
y0_fit2 = exp(u2(1))
decay_rate2 = u2(2)

% Rsquared: first we calculate the sum of squares about the mean for the
% experimental data.
c_ymat_n = m_ymat_n - mean(m_ymat_n);
sst2 = c_ymat_n*c_ymat_n';
sse2 = (resid_vec2'*resid_vec2);
rsquare2 = 1-sse2/sst2

% Back up for comparison
% linear_decay_rate = decay_rate;
% linear_sigma = sigma;
% linear_rsquare = rsquare;

string1 = 'best decay rate = ';
string2 = num2str(decay_rate2,'%6.4f\n');
string3 = '/min';
string4 = 'best sigma = ';
string5 = num2str(sigma2,'%6.4f\n');
string6 = '    microM';
string7 = 'best Rsquared = ';
string8 = [(num2str(rsquare2,'%6.4f\n')) '       ']; 

% Create textbox. The three rows are stacked with char(), which pads the
% shorter rows with blanks; stacking them with [row1;row2;row3] only works
% when the formatted numbers happen to give rows of identical length.
annotation(WLS_plot,'textbox',...
    [0.55 0.5 0.33 0.11],...
    'String',{char([string1 string2 string3],[string4 string5 string6],...
                   [string7 string8])},...
    'FontWeight','bold',...
    'FitBoxToText','off',...
    'BackgroundColor',[1 1 0.800000011920929],...
    'Color',[1 0 0]);
title('Exponential Fit');


% Knowing the intercept at time 0 (= 60 minutes) we can calculate back the
% concentration of compound X at the moment of inoculum:
y0_real2 = y0_fit2*exp(-decay_rate2*60)
% Concentration in mg/liter
y0_real2 = y0_real2*mw/1000
% Extracellular fluid
ef_vol2 = inj/y0_real2

