% Copyright (c) 2017, Domenico L. Gatti
% All rights reserved.
% 
% Redistribution and use in source and binary forms, with or without 
% modification, are permitted provided that the following conditions are 
% met:
% 
%     * Redistributions of source code must retain the above copyright 
%       notice, this list of conditions and the following disclaimer.
%     * Redistributions in binary form must reproduce the above copyright 
%       notice, this list of conditions and the following disclaimer in 
%       the documentation and/or other materials provided with the 
%       distribution
%       
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
% IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
% CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
% EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
%% General dependencies
% We always start from the CODE directory and we add to the path
% subdirectories containing various tools described in the book chapters.
addpath(genpath('../GENERAL_SCRIPTS_FUNCTIONS'));
addpath(genpath('../DATABASE'));
% addpath(genpath('../TOOLBOXES'));

%% CHAPTER 11: singular value decomposition (SVD)
clear; clc; close('all');

% --- SVD "by hand" for a random 4-by-3 matrix built as a (4x2)*(2x3) product.
% The eigenvectors of A'*A and A*A' are compared below with the output of svd;
% the singular values are the square roots of the eigenvalues of A'*A.
A = rand(4,2)*rand(2,3)
r = rank(A)
AtA = A'*A
[V,Svs] = eigs(AtA)
AAt = A*A'
[U,Sus] = eigs(AAt)
S = sqrt(diag(Svs))

% --- Full and economy-size SVD of the same matrix.
[U_svd,S_svd,V_svd] = svd(A)
[U_svd,S_svd,V_svd] = svd(A,'econ')

% --- Same exercise for a random 3-by-4 matrix built as a (3x2)*(2x4) product.
A = rand(3,2)*rand(2,4)
r = rank(A)
[U_svd,S_svd,V_svd] = svd(A)
[U_svd,S_svd,V_svd] = svd(A,'econ')

% --- Orthonormal bases for the four fundamental subspaces of A.
V = orth(A')        % row space
Vn = null(A)        % null space
U = orth(A)         % column space
Uln = null(A')      % left null space

% --- Pseudoinverse assembled from the first r singular triplets, compared
% with the built-in pinv.
r = rank(A)
S_pos = diag(1./diag(S_svd(:,1:r)))
pinvA = V_svd(:,1:r)*S_pos(:,1:r)*U_svd(:,1:r)'
pinvA = pinv(A)
rank(pinvA)

% Products of S with its pseudoinverse.
S_svd*pinv(S_svd)
pinv(S_svd)*S_svd

% --- A 3-by-3 matrix whose second row is twice the first.
A = [2 7 6;4 14 12;1 3 3]
[m,n] = size(A)
rank(A)
rA = orth(A')
nA = null(A)
x = rA(:,1) + nA      % a row-space vector plus the null-space vector
b = A*x
inv(A)                % inverse of a rank-deficient matrix, for comparison with pinv
pinvA = pinv(A)
x_p = pinvA*b

% The projector onto the row space can be built from an orthonormal basis
% or, equivalently, as pinv(A)*A.
V = orth(A')
P = V*inv(V'*V)*V'
P = pinvA*A
x_p = P*x
P*P

% --- "Recovering" A from a single input/output pair (x,b) using pinv(x).
% A_recov reproduces b for x and for multiples of x; new_x is an
% element-wise rescaling of x used for comparison.
pinv_x = pinv(x)
A_recov = b*pinv_x
b = A*x
b = A_recov*x
b = A*(0.5*x)
b = A_recov*(0.5*x)
new_x = [1; 2; 3].*x
b = A*new_x
b = A_recov*new_x

b1 = A*x
b2 = A_recov*new_x
b2./b1

% [S,D] = eigs(A)
% [S_recov,D_recov] = eigs(A_recov)

% --- Least squares with a rank-deficient A: the normal equations involve a
% rank-deficient A'*A, while pinv(A)*b still returns a solution.
A = [2 7 6;4 14 12;1 3 3]
rank(A)
b = [1.2; 1.9; 3.1]
AtA = A'*A
rank(AtA)
inv(AtA)

x = pinv(A)*b
nA = null(A)
x2 = x - 7.6*nA       % x plus a multiple of the null-space vector

b
p = A/AtA*A'*b        % projection attempted through the normal equations
A*x
A*x2
norm(x)
norm(x2)

null(A')
null(pinv(A))

% --- Denoising by truncation: adding small noise changes the rank reported
% below; keeping only the first two singular triplets gives A_denoised.
A = [2 7 6;4 14 12;1 3 3]
rank(A)
noise = rand(3)/1e5
A_noisy = A + noise
rank(A_noisy)
[U,S,V] = svd(A_noisy)

A_denoised = U(:,1:2)*S(1:2,1:2)*V(:,1:2)'
rank(A_denoised)

%% SVD of a binding experiment
close('all');

% Tab-delimited fluorescence matrix. The meshgrid/surf call below pairs its
% rows with the wavelengths in wl and its columns with the calcium
% concentrations in ca_conc.
A = dlmread('../DATABASE/Fluorescence_data.txt','\t');
[m,n] = size(A);

% Center the data: remove from each row its own mean.
mean_A = repmat(mean(A,2),1,n);
cA = A - mean_A;
% cA = A;
ca_conc = [0 2.6 7.8 17.8 38.8 ...
           80.8 163.8 331 664 1331 ...
           2664 3997 5330 6663 7996];
wl = (300:400)';

% Economy size svd of the centered data.
[U,S,V] = svd(cA,'econ');

% Scree plot of the first 14 singular values.
Scree = figure;
plot(diag(S(1:14,1:14)),'--ro','Linewidth',1.0, ...
    'MarkerEdgeColor','m','MarkerFaceColor','c');
ylabel('Sigma value  ');
xlabel('Sigma index  ');
xlim([0 15]);
ylim([-20,1.6e3]);
grid('on');

% First spectral component (rank-1 term of the SVD) with the mean added back.
comp_1 = U(:,1)*S(1,1)*V(:,1)' + mean_A;
[XI,YI] = meshgrid(ca_conc,wl);
figure;
surf(XI,YI,comp_1);
xlabel('[Ca] microM ');
ylabel('Wavelength (nm) ');
zlabel('Fluorescence ');

% Determination of the Kd for Ca. U is the eigenvector matrix of AA', so the
% scores on the first component can be obtained directly by projecting the
% centered data onto U(:,1):
SV = U(:,1)'*cA;

% Shift the scores so that the smallest is zero, then fit the sum of two
% hyperbolic terms.
xvec = ca_conc;
yvec = SV - min(SV);
figure;
plot(xvec,yvec,'ob');
hold('on');
f = fittype('b*(x/(a + x)) + d*(x/(c + x))');
[Hyperb,GOF] = fit(xvec',yvec',f,'StartPoint',[100 1 4000 1]);
U_1 = coeffvalues(Hyperb)      % indexed below as [a b c d]

% Individual hyperbolic terms and their sum, evaluated at the data points.
ls_yvec_1 = U_1(2)*xvec./(U_1(1)+xvec);
ls_yvec_2 = U_1(4)*xvec./(U_1(3)+xvec);
ls_yvec = ls_yvec_1 + ls_yvec_2;
plot(xvec,yvec,'ob',xvec,ls_yvec,'-c');
hold('on');
plot(xvec,ls_yvec_1,'--b',xvec,ls_yvec_2,'--r');
hold('off');
xlabel('[Ca] microM ');
ylabel('Fluorescence contribution');

%% Alternative way of considering PCA via SVD
clear; clc; close('all');

% Variables as columns: the data matrix is transposed after loading and the
% column means are removed.
A = dlmread('../DATABASE/Fluorescence_data.txt','\t');
A = A';
[m,n] = size(A);
cA = A - mean(A);

ca_conc = [0 2.6 7.8 17.8 38.8 ...
           80.8 163.8 331 664 1331 ...
           2664 3997 5330 6663 7996];
wl = (300:400)';

% Economy size svd of the centered data.
[U,S,V] = svd(cA,'econ');

% Determination of the Kd for Ca. With this layout the SVD provides both the
% scores and the loadings of the first component directly:
T = U(:,1)*S(1,1)   % Score matrix
P = V(:,1)          % Loading matrix

% Shift the scores so that the smallest is zero, then fit the sum of two
% hyperbolic terms (T is a column here, so only xvec is transposed).
xvec = ca_conc;
yvec = T - min(T);
f = fittype('b*(x/(a + x)) + d*(x/(c + x))');
[Hyperb,GOF] = fit(xvec',yvec,f,'StartPoint',[100 1 4000 1]);
U_1 = coeffvalues(Hyperb)      % indexed below as [a b c d]

% Individual hyperbolic terms and their sum, evaluated at the data points.
ls_yvec_1 = U_1(2)*xvec./(U_1(1)+xvec);
ls_yvec_2 = U_1(4)*xvec./(U_1(3)+xvec);
ls_yvec = ls_yvec_1 + ls_yvec_2;
figure;
plot(xvec,yvec','ob',xvec,ls_yvec,'-c');
hold('on');
plot(xvec,ls_yvec_1,'--b',xvec,ls_yvec_2,'--r');
hold('off');
xlabel('[Ca] microM ');
ylabel('Fluorescence contribution');

%% Variables as rows
clear; clc; close('all');

% The data matrix is used as loaded and the row means are removed.
A = dlmread('../DATABASE/Fluorescence_data.txt','\t');
[m,n] = size(A);
cA = A - mean(A,2);

ca_conc = [0 2.6 7.8 17.8 38.8 ...
           80.8 163.8 331 664 1331 ...
           2664 3997 5330 6663 7996];
wl = (300:400)';

% Economy size svd of the centered data.
[U,S,V] = svd(cA,'econ');

% Determination of the Kd for Ca. With this layout the roles of U and V are
% exchanged with respect to the "variables as columns" case:
T = S(1,1)*V(:,1)'  % Score matrix
P = U(:,1)          % Loading matrix

% Shift the scores so that the smallest is zero, then fit the sum of two
% hyperbolic terms (T is a row here, so both vectors are transposed).
xvec = ca_conc;
yvec = T - min(T);
f = fittype('b*(x/(a + x)) + d*(x/(c + x))');
[Hyperb,GOF] = fit(xvec',yvec',f,'StartPoint',[100 1 4000 1]);
U_1 = coeffvalues(Hyperb)      % indexed below as [a b c d]

% Individual hyperbolic terms and their sum, evaluated at the data points.
ls_yvec_1 = U_1(2)*xvec./(U_1(1)+xvec);
ls_yvec_2 = U_1(4)*xvec./(U_1(3)+xvec);
ls_yvec = ls_yvec_1 + ls_yvec_2;
figure;
plot(xvec,yvec,'ob',xvec,ls_yvec,'-c');
hold('on');
plot(xvec,ls_yvec_1,'--b',xvec,ls_yvec_2,'--r');
hold('off');
xlabel('[Ca] microM ');
ylabel('Fluorescence contribution');

%% SVD and image reconstruction/filtering
close all, clear, clc
A = imread('peppers.png');
imshow(A)
title('Original image: 24-bit RGB')

% Work on the first color plane only.
A1 = A(:,:,1);
[x,y] = size(A1)
figure;imagesc(double(A1))
colormap('gray');

% Economy-size SVD and plot of the singular values.
[U,S,V] = svd(double(A1),'econ');
figure;plot(diag(S))

% Number of singular triplets retained in the low-rank reconstruction
% (capped at the number of singular values available).
k = min(175,min(x,y));
A1_denoise = U(:,1:k)*S(1:k,1:k)*V(:,1:k)';
figure;imagesc(A1_denoise)
colormap('gray');
rank(double(A1)), rank(double(A1_denoise))

% Storage needed by the original plane.
orig = x*y

% Storage needed by the truncated SVD: k left vectors, k right vectors and
% the k singular values actually used in the reconstruction. (Counting all
% the singular values, as diag(S) would, overstates the compressed size.)
sU = U(:,1:k);
sV = V(:,1:k);
sS = diag(S(1:k,1:k));
svd_compressed = numel(sU) + numel(sV) + numel(sS)
svd_compr_ratio = orig/svd_compressed

% For comparison: number of non-zero entries in the reduced row echelon
% form of the low-rank reconstruction.
[echelon,ind_cols] = rref(A1_denoise);
openvar echelon
rref_compressed = sum(echelon(:)~=0)
rreff_compr_ratio = orig/rref_compressed

% figure;imshow(A1)
% A1_denoise = uint8(A1_denoise);
% figure;imshow(A1_denoise)
% rank(double(A1)), rank(double(A1_denoise))

%% Special topic: RMSD superposition of two conformations of the same 
%  structure of a biomolecule.

close('all');
clear; clc;

% Each row of the trajectory matrix is one frame stored as interleaved
% x,y,z coordinates (see the 1:3:end, 2:3:end, 3:3:end indexing below).
trj_mat = dlmread('../DATABASE/md_trj_matrix.txt');
[nframes,ncoords] = size(trj_mat);
n = ncoords/3;

% First structure, rearranged as a 3-by-n matrix with rows x, y and z.
A = trj_mat(1,:);
A_x = A(1:3:ncoords);
A_y = A(2:3:ncoords);
A_z = A(3:3:ncoords);
A = vertcat(A_x,A_y,A_z);

% Notice that the sum of the squared elements of the 1-dimensional vector
% is the same as the trace of A'*A (or A*A') for the 3-by-n representation.
trj_mat(1,:)*trj_mat(1,:)'
trace(A'*A)
trace(A*A')

% Last structure, rearranged in the same way. A copy is kept in B_orig.
B = trj_mat(nframes,:);
B_x = B(1:3:ncoords);
B_y = B(2:3:ncoords);
B_z = B(3:3:ncoords);
B = vertcat(B_x,B_y,B_z);
B_orig = B;

% AB_structure = figure;
% plot3(A(1,:),A(2,:),A(3,:),'-bo','Linewidth',1.0,...
% 'MarkerEdgeColor','r','MarkerFaceColor','y')
% box('on'); grid('on')
% xlabel('X coord')
% ylabel('Y coord')
% zlabel('Z coord')
% 
% hold on
% plot3(B(1,:),B(2,:),B(3,:),'-mo','Linewidth',1.0,...
% 'MarkerEdgeColor','m','MarkerFaceColor','c')

%%
% Displace structure A with a known rigid-body motion.

% Translation: the same vector is added to every column of A.
trans_vec = [12; -24; -9];
A = A + repmat(trans_vec,1,n);

% Rotation angles, in degrees.
psi = 30;
theta = 210;
phi = 55;

% Elementary rotation matrices about the x, y and z axes.
Q1 = [1      0          0
      0  cosd(phi)  sind(phi)
      0 -sind(phi)  cosd(phi)];
Q2 = [cosd(theta)  0 -sind(theta)
           0       1       0
      sind(theta)  0  cosd(theta)];
Q3 = [ cosd(psi)  sind(psi)  0
      -sind(psi)  cosd(psi)  0
           0          0      1];

% Q_for = Q3*Q2*Q1;

% The transpose of each matrix is taken because we want to rotate the
% actual structure, rather than the frame.
Q_for = Q1'*Q2'*Q3';

A = Q_for*A;
A_orig = A;       % displaced A, kept for the final check

% Displaced A and B drawn in the same axes.
ABrot_structure = figure;
plot3(A(1,:),A(2,:),A(3,:),'-bo','Linewidth',1.0,...
    'MarkerEdgeColor','r','MarkerFaceColor','y');
box('on');
grid('on');
xlabel('X coord');
ylabel('Y coord');
zlabel('Z coord');

hold('on');
plot3(B(1,:),B(2,:),B(3,:),'-mo','Linewidth',1.0,...
    'MarkerEdgeColor','m','MarkerFaceColor','c');

% RMSD before any superposition.
Diff = A - B;
rmsd_orig = sqrt((Diff(:)'*Diff(:))/n)

%% SVD solution

% Move both structures so that their centroids sit at the origin.
mA = mean(A,2);               % the centroid of A
mB = mean(B,2);               % the centroid of B
A = A - repmat(mA,1,n);
B = B - repmat(mB,1,n);

% SVD of the 3-by-3 matrix A*B'.
D = 3;
C = A*B';
[U,S,V] = svd(C);
I = eye(D);

% if (det(U*V') < 0)   % numerically more stable than det(U*S*V') = det(C) < 0
%                      % same as det(Q) = det(V*U')  
%     I(D,D) = -1 ;
% end

% The last diagonal entry takes the sign of det(V*U').
I(D,D) = sign(det(V*U'));

% Rotation Q and translation r that take A onto B.
Q = V*I*U';
r = mB - Q*mA;

% Rotated (centered) A and its residual with respect to (centered) B.
Arot = Q*A;
Diff1 = Arot - B;

ABrot_structure = figure;
plot3(Arot(1,:),Arot(2,:),Arot(3,:),'-bo','Linewidth',1.0,...
    'MarkerEdgeColor','r','MarkerFaceColor','y');
box('on');
grid('on');
xlabel('X coord');
ylabel('Y coord');
zlabel('Z coord');

hold('on');
plot3(B(1,:),B(2,:),B(3,:),'-mo','Linewidth',1.0,...
    'MarkerEdgeColor','m','MarkerFaceColor','c');

% RMSD after superposition.
lrmsd = sqrt((Diff1(:)'*Diff1(:))/n)

%% Check
% Apply the rotation Q and the translation r to the displaced (uncentered)
% A and compare with the original B.
Arot2 = Q*A_orig + repmat(r,1,n);
Diff2 = Arot2 - B_orig;
lrmsd2 = sqrt((Diff2(:)'*Diff2(:))/n)

ABrot2_structure = figure;
plot3(Arot2(1,:),Arot2(2,:),Arot2(3,:),'-bo','Linewidth',1.0,...
    'MarkerEdgeColor','r','MarkerFaceColor','y');
box('on');
grid('on');
xlabel('X coord');
ylabel('Y coord');
zlabel('Z coord');

hold('on');
plot3(B_orig(1,:),B_orig(2,:),B_orig(3,:),'-mo','Linewidth',1.0,...
    'MarkerEdgeColor','m','MarkerFaceColor','c');

%% Solution using 'Kabsch.m' script by Ehud Schreiber (MATLAB Exchange)
% The three outputs are displayed for comparison with Q, r and the RMSD
% computed above.

[Q_k,r_k,lrmsd_k] = Kabsch(A_orig,B_orig)


%% Partial Least Squares Regression and Principal Components Regression
% This example shows how to apply Partial Least Squares Regression (PLSR)
% and Principal Components Regression (PCR), and discusses the
% effectiveness of the two methods.  PLSR and PCR are both methods to model
% a response variable when there are a large number of predictor variables,
% and those predictors are highly correlated or even collinear.  Both
% methods construct new predictor variables, known as components, as linear
% combinations of the original predictor variables, but they construct
% those components in different ways.  PCR creates components to explain
% the observed variability in the predictor variables, without considering
% the response variable at all. On the other hand, PLSR does take the
% response variable into account, and therefore often leads to models that
% are able to fit the response variable with fewer components.  Whether or
% not that ultimately translates into a more parsimonious model, in terms
% of its practical use, depends on the context.
%

%% Loading the Data
% Load a data set comprising spectral intensities of 60 samples of gasoline at
% 401 wavelengths, and their octane ratings.  These data are described in
% Kalivas, John H., "Two Data Sets of Near Infrared Spectra," Chemometrics and
% Intelligent Laboratory Systems, v.37 (1997) pp.255-259.
clear, clc
load spectra
whos NIR octane

%%
% Plot all the spectra, ordered and colored by octane rating. The plot is
% drawn in a new figure: without it plot3 would be added to whatever figure
% is current (possibly one left with 'hold on' by a previous section).
[nSamples,nWavelengths] = size(NIR);
[~,h] = sort(octane);
figure;
oldorder = get(gcf,'DefaultAxesColorOrder');
% One color per sample; the default is restored after plotting.
set(gcf,'DefaultAxesColorOrder',jet(nSamples));
plot3(repmat(1:nWavelengths,nSamples,1)', ...
      repmat(octane(h),1,nWavelengths)', ...
      NIR(h,:)');
set(gcf,'DefaultAxesColorOrder',oldorder);
xlabel('Wavelength Index'); ylabel('Octane'); axis('tight');
grid on

%% Fitting the data with 10 PCA components
X = NIR; % Independent variables are the different columns (wavelengths)
Y = octane;
[n,p] = size(X);

%%
% The first step is to perform Principal Components Analysis on |X|, using
% the |pca| function. The first ten components are inspected and four of
% them are retained. PCR is then just a linear regression of the response
% variable on the retained components.
[PCALoadings,PCAScores,PCAVar] = pca(X,'Economy',false);
% Same as:
% [U,S,V] = svd((X-repmat(mean(X),n,1)))

% Remember that the PCALoadings are an orthogonal basis for the row space
% (the spectra) of the centered X matrix. Notice that with the convention
% used, in which the variables are the different columns of X, we have: 
% X = Scores * Loading^T, and we derive Scores = X * Loadings. Therefore:
PCAScores = (X-repmat(mean(X),n,1))*PCALoadings;

% The following figure shows the fraction of variance in X explained by the
% different principal components used in the PCR. The cumulative variance
% is divided by the TOTAL variance, sum(PCAVar): dividing by the variance
% of the first 10 components only would force the curve to reach 100% at
% the 10th component regardless of the data.
figure
plot(1:10,100*cumsum(PCAVar(1:10))/sum(PCAVar),'r-^');
xlabel('Number of Principal Components');
ylabel('Percent Variance Explained in X');
legend({'PCR'},'location','SE');
grid on

% Based on this result we decide to use only 4 components in the
% regression:
betaPCR = regress(Y-mean(Y), PCAScores(:,1:4));

% same as (normal equations, written with inv and with the backslash):
betaPCR = inv(PCAScores(:,1:4)'*PCAScores(:,1:4))*PCAScores(:,1:4)'*(Y-mean(Y));
betaPCR = (PCAScores(:,1:4)'*PCAScores(:,1:4))\PCAScores(:,1:4)'*(Y-mean(Y));

%%
% To make the PCR results easier to interpret in terms of the original
% spectral data, we bring back the regression coefficients into standard
% space.
betaPCR = PCALoadings(:,1:4)*betaPCR;
yfitPCR = X*betaPCR;

% Plot fitted vs. observed response for the PCR fits (new figure, so that
% the variance plot above is not overwritten).
figure
plot(Y,yfitPCR,'r^');
xlabel('Observed Response');
ylabel('Fitted Response');
legend({'PCR with 4 Components'},'location','NW');
grid on

% However, the fitted response shows a clear offset. We can correct this by
% adding a constant term to the fit. Since y = b + ax --> b = y - ax,
% accordingly we calculate the constant regression coefficient as:
betaPCR = [mean(Y) - mean(X)*betaPCR; betaPCR];
% We have a total of p+1 (here 402) regression coefficients:
yfitPCR = [ones(n,1) X]*betaPCR;

%%
% Plot fitted vs. observed response for the PCR fits.
figure
plot(Y,yfitPCR,'r^');
xlabel('Observed Response');
ylabel('Fitted Response');
legend({'PCR with 4 Components'},'location','NW');
grid on

%% Using pseudoinverse
% An alternative way to rationalize the PCR is to use the pseudoinverse
% after deriving a PCA reduced data set with only 4 components:
meanX = mean(X);
cX = X - repmat(meanX,n,1);
[coeff,score,latent] = pca(X);
% Output suppressed: rX has the same size as X.
rX = score(:,1:4)*coeff(:,1:4)' + repmat(meanX,n,1);

% Here we use the SVD explicitly:
[U,S,V] = svd(cX,'econ');
rX = U(:,1:4)*S(1:4,1:4)*V(:,1:4)' + repmat(meanX,n,1);

% Since the rank of rX and therefore [ones(n,1) rX] is 5 (4 from the svd
% and 1 from re-adding the mean) we can't use the normal equation, but the
% pseudoinverse gives the shortest least squares solution:
betaPCR_mp = pinv([ones(n,1) rX])*Y

% or alternatively
% betaPCR_mp = pinv(rX)*Y
% beta0 = mean(Y) - mean(rX)*betaPCR_mp;
% betaPCR_mp = [beta0;betaPCR_mp];

% Again, we have a total of 402 regression coefficients:
yfitPCR_mp = [ones(n,1) rX]*betaPCR_mp;

% We plot fitted vs. observed response for the PCR fit:
figure
plot(Y,yfitPCR_mp,'r^');
xlabel('Observed Response');
ylabel('Fitted Response');
legend({'PCR with 4 Components'},'location','NW');
grid on
hold on
% The fitted response obtained using the pseudoinverse is identical to that
% derived previously, but the norm of the pseudoinverse solution for the
% regression coefficients is smaller.
norm(betaPCR)
norm(betaPCR_mp)

% Finally we calculate the r-squared value for the PCR regression:
SST = sum((Y-mean(Y)).^2); % Total sum of squares
SSE_PCR = sum((Y-yfitPCR_mp).^2);
rsquaredPCR = 1 - SSE_PCR/SST

%% PCR versus MLR
% The PCR result is compared here with a plain multiple linear regression
% (MLR) on all the columns of X, solved once with the pseudoinverse and
% once with the backslash operator.
betaMR1 = pinv([ones(n,1), X])*Y
betaMR2 = [ones(n,1), X]\Y
% As before, each solution holds 402 regression coefficients:
yfitMR1 = [ones(n,1), X]*betaMR1;
yfitMR2 = [ones(n,1), X]*betaMR2;

% Fitted vs. observed response for the PCR fit and the two MLR fits:
figure;
plot(Y,yfitPCR_mp,'r^',Y,yfitMR1,'bo',Y,yfitMR2,'ms');
xlabel('Observed Response');
ylabel('Fitted Response');
legend({'PCR with 4 Components','All the data pinv','All the data direct'},'location','NW');
grid('on');
hold('on');

% Regression coefficients (constant term excluded): first all four
% solutions, then only the two PCR solutions.
figure;
plot(1:401,[betaPCR(2:end), betaPCR_mp(2:end), betaMR1(2:end), betaMR2(2:end)]);
figure;
plot(1:401,[betaPCR(2:end), betaPCR_mp(2:end)]);

%% Cross-validation for PCR
% Ideally, the number of components should be chosen so as to minimize the
% expected error when predicting the response from future observations on
% the predictor variables. Cross-validation is the statistically most
% sound method for choosing the number of components in PCR: the data used
% to fit a model are never reused to estimate its prediction error, which
% avoids overfitting.

% For PCR, |crossval| is combined with the function |pcrsse| (defined
% outside this script), which computes the sum of squared errors for PCR.
% Summing over the 10 folds and dividing by n gives the estimated mean
% squared error of prediction (MSEP).
close('all');
PCRmsep = sum(crossval(@pcrsse,X,Y,'KFold',10),1) / n;

% The MSEP curve for PCR confirms that four components are necessary to get
% the same prediction accuracy.
plot(0:10,PCRmsep,'r-^');
xlabel('Number of components');
ylabel('Estimated Mean Squared Prediction Error');
legend({'PCR'},'location','NE');
grid('on');

% In fact, the second component in PCR _increases_ the prediction error
% of the model, suggesting that the combination of predictor variables
% contained in that component is not strongly correlated with |y|.  Again,
% that's because PCR constructs components to explain variation in |X|, not
% |y|.

%% PLS with NIPALS
% addpath(genpath('TOOLBOXES/pls'))
% addpath(genpath('TOOLBOXES/EPLS'))
% model = PLS(X,Y,4)
% yfitPLS4 = X*model.theta;
% SST = sum((Y-mean(Y)).^2);
% SSE_PLS4 = sum((Y-yfitPLS4).^2);
% rsquaredPLS4 = 1 - SSE_PLS4/SST
% 
% % We plot fitted vs. observed response for the PLS1 fit:
% plot(Y,yfitPLS4,'bo');
% xlabel('Observed Response');
% ylabel('Fitted Response');
% legend({'PCR with 4 Components' 'PLS1 with 4 Components'},'location','NW');
% grid on

%% Fitting the Data with 10 PLS Components
% Use the |plsregress| function to fit a PLSR model with 10 PLS components
% and one response.
[P,Q,T,U,betaPLS10,PctVarPLS10,MSE,stats] = ...
    plsregress(X,Y,10);

% Centered predictors and response. The display of cX is suppressed
% because it has the same size as X.
meanX = mean(X);
meanY = mean(Y);
cX = X - repmat(meanX,n,1);
cY = Y - repmat(meanY,n,1)

% the Xscores are derived as a linear combination of the columns of cX, the
% centered X:
W = stats.W;
Xscores = cX*W;

% We want to find the coefficients of the linear combination of the columns
% of Xscores (the PLS components) that produces both X0 and y0:
P_t = T\cX;
Q_t = T\cY;

% The residuals E and F are now easily calculated (Xres has the same size
% as X, so its display is suppressed):
Xres = cX - T*P';
Yres = cY - T*Q'

% The matrix of Yscores, the responses that have maximal covariance with
% each of the PLS components Xscores, is obtained as:
U = cY*Q;
U = cY*cY'*T;

% The PLS regression coefficients are finally derived adding a constant
% term
beta = W*Q';
beta = W*(T\cY)
beta_pls = [meanY - meanX*beta; beta];

%%
% One quick way to choose the number of components is to plot the percent
% of variance explained in the predictor and response variables as a
% function of the number of components. A new figure is opened so that the
% two-axis plot is not drawn on top of a figure from a previous section.
figure
yyaxis left
plot(1:10,cumsum(100*PctVarPLS10(2,:)),'-bo');
xlabel('Number of PLS components');
ylabel('Percent Variance Explained in Y');
yyaxis right
plot(1:10,cumsum(100*PctVarPLS10(1,:)),'-ro');
xlabel('Number of PLS components');
ylabel('Percent Variance Explained in X');
grid on

%
% The above plot suggests that PLSR with 3-4 components explains most of the
% variance in the observed |X| and |y|.

%% Choosing the Number of Components with Cross-Validation
% It's often useful to choose the number of components to minimize the
% expected error when predicting the response from future observations on
% the predictor variables.  Simply using a large number of components will
% do a good job in fitting the current observed data, but is a strategy
% that leads to overfitting.  Fitting the current data too well results in
% a model that does not generalize well to other data, and gives an
% overly-optimistic estimate of the expected error.
%
% Cross-validation is a more statistically sound method for choosing the
% number of components in either PLSR or PCR.  It avoids overfitting data
% by not reusing the same data to both fit a model and to estimate
% prediction error. Thus, the estimate of prediction error is not
% optimistically biased downwards.
%
% |plsregress| has an option to estimate the mean squared prediction error
% (MSEP) by cross-validation, in this case using 10-fold C-V.

[P,Q,T,U,beta,pctVar,PLSmsep] = plsregress(X,Y,10,'CV',10);

% The MSEP curve for PLSR indicates that three components does about as
% good a job as possible.  On the other hand, PCR needs four components to
% get the same prediction accuracy.
% The curves are drawn in a new figure: the current one may still have the
% right-hand y-axis of a yyaxis chart active, in which case the MSEP curves
% would be mixed with the percent-variance plot left on the other axis.
figure
plot(0:10,PLSmsep(2,:),'b-o',0:10,PCRmsep,'r-^');
xlabel('Number of components');
ylabel('Estimated Mean Squared Prediction Error');
legend({'PLSR' 'PCR'},'location','NE');
grid on

%% PCR versus PLSR
% PLSR fit with 4 components and its r-squared (SST comes from the PCR
% section above).
[P,Q,T,U,betaPLS4] = plsregress(X,Y,4);
yfitPLS4 = [ones(n,1), X]*betaPLS4;

SSE_PLS4 = sum((Y-yfitPLS4).^2);
rsquaredPLS4 = 1 - SSE_PLS4/SST

%%
% Same with 3 components.
[P,Q,T,U,betaPLS3] = plsregress(X,Y,3);
yfitPLS3 = [ones(n,1), X]*betaPLS3;

SSE_PLS3 = sum((Y-yfitPLS3).^2);
rsquaredPLS3 = 1 - SSE_PLS3/SST

% Fitted vs. observed response for the PCR fit and the two PLS fits:
PCR_vs_PLS = figure
set(PCR_vs_PLS,'Unit','Normalized','Position',[0.4 0.4 0.4 0.6]);
plot(Y,yfitPCR,'LineStyle','none','Marker','o','MarkerSize',10,'Color','k');
hold('on');
plot(Y,yfitPLS3,'LineStyle','none','Marker','s','MarkerSize',10,'Color','b');
plot(Y,yfitPLS4,'LineStyle','none','Marker','^','MarkerSize',10,'Color','r');
xlabel('Observed Response');
ylabel('Fitted Response');
legend({['PCR with 4 Components: r^2 = ', num2str(rsquaredPCR)], ...
        ['PLS SIMPLS with 3 Components: r^2 = ', num2str(rsquaredPLS3)], ...
        ['PLS SIMPLS with 4 Components: r^2 = ', num2str(rsquaredPLS4)]}, ...
       'location','NW');
grid('on');
axis('equal');

%% PCR vs PLSR Regression coefficients
% Regression coefficients (constant term excluded) of the PCR model and of
% the 3-component PLS model. Only one figure is created, so that the handle
% PCR_vs_PLS_beta refers to the window that actually contains the plot.
PCR_vs_PLS_beta = figure;
plot(1:401,betaPCR(2:end),'--b',1:401,betaPLS3(2:end),'-r');
xlabel('Variable');
ylabel('Beta');
xlim([0 401])
legend({'PCR' 'PLS3' },'location','NW');
grid on

%% Model Parsimony
% So if PCR requires four components to get the same prediction accuracy as
% PLSR with three components, is the PLSR model more parsimonious?  That
% depends on what aspect of the model you consider.
%
% The PLS weights are the linear combinations of the original variables
% that define the PLS components, i.e., they describe how strongly each
% component in the PLSR depends on the original variables, and in what
% direction.
[P,Q,T,U,beta,pctVar,mse,stats] = plsregress(X,Y,3);
% The weights are drawn in their own figure; otherwise they would replace
% the content of whatever figure is current.
PLS_weights = figure;
plot(1:401,stats.W,'-');
xlim([0 401])
xlabel('Variable');
ylabel('PLS Weight');
legend({'1st Component' '2nd Component' '3rd Component'},  ...
	'location','NW');
grid on
%%
% The loadings P's are a basis for the row space of X, that is, the
% individual spectral components whose linear combination determined by the
% scores T produces the spectra in X:
PCR_vs_PLS_loadings = figure;
set(PCR_vs_PLS_loadings,'Unit','Normalized','Position',[0.4 0.3 0.3 0.7])

% Top panel: first four PCA loadings.
subplot(2,1,1)
plot(1:401,PCALoadings(:,1:4),'-');
xlabel('Variable');
ylabel('PCA Loading');
legend({'1st Component' '2nd Component' '3rd Component'  ...
	'4th Component'},'location','NW');
xlim([0 401]);grid on

% Bottom panel: X loadings of the 3-component PLS model.
subplot(2,1,2)
plot(1:401,P(:,1:3),'-');
xlabel('Variable');
ylabel('X PLS Loading');
legend({'1st Component' '2nd Component' '3rd Component'},...
    'location','NW');
xlim([0 401]);grid on

