% Copyright (c) 2017, Domenico L. Gatti
% All rights reserved.
% 
% Redistribution and use in source and binary forms, with or without 
% modification, are permitted provided that the following conditions are 
% met:
% 
%     * Redistributions of source code must retain the above copyright 
%       notice, this list of conditions and the following disclaimer.
%     * Redistributions in binary form must reproduce the above copyright 
%       notice, this list of conditions and the following disclaimer in 
%       the documentation and/or other materials provided with the 
%       distribution
%       
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
% IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
% CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
% EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
%% General dependencies
% We always start from the CODE directory and we add to the path
% subdirectories containing various tools described in the book chapters.
addpath(genpath('../GENERAL_SCRIPTS_FUNCTIONS'));
addpath(genpath('../DATABASE'));

%% CHAPTER 21: Non-negative matrix factorization and Tensor decomposition.
% The examples used in this practice are taken from the Tutorial
% "Interactive introduction to multi-way analysis in MATLAB" contained in
% the N-way Toolbox developed by Rasmus Bro:
% (https://www.mathworks.com/matlabcentral/fileexchange/1088-the-n-way-toolbox)
% After installing the N-way toolbox in your TOOLBOXES directory (typically
% as 'nway331'), add it to your path using 'addpath'. For example:
addpath(genpath('../TOOLBOXES/nway331'));

% For some operations you also need to remove from the path other toolboxes
% that allow operations with tensors. For example:
rmpath(genpath('../TOOLBOXES/tensorlab_2016-03-28'))
rmpath(genpath('../TOOLBOXES/tensor_toolbox'))

%
clear, clc, close all

% Example 1: the spectral data contain fluorescence excitation/emission
% landscapes from five samples containing tryptophan, phenylalanine, and
% tyrosine. Besides the 3-way array X, the file supplies the wavelength
% axes EmAx and ExAx used for plotting.
load ../TOOLBOXES/nway331/claus
[nsamples,n_em_wl,n_ex_wl] = size(X);

% Plot the landscape of each sample in its own subplot.
AA_mixtures = figure;
set(gcf,'Units','Normalized','Position', [0.1 0 0.25 1])
for i = 1:nsamples
    subplot(nsamples,1,i)
    % X(i,:) flattens sample i; reshaping to emission x excitation and
    % transposing gives the excitation x emission grid expected by
    % mesh(EmAx,ExAx,Z). The dimensions are taken from size(X) instead of
    % being hard-coded.
    mesh(EmAx,ExAx,reshape(X(i,:),n_em_wl,n_ex_wl)')
    zlabel('Intensity')
    axis tight;box on
    title(['Sample ' num2str(i)])
end
% These labels are attached to the last subplot only.
xlabel('Emission wavelength')
ylabel('Excitation wavelength')

% Manage data: reduce the size of X for easier computations by keeping
% every 6th emission and every 3rd excitation wavelength.
Xnew = X(:,1:6:end,1:3:end);
[~,n_em_red,n_ex_red] = size(Xnew);

% Show sample 2 of the reduced array in a figure of its own. Calling
% subplot(3,2,6) inside the figure above would overlap the nsamples-by-1
% subplot grid, and subplot deletes existing axes that a new one overlaps.
AA_mixture_reduced = figure;
mesh(EmAx(1:6:end),ExAx(1:3:end),reshape(Xnew(2,:),n_em_red,n_ex_red)')
axis tight
title('Sample 2 (reduced data)')

% For these data it is known that a three-component PARAFAC model should be
% adequate since there are three amino acids in the samples and each can
% ideally be described by one PARAFAC component.

[Factors] = parafac(X,3);
[A,B,C] = fac2let(Factors);

% Tensorlab
% [Factors_TL,output] = cpd(X,3);
% size_tens = [5 201 61]; R = 3;
% [Factors_TL,output] = cpd_als(X,cpd_rnd(size_tens,R))

% The output Factors is actually a cell array (type help cell) of size 1x3.
% Cell element one holds A, element two B and element three C and you can
% access those directly e.g. by typing A = Factors{1}. 

% The scores/loadings in A are the sample mode loadings, the loadings in B
% are the emission mode loadings and the loadings in C are the excitation
% mode loadings. The emission and excitation loadings are drawn side by
% side in a dedicated figure: two consecutive plot calls on the same axes
% would leave only the second one visible.
PARAFAC_loadings = figure;
subplot(1,2,1)
plot(EmAx,B)
xlabel('Emission wavelength')
title('Emission mode loadings (B)')
subplot(1,2,2)
plot(ExAx,C)
xlabel('Excitation wavelength')
title('Excitation mode loadings (C)')

% We can also plot all loadings at once.
% NOTE(review): a fresh figure is opened so that plotfac cannot draw over
% the loadings figure above; drop it if plotfac opens its own window.
figure
plotfac(Factors)

%%
% From the estimated loadings it is possible to estimate the model of the
% data slice by slice. With A (samples), B (emission) and C (excitation),
% fixing the 3rd index of the third (excitation) mode gives the
% samples x emission slice that models X(:,:,3):
S1_3 = A*diag(C(3,:))*B';
% and fixing the 5th index of the first (sample) mode gives the
% emission x excitation slice that models squeeze(X(5,:,:)):
S3_5 = B*diag(A(5,:))*C';

% We can matricize the model along 3 different modes using the Khatri-Rao
% product:
M1 = A*kr(C,B)'; % Column fibers provide the columns of M1
M2 = B*kr(C,A)'; % Row fibers provide the columns of M2
M3 = C*kr(B,A)'; % Tube fibers provide the columns of M3

% We reconstruct the 3-way array by reshaping the mode-1 unfolding back to
% the size of the data (taken from X rather than hard-coded):
M = reshape(M1,size(X));

% Alternatively we simply use the function nmodel:
M = nmodel(Factors); 

% The residual sum of squares of the fit can be obtained directly:
R = X(:)-M(:);
sse = R'*R

% or using a modified syntax of the parafac function, which returns it as
% the third output (this refits the model and replaces Factors and sse):
[Factors,it,sse] = parafac(X,3);

% We can also repeat the fit using orthogonality and/or non-negativity
% constraints. The 4th argument of parafac holds one constraint code per
% mode; as used in this script 0 = unconstrained, 1 = orthogonality and
% 2 = non-negativity (see 'help parafac' for the full list).

% Orthogonality constraints among the columns of the emission (mode 2) and
% excitation (mode 3) factor matrices; the sample mode is left free:
[Factors_orth,it_orth,sse_orth] = parafac(X,3,[],[0 1 1]);
sse_orth
% Gram matrices of the three factor matrices, displayed to inspect how
% close the columns of each one are to being orthogonal.
Factors_orth{1}'*Factors_orth{1}
Factors_orth{2}'*Factors_orth{2}
Factors_orth{3}'*Factors_orth{3}

% Plot the loadings of the orthogonality-constrained model itself (not B
% and C, which belong to the unconstrained fit).
claus_array_orth = figure;
subplot(1,2,1)
plot(EmAx,Factors_orth{2})
xlabel('Emission wavelength')
subplot(1,2,2)
plot(ExAx,Factors_orth{3})
xlabel('Excitation wavelength')

% Non-negativity constraints on the columns of each factor matrix:
[Factors_nneg,it_nneg,sse_nneg] = parafac(X,3,[],[2 2 2]);
sse_nneg

% All three panels show the non-negative model, so that the
% concentrations and the spectral loadings come from the same fit.
claus_array_2 = figure;
set(gcf,'Units','Normalized','Position',[0.2 0.4 0.8 0.4])
subplot(1,3,1)
bar(Factors_nneg{1}')
xlabel('Analytes')
ylabel('Concentration (arbitrary units)')
grid on
subplot(1,3,2)
plot(EmAx,Factors_nneg{2})
xlabel('Emission wavelength')
ylabel('Intensity')
grid on
subplot(1,3,3)
plot(ExAx,Factors_nneg{3})
xlabel('Excitation wavelength')
ylabel('Quantum yield')
grid on

%% Solving the same problem with the Tensorlab Toolbox
% Swap the toolboxes on the path: the N-way toolbox is removed and
% Tensorlab is added.
rmpath(genpath('../TOOLBOXES/nway331'));
addpath(genpath('../TOOLBOXES/tensorlab_2016-03-28'))
% rmpath(genpath('TOOLBOXES/tensor_toolbox'))

% Nonnegative CPD.
% The tensor dimensions are read from the data instead of being
% hard-coded, so the model always matches X. R is the number of components.
[I1,I2,I3] = size(X);
R = 3;

% We create a structure model which defines the variables of the SDF
% problem. The 'variables' field defines the parameters
% which are optimized, and is also used as initialization for the SDF
% algorithm.

% Define model variables (random initialization, one matrix per mode).
model.variables.u1 = randn(I1,R);
model.variables.u2 = randn(I2,R);
model.variables.u3 = randn(I3,R);

% Here, each variable is defined as a MATLAB array. It is also perfectly
% valid to define variables as (nested) cell arrays of arrays, if desired.
% Now we need to define the factors. There are three factors in this CPD,
% which we define as transformations of the variables u1, u2, u3. We
% require the factors to be nonnegative.

% Define model factors as transformed variables.
model.factors.U1 = {'u1',@struct_nonneg}; 
model.factors.U2 = {'u2',@struct_nonneg}; 
model.factors.U3 = {'u3',@struct_nonneg};


% Finally, we define the data set to be factorized and which factors to
% use. Each factorization in the SDF problem should be given a new name. In
% this case there is only one factorization 'fact_1' and it contains two
% fields. The first is 'data' and contains the tensor to be factorized. The
% second should be either 'cpd' or 'btd', depending on which model to use,
% and should define the factors to be used in the decomposition. Note that
% it is not necessary to use fields to describe the names of the variables
% and factors. Instead, one may also create cell arrays of variables and
% factors and use indices to refer to them.

% Define model factorizations.
model.factorizations.fact_1.data = X;
model.factorizations.fact_1.cpd = {'U1','U2','U3'};

% Equivalent SDF model without using names for variables and factors.
% model.variables = { randn(I1,R) randn(I2,R) randn(I3,R)};
% model.factors = { {1,@struct_nonneg} {2,@struct_nonneg} {3,@struct_nonneg} };
% model.factorizations.fact_1.data = X;
% model.factorizations.fact_1.cpd = {1,2,3};

% The model can now be solved with one of the two families of algorithms
% for SDF problems: sdf_minf and sdf_nls. In the case of many missing
% entries, the sdf_minf family is likely to perform best. Their first
% output contains the optimized variables and factors in the fields
% 'variables' and 'factors', respectively.

% Solve the SDF problem.
options.Display = 5; % View convergence progress every 5 iterations.
sol = sdf_nls(model,options);
sol.variables
sol.factors

% Plot the three nonnegative factors returned by the SDF solver: sample
% mode as bars, emission and excitation modes as spectra.
CPD_aa_mixture = figure;
set(gcf,'Units','Normalized','Position',[0.2 0.6 0.5 0.3])
subplot(1,3,1)
bar(sol.factors.U1')
xlabel('Analytes');
ylabel('Concentration (arbitrary units)')
grid on
subplot(1,3,2)
plot(EmAx,sol.factors.U2)
xlabel('Emission wavelength')
ylabel('Intensity')
grid on
subplot(1,3,3)
plot(ExAx,sol.factors.U3)
xlabel('Excitation wavelength')
ylabel('Quantum yield')
grid on

% Rebuild the tensor from the estimated factors and compute the residual
% sum of squares. T_hat has the size of X, so its display is suppressed to
% avoid flooding the command window; only the scalar sse_t is shown.
U_hat = {sol.factors.U1 sol.factors.U2 sol.factors.U3}
T_hat = cpdgen(U_hat);
res_t = X(:)-T_hat(:);
sse_t = res_t'*res_t

%% 
clear, clc, close all
addpath(genpath('../TOOLBOXES/nway331'));

% Example 2: Here we compare a PCA to an N-way analysis of a data set
% consisting of the assessment by 8 judges of 10 breads with respect to 11
% attributes. The data are in the array X. Samples are in duplicate.
load ../TOOLBOXES/nway331/brod
size(X)

% Here we center along the observations (mode 1 = breads); no scaling.
% The centered 3-way array is computed once and reused for both PCA and
% PARAFAC.
Cent  = [1 0 0];
Scal  = [0 0 0];
Xmean_tens = nprocess(X,Cent,Scal);

% Before you can analyze the data with PCA, they must be rearranged into a
% matrix with one row per bread (mode-1 unfolding). The number of columns
% is inferred from the data instead of being hard-coded.
Xmean = reshape(Xmean_tens,size(X,1),[]);
size(Xmean)

% PCA: Use truncated SVD to estimate a two-component PCA model. 
[U,S,V] = svd(Xmean,0);
T = U(:,1:2)*S(1:2,1:2);    % Score matrix
P = V(:,1:2);               % loading matrix
Xmean_pca = T*P';           % rank-2 reconstruction (display suppressed)
e_pca = Xmean(:) - Xmean_pca(:);
sse_pca = e_pca'*e_pca

% Here we fit a 2 component parafac model, without constraints, to the
% centered 3-way array.
[Factors,it,sse_parafac] = parafac(Xmean_tens,2,[],[0 0 0]);
A = Factors{1};
B = Factors{2};
C = Factors{3};

% Here we plot the two dimensions (the latent variables) of the loading
% matrix for the attributes against each other, and the scores from both
% PCA and PARAFAC:

Loading_Scores_plot = figure;
set(gcf,'Units','Normalized','Position', [0.1 0 0.25 0.9])

% Top panel: PARAFAC attribute loadings (B), each point labelled with the
% corresponding row of attrib, slightly offset from its marker.
subplot(2,2,1:2)
plot(B(:,1),B(:,2),'sr','MarkerSize',10,'LineWidth',1.5)
for i = 1:size(B,1)
    text(B(i,1)-0.035,B(i,2)-0.04,attrib(i,:),'FontSize',14,'Color','blue')
end
grid on
xlabel('Latent Variable 1')
ylabel('Latent Variable 2')
title('Loadings')

% Bottom left: PCA scores, each observation labelled with its row number.
subplot(2,2,3)
plot(T(:,1),T(:,2),'or','MarkerSize',25,'LineWidth',2)
for i = 1:size(T,1)
    text(T(i,1)-0.3,T(i,2),num2str(i),'FontSize',14,'Color','blue')
end
grid on
xlabel('Score: Latent Variable 1')
ylabel('Score: Latent Variable 2')
title('PCA')

% Bottom right: PARAFAC scores (A). The first component is drawn with its
% sign flipped; NOTE(review): presumably to match the orientation of the
% PCA scores, since component signs are arbitrary - confirm on your run.
subplot(2,2,4)
plot(-A(:,1),A(:,2),'or','MarkerSize',25,'LineWidth',2)
for i = 1:size(A,1)
    text(-A(i,1)-0.3,A(i,2),num2str(i),'FontSize',14,'Color','blue')
end
grid on
xlabel('Score: Latent Variable 1')
ylabel('Score: Latent Variable 2')
title('PARAFAC')

%% TUCKER decomposition
%% Example 1
clear; clc

% howto2.mat provides the data array X analysed in this example.
load('howto2')

% Tucker1 models: a single mode is given a component count, the other two
% are marked with -1.
% NOTE(review): -1 presumably tells tucker to leave that mode uncompressed;
% confirm with 'help tucker'.
W2 = [3 -1 -1]
[Factors,G,SSE] = tucker(X,W2);
W2 = [-1 -1 2]
[Factors,G,SSE] = tucker(X,W2);
W2 = [-1 3 -1]
[Factors,G,SSE] = tucker(X,W2);

% Tucker2 model: two modes are given component counts.
W2 = [3 3 -1]
[Factors2,G2,SSE2] = tucker(X,W2);

% Rebuild the model array from the fitted factors and core ...
Xm2 = nmodel(Factors2,G2);

% ... and step through the three-way residuals one mode-1 slice at a time
% (press a key to advance).
Xres2 = X - Xm2;
for k = 1:size(X,1)
    mesh(squeeze(Xres2(k,:,:)));
    title(sprintf('Sample num.%d/%d',k,size(X,1)));
    pause;
end
close

% Two further Tucker2 models.
W2 = [3 -1 2]
[Factors,G,SSE] = tucker(X,W2);
W2 = [-1 3 2]
[Factors,G,SSE] = tucker(X,W2);

% An ordinary Tucker3 model: here a (3,3,2) model.
W3 = [3 3 2];
[Factors3,G3,SSE3] = tucker(X,W3);

% Rebuild the model array from the Tucker3 solution ...
Xm3 = nmodel(Factors3,G3);

% ... and step through its residuals in the same way.
Xres3 = X - Xm3;
for k = 1:size(X,1)
    mesh(squeeze(Xres3(k,:,:)));
    title(sprintf('Sample num. %d/%d',k,size(X,1)));
    pause;
end
close

% Compare the residual sums of squares of the Tucker2 and Tucker3 models:
Xres2(:)'*Xres2(:)
Xres3(:)'*Xres3(:)

%% Example 2
clear; clc

% howto1.mat provides the data array X and W, the second argument passed
% to tucker below.
load('howto1')

% Fit the Tucker3 model.
[Factors,G,SSE] = tucker(X,W);

% Plot the contents of 'Factors' directly.
plotfac(Factors);

% List the 5 most important factor combinations found in the core 'G'.
explcore(G,5);

%% Factor analysis
clear, clc, close all

% Load X and W:
load howto5 

% Make the Tucker model:
[Factors,Go] = tucker(X,W); 

% Convert to component matrices:
[A,B,C] = fac2let(Factors); 

% Rotate to optimum diagonality (Gd is the rotated core, Od1..Od3 the
% rotation matrices of the three modes):
[Gd,Od1,Od2,Od3] = maxdia3(Go); 

% Rotate to optimum variance-of-squares:
[Gv,Ov1,Ov2,Ov3] = maxvar3(Go); 

% Inspect the unrotated solution:
explcore(Go,7); 

% Inspect the diagonalized solution:
explcore(Gd,7); 

% Inspect the variance-of-squares optimized solution:
explcore(Gv,7); 

% Unfold X along mode 1 (rows = first mode):
Xunf = reshape(X,size(X,1),size(X,2)*size(X,3));

% We have the following three cores, each shown as a 3-way array and as
% its mode-1 unfolding.

Go
Gounf = reshape(Go,size(Go,1),size(Go,2)*size(Go,3))

% In Go the variation is dispersed over many elements, and it is hard to
% see what elements are significant and how many should be accounted for in
% the interpretation.

Gd
Gdunf = reshape(Gd,size(Gd,1),size(Gd,2)*size(Gd,3))

% In Gd, we verify that the diagonal elements have increased. However, note
% the large off-diagonal elements. More importantly, there is no clear cut
% between highly important factor combinations and unimportant ones. There
% is a smooth and unclear transition between significant and
% non-significant core elements.

Gv
Gvunf = reshape(Gv,size(Gv,1),size(Gv,2)*size(Gv,3))

% In Gv, we can identify 5 clearly significant elements. And the gap
% between significant and non-significant core elements is  between 18.75
% and 5.39. The optimization of variance-of-squares re-enforces the
% significant elements found in the initial core, yielding a core that is
% easier to interpret.

% Sum of squared residuals of the three equivalent models (unrotated,
% diagonalized, variance-of-squares); each rotated core is paired with the
% correspondingly rotated component matrices:
err_Go = sum(sum( (Xunf - A*Gounf*kron(C',B')).^2 )) 
err_Gd = sum(sum( (Xunf - (A*Od1)*Gdunf*kron((C*Od3)',(B*Od2)')).^2 )) 
err_Gv = sum(sum( (Xunf - (A*Ov1)*Gvunf*kron((C*Ov3)',(B*Ov2)')).^2 )) 

% If you have a model with A, B, C and G and want to predict the
% scores Anew for a new sample, do the following.

% Unfold G along mode 1 (same matrix as Gounf above):
g = Gounf; 

% Combine the unfolded core with the loadings of modes 2 and 3:
Z = g*kron(C,B)';

% Unfold the data used to fit the model (same matrix as Xunf above):
x = Xunf;

% Regress the unfolded data on Z. To verify, A_recov should equal A, i.e.
% the norm of their difference should be numerically negligible:
A_recov = x*pinv(Z)
A
A_recov_err = norm(A_recov - A,'fro')

% To fit new samples simply do:
% Anew = xnew*pinv(Z);

% Find the optimal Tucker factorization.
% mlrankest, tmprod, frob and mlsvd are Tensorlab functions. If Tensorlab
% is not on the path yet (e.g. when this cell is run on its own), append it
% at the END of the path so that the N-way toolbox keeps precedence.
if exist('mlrankest','file') ~= 2
    addpath(genpath('../TOOLBOXES/tensorlab_2016-03-28'),'-end')
end
mlr = mlrankest(X)

% Here we explore possible/valid dimensionalities R = (1,1,1),
% (2,2,1),...,(10,10,10) to find the optimal dimensionality of the model
% of X. It's important to remember that the max. number of factors to
% extract in one mode cannot be higher than the product of the other two.
% We have an optimal dimensionality when the increase in the complexity of
% the model no longer increases the fit of the model significantly. 
%
% The result arrays are preallocated for all 10*10*10 combinations; only
% the first p rows (the valid combinations) are filled by the loop.
p = 0;
Rmat = zeros(1000,3);
SSE = zeros(1000,1);
FIT1 = zeros(1000,1);
FIT2 = zeros(1000,1);

for r1 = 1:10
    for r2 = 1:10
        for r3 = 1:10
            R = [r1 r2 r3];
            % Valid only if the largest dimension does not exceed the
            % product of the other two.
            if prod(R)/max(R)>=max(R)
                p = p+1   % echoed on purpose as a progress indicator
                Rmat(p,:) = R;
                % Tucker model from the N-way toolbox.
                % NOTE(review): check 'help tucker' for the meaning of
                % the third output stored in SSE.
                [Factors1,G1,SSE(p)] = tucker(X,R);
                T1 = tmprod(G1,Factors1,1:3);                
                FIT1(p) = 1 - frob(X-T1)/frob(X);
                % Truncated MLSVD from Tensorlab.
                [Factors2,G2,sv] = mlsvd(X,R);
                T2 = tmprod(G2,Factors2,1:3);
                FIT2(p) = 1 - frob(X-T2)/frob(X);
            end
        end
    end
end

% Sorted curves of the three criteria. The unused, zero-valued rows of the
% preallocated arrays are part of the sort and appear at the start of each
% curve (zeros sort ahead of any positive value). The positions picked
% below are counted on these padded curves, so the padding is kept.
MLR_scan = figure;
set(gcf,'Units','Normalized','Position',[0.2 0.6 0.5 0.4]);
subplot(1,3,1)
[sorted_SSE,ind] = sort(SSE);
% plot(SSE(ind));
plot(sorted_SSE);
title('SSE (Tucker model)');
xlabel('Model dimensionality (sorted)');
ylabel('Explained variation of X');
grid on;

subplot(1,3,2)
[sorted_FIT1,ind1] = sort(FIT1);
% plot(FIT1(ind1));
plot(sorted_FIT1);
title('FIT (Tucker model)');
xlabel('Model dimensionality (sorted)');
ylabel('Explained variation of X');
grid on;

subplot(1,3,3)
[sorted_FIT2,ind2] = sort(FIT2);
% plot(FIT2(ind2));
plot(sorted_FIT2);
title('FIT (MLSVD)');
xlabel('Model dimensionality (sorted)');
ylabel('Explained variation of X');
grid on;

% Dimensionalities and criterion values at hand-picked positions of the
% sorted curves. Each lookup uses the sort order of its OWN criterion:
% ind for SSE, ind1 for FIT1 and ind2 for FIT2.
mlr_tucker_sse = Rmat(ind(492),:)
tucker_sse = SSE(ind(492))
mlr_tucker_fit = Rmat(ind1(492),:)
tucker_fit = FIT1(ind1(492))

mlr_mlsvd_fit = Rmat(ind2(493),:)
mlsvd_fit = FIT2(ind2(493))


% Refit the two selected models and report their fit.
[Factors1,G1] = tucker(X,[3 3 3]);
T1 = tmprod(G1,Factors1,1:3);
FIT_tucker = 1 - frob(X-T1)/frob(X)

[Factors2,G2] = mlsvd(X,[4 3 3]);
T2 = tmprod(G2,Factors2,1:3);
FIT_mlsvd = 1 - frob(X-T2)/frob(X)
