% Copyright (c) 2017, Domenico L. Gatti
% All rights reserved.
% 
% Redistribution and use in source and binary forms, with or without 
% modification, are permitted provided that the following conditions are 
% met:
% 
%     * Redistributions of source code must retain the above copyright 
%       notice, this list of conditions and the following disclaimer.
%     * Redistributions in binary form must reproduce the above copyright 
%       notice, this list of conditions and the following disclaimer in 
%       the documentation and/or other materials provided with the 
%       distribution
%       
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
% IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
% CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
% EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
%% General dependencies
% We always start from the CODE directory and we add to the path
% subdirectories containing various tools described in the book chapters.
addpath(genpath('../GENERAL_SCRIPTS_FUNCTIONS'));
% addpath(genpath('../DATABASE'));
% addpath(genpath('../TOOLBOXES'));

%% CHAPTER 6: Least squares
% Start from an empty workspace, a clean command window and no figures.
clear, clc, close all

% Overdetermined system A*x = b with 7 equations and 2 unknowns.
A = [ -6  2;
     -15  5;
      -7  3;
      -1  1;
       1  1;
       6  0;
       0  3]
b = [13; 36; 14; 0; -6; -22; 6]
% Least-squares solution obtained with the backslash operator.
x = A\b
% Left- and right-hand side of the normal equations A'*A*x = A'*b.
A'*A*x
A'*b
% Projection of b onto the column space of A ...
p = A/(A'*A)*A'*b
% ... and A'*p, echoed for comparison with A'*b above.
A'*p

%% Simple least squares example.
% Reset the session, then build noise-free data on the line
% y = int + slope*x and a large set of noisy replicates of it.
close all, clear, clc
xvec = (1:7)';
m = numel(xvec);
int = 5;
slope = 10;
yvec = int + slope*xvec;

% Figure reused by the following sections (handle echoed on purpose).
lsq_plot = figure
h(1) = plot(xvec,yvec,'+r','MarkerSize', 20);
hold on
set(gca,'Xlim',[0 8],'Ylim',[0 100]);

% Noise model selector: the second entry (proportional sigma) is chosen.
sigma_vec = {'constant sigma' 'proportional sigma'};
sigma = sigma_vec(2);

nsamples = 1000;
sigma_y = 0.2*yvec;               % per-point standard deviation
mean_sigma_y = mean(sigma_y);     % single value for the constant model

% One column of zero-mean Gaussian noise per sample.
switch sigma{1}
    case 'constant sigma'
        noise = normrnd(zeros(m,nsamples),repmat(mean_sigma_y,m,nsamples));
    case 'proportional sigma'
        noise = normrnd(zeros(m,nsamples),repmat(sigma_y,1,nsamples));
end

% Noisy replicates: every column is yvec plus its own noise column.
yvec_m = repmat(yvec,1,nsamples) + noise;

% Sample covariances across the replicates (rows of the transposed data
% are observations).
V_noise = cov(noise.');
V = cov(yvec_m.');

% Pick one replicate at random as "the" experimental data set.
yvec_ind = randi(nsamples);
yvec_n = yvec_m(:,yvec_ind);

h(2) = plot(xvec,yvec_n,'sb','MarkerSize',20);
legend('perfect data','data with noise','Location','best')
xlabel('x')
ylabel('y')

%% Ordinary least squares
% Fit the line y = u(1) + u(2)*x to the noisy sample yvec_n by projecting
% it onto the column space of the design matrix A, then derive the usual
% goodness-of-fit numbers (sse, mse, sigma, R^2, adjusted R^2).
% Projection matrix onto C(A)
A = [ones(7,1) xvec];
[m,n] = size(A);
% Three equivalent ways of writing P = A*(A'*A)^(-1)*A'; each assignment
% overwrites the previous one.
P = A*inv(A'*A)*A';
P = A/(A'*A)*A';
P = A*((A'*A)\A');
% A projection matrix is symmetric (P' = P) and idempotent (P*P = P).
P';
P = P*P;

% Projection of the noisy data onto C(A).
p = P*yvec_n;
figure(lsq_plot)
h(3) = plot(xvec,p,'og','MarkerSize', 20);

% Least-squares parameters (intercept and slope) from the normal equations.
u = (A'*A)\A'*yvec_n

yvec_ls = A*u;
figure(lsq_plot)
h(4) = plot(xvec,yvec_ls,'-m');
legend('perfect data', 'data with noise','projected data',...
    'least squares solution ','Location','best')

% Projection onto the left null space of A, written three ways; the last
% one (I - P) is the one kept.
N_at = null(A');
P_ln = N_at*inv(N_at'*N_at)*N_at';
P_ln = N_at/(N_at'*N_at)*N_at';

I = eye(m);
P_ln = I-P;

% Residual vector. It is named 'resid' rather than 'error' so that the
% built-in error() function is not shadowed by a workspace variable.
resid = P_ln*yvec_n;
figure(lsq_plot)
% One vertical segment per data point, from the observation to its
% projection.
for i = 1:m
    x = [xvec(i),xvec(i)];
    y = [yvec_n(i),yvec_n(i)-resid(i)];
    h(4+i) = line(x,y,'LineWidth',3,'Color','g');
end
h(12) = plot(xvec,yvec,'--r');
legend({'perfect data','data with noise','projected data', ...
           'least squares solution','errors'},'Location','best')

sse = resid'*resid
% sse = yvec_n'*P_ln*yvec_n;
mse = sse/(m-n)
sigma = sqrt(sse/(m-n))
c_yvec = yvec_n - mean(yvec_n);
sst = c_yvec'*c_yvec;

% Rsquare
rsquare = 1-sse/sst
% Equivalent expressions for the regression sum of squares.
ssr = sst - sse
ssr = (yvec_n-mean(yvec_n))'*(yvec_n-mean(yvec_n)) - resid'*resid
ssr = (yvec_n-resid - mean(yvec_n))'*(yvec_n-resid - mean(yvec_n))
ssr = (p - mean(yvec_n))'*(p - mean(yvec_n))
c_p = p - mean(yvec_n);
ssr = c_p'*c_p

% Equivalent expressions for R^2: ratio of sums of squares, squared cosine
% between the centered data and centered projection, squared correlation.
rsquare = ssr/sst
rsquare = ((c_yvec/norm(c_yvec))'*((p-mean(p))/norm(p-mean(p))))^2
rsquare = (corr(yvec_n,p))^2

% Adjusted Rsquare
adj_rsquare = 1-sse*(m-1)/(sst*(m-n))
% Regression t and F statistics
% t values
% Standard errors of the parameters from the scaled covariance
% mse*(A'*A)^(-1); t is the slope divided by its standard error.
se = sqrt(diag(mse*inv(A'*A)))
t = u(2)/se(2)

% t-statistics
% Degrees of freedom of the residuals: observations minus parameters.
nu = m-n
x = [0:.1:20];
% The reference distribution must use nu degrees of freedom (the same
% value used for the p-value below), not the number of parameters n.
y1 = tpdf(x,nu);
y2 = tcdf(x,nu);

T_statistics = figure;
set(gcf,'Unit','Normalized','Position',[0.2 0.5 0.5 0.4])
subplot(1,2,1)
plot(x,y1,'Color','blue','LineStyle','-.')
xlabel('t-value');ylabel('pdf');grid on
ca = gca;
% Vertical marker at the observed t value.
line([t t],ca.YLim);
subplot(1,2,2)
plot(x,y2,'Color','black','LineStyle','-')
xlabel('t-value');ylabel('cdf');grid on
line([t t],[0 1]);

% p-value
% Upper-tail probability, computed two equivalent ways.
p_t = 1-tcdf(t,nu)
p_t = tcdf(t,nu,'upper')

% F statistics
% nu1: total degrees of freedom, nu2: residual degrees of freedom.
% The regression (numerator) degrees of freedom are their difference.
nu1 = m-1
nu2 = m-n
nu_reg = nu1-nu2
msr = ssr/nu_reg
% Two equivalent expressions for the F ratio.
F = ((sst-sse)/nu_reg)/(sse/nu2)
F = msr/mse

x = [0:.1:80];
% F was built as (ssr/nu_reg)/(sse/nu2), so its reference distribution has
% nu_reg numerator and nu2 denominator degrees of freedom.
y1 = fpdf(x,nu_reg,nu2);
y2 = fcdf(x,nu_reg,nu2);

F_statistics = figure;
set(gcf,'Unit','Normalized','Position',[0.2 0.5 0.5 0.4])
subplot(1,2,1)
plot(x,y1,'Color','blue','LineStyle','-.')
xlabel('F-value');ylabel('pdf');grid on
ca = gca;
% Vertical marker at the observed F value.
line([F F],ca.YLim);
subplot(1,2,2)
plot(x,y2,'Color','black','LineStyle','-')
xlabel('F-value');ylabel('cdf');grid on
line([F F],[0 1]);

% Upper-tail probability, computed two equivalent ways.
p_f = 1-fcdf(F,nu_reg,nu2)
p_f = fcdf(F,nu_reg,nu2,'upper')

% QR factorization
% 'Economy' QR factorization of the design matrix.
[Q,R] = qr(A,0);
% Suppressed intermediate expressions, kept for interactive inspection.
A;
Q*R;
Q'*yvec_n;
% Least-squares parameters from the QR factors.
u_ols_qr = R\Q'*yvec_n

% Same fit through the built-in lscov.
u_ols_lscov = lscov(A,yvec_n)

% Column vector of OLS fit metrics, collected for the summary tables.
fit_ols = [sse; mse; sigma; rsquare; adj_rsquare];

%% Weighted least squares
% Same line fit as OLS, but each observation is weighted by the inverse of
% its sample variance (the diagonal of V); off-diagonal covariances are
% ignored.
% W = 1./sigma_y;
V_wls = diag(diag(V))

w = 1./sqrt(diag(V));
W = diag(w);
% C = W'*W;
C = diag(1./diag(V));
b = yvec_n;
% Weighted normal equations.
u_wls = (A'*C*A)\A'*C*b
% Equivalent route: scale rows by W, then solve the ordinary problem by QR.
A_w = W*A;
b_w = W*b;
[Q,R] = qr(A_w,0);
u_wls_qr = R\Q'*b_w
yvec_wls = A*u_wls;

% Fit parameters
% The residual vector is named 'resid' rather than 'error' so that the
% built-in error() function is not shadowed by a workspace variable.
resid = yvec_n - yvec_wls;
% Weighted sum of squared residuals, written two equivalent ways.
sse = resid'*C*resid
sse = yvec_n'*(C-C*A/(A'*C*A)*A'*C)*yvec_n
mse = sse/(m-n)
sigma = sqrt(mse)
c_yvec = yvec_n - mean(yvec_n);
sst = c_yvec'*C*c_yvec;

% Rsquare
rsquare = 1-sse/sst

c_p = yvec_wls - mean(yvec_n);
ssr = (c_p)'*C*(c_p);
rsquare = ssr/sst

% Adjusted Rsquare
adj_rsquare = 1-sse*(m-1)/(sst*(m-n))

% The following formula for Rsquared is not applicable in the WLS or GLS
% case:
% rsquare = (corr(yvec_n,yvec_wls))^2

figure(lsq_plot)
h(13) = plot(xvec,yvec_wls,'-c');
legend(h([1:5 13]), {'perfect data','data with noise','projected data', ...
           'OLS solution','errors','WLS solution'},'Location','best')

% Cross-check with the built-in lscov using the diagonal covariance.
[u_wls_lscov,sigma_u_wls_lscov,mse_wls_lscov] = lscov(A,yvec_n,V_wls)

% Column vector of WLS fit metrics, collected for the summary tables.
fit_wls = [sse mse sigma rsquare adj_rsquare]';

%% Summary OLS, WLS
% Side-by-side table of the fit metrics; the table columns take their
% names from the variables OLS and WLS.
param = {'sse'; 'mse'; 'sigma'; 'rsquare'; 'adj_rsquare'};
OLS = fit_ols;
WLS = fit_wls;
T = table(OLS,WLS,'RowNames',param)

%% Generalized least squares
% Same line fit, now weighted by the inverse of the full sample covariance
% matrix V (off-diagonal terms included).
C = inv(V);
b = yvec_n;
u_gls = (A'*C*A)\A'*C*b
% Using QR
[Q,R] = qr(A,0);
s = (Q'*C*Q)\Q'*C*b;
t = Q*s;
u_gls_qr = R\Q'*t

yvec_gls = A*u_gls;
figure(lsq_plot)
h(14) = plot(xvec,yvec_gls,'-k');
legend(h([1:5 13:14]), {'perfect data','data with noise','projected data', ...
           'OLS solution','errors','WLS solution','GLS solution'},...
           'Location','best')

% The residual vector is named 'resid' rather than 'error' so that the
% built-in error() function is not shadowed by a workspace variable.
resid = yvec_n - yvec_gls;
% Weighted sum of squared residuals, written two equivalent ways.
sse = resid'*C*resid
sse = yvec_n'*(C-C*A/(A'*C*A)*A'*C)*yvec_n
mse = sse/(m-n)
sigma = sqrt(mse)
c_yvec = yvec_n - mean(yvec_n);
sst = c_yvec'*C*c_yvec;

% Rsquare
rsquare = 1-sse/sst

c_p = yvec_gls - mean(yvec_n);
ssr = c_p'*C*c_p;
rsquare = ssr/sst

% The following formula for Rsquared is not applicable in the WLS or
% GLS case:
% rsquare = (corr(yvec_n,yvec_wls))^2

% Adjusted Rsquare
adj_rsquare = 1-sse*(m-1)/(sst*(m-n))

% Cross-check with the built-in lscov using the full covariance.
[u_gls_lscov,sigma_u_gls_lscov,mse_gls_lscov] = lscov(A,yvec_n,V)

% Column vector of GLS fit metrics, collected for the summary table.
fit_gls = [sse mse sigma rsquare adj_rsquare]';

%% Summary OLS, WLS, GLS
% Raw matrix of the three metric vectors first, then the same numbers as a
% labelled table (columns named after the variables OLS, WLS, GLS).
[fit_ols fit_wls fit_gls]
param = {'sse'; 'mse'; 'sigma'; 'rsquare'; 'adj_rsquare'};
OLS = fit_ols;
WLS = fit_wls;
GLS = fit_gls;
T = table(OLS,WLS,GLS,'RowNames',param)

%% S matrix
% For every noisy replicate, fit the line with the weight matrix C and
% accumulate the outer products of the parameter error (S3) and of the
% data error (V3); their averages over all replicates are displayed.
C = inv(V);
u_vec = [5 10]';                 % intercept and slope used to build yvec
S3 = zeros(n,n,nsamples);
V3 = zeros(m,m,nsamples);

% The solve operator does not depend on the replicate, so it is formed
% once (same left-to-right evaluation as the in-loop expression).
G = (A'*C*A)\A'*C;

for k = 1:nsamples
    yvec_n = yvec_m(:,k);

    u_ls = G*yvec_n;

    error_u = u_ls - u_vec;      % parameter error vs. true parameters
    error_b = yvec_n - yvec;     % data error vs. noise-free data

    S3(:,:,k) = error_u*error_u';
    V3(:,:,k) = error_b*error_b';
end

E_S3 = mean(S3,3)
E_V3 = mean(V3,3)

%% Multiple experiments
% Draw 10 replicates at random (one per row of B) and use their mean as
% the observation vector b.
picked = randi(nsamples,10,1);
B = yvec_m(:,picked)'
b = mean(B,1)'
I = eye(numel(b))

% Parameters for OLS,WLS,GLS
% OLS: identity weights
C_ols = I;
% WLS: inverse of the per-point variances (normalized by N)
w_wls = var(B,1)
V_wls = diag(w_wls)
C_wls = inv(V_wls)
% GLS: inverse of the full covariance (normalized by N)
V_gls = cov(B,1)
C_gls = inv(V_gls)

%% MATLAB built in functions for least squares
% Each fit is done with lscov; the returned covariance is divided by the
% returned scale factor to undo the scaling, and a +/- 1.96 standard-error
% interval is formed around the estimates.
z95 = 1.96;

% Ordinary Least Squares (OLS)
[u_ols,se_u_ols,scale_ols,S_ols] = lscov(A,b,I)
S_ols_corr = S_ols/scale_ols            % correction to remove scaling
se_u_ols_corr = sqrt(diag(S_ols_corr))  % correction to remove scaling
hw_ols = z95*se_u_ols;
conf_95_ols = [u_ols-hw_ols, u_ols+hw_ols]

% Weighted Least Squares (WLS)
% [u_wls,se_u_wls,scale_wls,S_wls] = lscov(A,b,w_wls)
% or
[u_wls,se_u_wls,scale_wls,S_wls] = lscov(A,b,V_wls)
S_wls_corr = S_wls/scale_wls                % correction to remove scaling
se_u_wls_corr = sqrt(diag(S_wls_corr))      % correction to remove scaling
hw_wls = z95*se_u_wls;
conf_95_wls = [u_wls-hw_wls, u_wls+hw_wls]

% Generalized Least Squares (GLS) using lscov
[u_gls,se_u_gls,scale_gls,S_gls] = lscov(A,b,V_gls)
S_gls_corr = S_gls/scale_gls                % correction to remove scaling
se_u_gls_corr = sqrt(diag(S_gls_corr))      % correction to remove scaling
hw_gls = z95*se_u_gls;
conf_95_gls = [u_gls-hw_gls, u_gls+hw_gls]

% Plotting all together
% Replicates, their mean and the noise-free data, with the three fitted
% lines drawn over an x range extended to 0 and 8.
ols_wls_gls_plot_1 = figure
xvec = A(:,2);
% Get handles for each variable plotted
h1(1:12) = plot(xvec,B,'xk',xvec,b,'or',xvec,yvec,'sb')
hold on

xext = [0;xvec;8];                      % abscissa padded on both sides
yvec_ols = u_ols(1) + u_ols(2)*xext;
yvec_wls = u_wls(1) + u_wls(2)*xext;
yvec_gls = u_gls(1) + u_gls(2)*xext;
h1(13:15) = plot(xext,yvec_ols,'-m',xext,yvec_wls,'-r',...
    xext,yvec_gls,'-b');

% Select only some handles for the legend
legend(h1(10:15), 'multiple observations','mean data',...
    'real data','OLS solution',...
    'WLS solution ','GLS solution ','Location','best')
xlabel('x')
ylabel('y')

% Observed residuals
% Entries 2:8 of each fitted line are the values at the original xvec.
r_ols = yvec_ols(2:8) - b
o_mse_ols = (r_ols'*r_ols)/(m-n)
r_wls = yvec_wls(2:8) - b
o_mse_wls = (r_wls'*r_wls)/(m-n)
r_gls = yvec_gls(2:8) - b
o_mse_gls = (r_gls'*r_gls)/(m-n)

%% Ordinary Least Squares (OLS) using standard MATLAB syntax
% Same OLS fit written out with matrix algebra (C_ols is the identity).
u_ols_m = (A'*C_ols*A)\A'*C_ols*b
% e_ols_m = (I - A/(A'*C_ols*A)*A'*C_ols)*b % error by projection
e_ols_m = b - A*u_ols_m  % error direct
% A'*e_ols_m
% A'*C_ols*e_ols_m

% All equivalent
sse = e_ols_m'*C_ols*e_ols_m
sse = b'*(C_ols-C_ols*A/(A'*C_ols*A)*A'*C_ols)*b

mse = sse/(m-n)
sigma = sqrt(mse)
b_mean = b - mean(b);
sst = b_mean'*b_mean;
rsquare = 1-sse/sst

% Unscaled parameter covariance and +/- 1.96 standard-error interval.
S = inv(A'*C_ols*A)
se_u_ols_m = sqrt(diag(S))
hw = 1.96*se_u_ols_m;
conf_95_ols_m = [u_ols_m-hw, u_ols_m+hw]

% Same quantities after scaling the covariance by mse.
scaled_S = mse*S
scaled_se_u_ols_m = sqrt(diag(scaled_S))
scaled_hw = 1.96*scaled_se_u_ols_m;
scaled_conf_95_ols_m = [u_ols_m-scaled_hw, u_ols_m+scaled_hw]

%% Weighted Least Squares (WLS) using standard MATLAB syntax
% Same WLS fit written out with matrix algebra, using the diagonal weight
% matrix C_wls.
u_wls_m = (A'*C_wls*A)\A'*C_wls*b

% Using QR
% Element-wise sqrt is used on C_wls, which is built as a diagonal matrix.
W = sqrt(C_wls);
A_w = W*A;
b_w = W*b;
[Q,R] = qr(A_w,0);
u_wls_qr = R\Q'*b_w

% same as
[Q,R] = qr(A,0);
s = (Q'*C_wls*Q)\Q'*C_wls*b;
t = Q*s;
u_wls_qr = R\Q'*t

% e_wls_m = (I - A/(A'*C_wls*A)*A'*C_wls)*b % error by projection
e_wls_m = b - A*u_wls_m % error direct
% A'*e_wls_m
% A'*C_wls*e_wls_m

% All equivalent
sse = e_wls_m'*C_wls*e_wls_m
sse = b'*(C_wls-C_wls*A/(A'*C_wls*A)*A'*C_wls)*b

mse = sse/(m-n)
sigma = sqrt(mse)
b_mean = b - mean(b);
% sse above is weighted by C_wls, so the total sum of squares must carry
% the same weights for the ratio sse/sst to be meaningful.
sst = b_mean'*C_wls*b_mean;
rsquare = 1-sse/sst
% Unscaled parameter covariance and +/- 1.96 standard-error interval.
S = inv(A'*C_wls*A)
se_u_wls_m = sqrt(diag(S))
conf_95_wls_m = [u_wls_m-1.96*se_u_wls_m u_wls_m+1.96*se_u_wls_m]
% Same quantities after scaling the covariance by mse.
scaled_S = mse*S
scaled_se_u_wls_m = sqrt(diag(scaled_S))
scaled_conf_95_wls_m = [u_wls_m-1.96*scaled_se_u_wls_m ...
                        u_wls_m+1.96*scaled_se_u_wls_m]

%% Generalized Least Squares (GLS) using standard MATLAB syntax
% Same GLS fit written out with matrix algebra, using the full weight
% matrix C_gls.
u_gls_m = (A'*C_gls*A)\A'*C_gls*b

% Using QR
[Q,R] = qr(A,0);
s = (Q'*C_gls*Q)\Q'*C_gls*b;
t = Q*s;
u_gls_qr = R\Q'*t

% e_gls_m = (I - A/(A'*C_gls*A)*A'*C_gls)*b  % error by projection
e_gls_m = b - A*u_gls_m % error direct
% A'*e_gls_m
% A'*C_gls*e_gls_m

% All equivalent
sse = e_gls_m'*C_gls*e_gls_m
sse = b'*(C_gls-C_gls*A/(A'*C_gls*A)*A'*C_gls)*b

mse = sse/(m-n)
sigma = sqrt(mse)
b_mean = b - mean(b);
% sse above is weighted by C_gls, so the total sum of squares must carry
% the same weights for the ratio sse/sst to be meaningful.
sst = b_mean'*C_gls*b_mean;
rsquare = 1-sse/sst
% Unscaled parameter covariance and +/- 1.96 standard-error interval.
S = inv(A'*C_gls*A)
se_u_gls_m = sqrt(diag(S))
conf_95_gls_m = [u_gls_m-1.96*se_u_gls_m u_gls_m+1.96*se_u_gls_m]
% Scale factor: the same expression as sse/(m-n) above, written out in full.
scale = b'*(C_gls - C_gls*A/(A'*C_gls*A)*A'*C_gls)*b/(m-n)
scaled_S = scale*S
scaled_se_u_gls_m = sqrt(diag(scaled_S))
scaled_conf_95_gls_m = [u_gls_m-1.96*scaled_se_u_gls_m ...
                        u_gls_m+1.96*scaled_se_u_gls_m]

%% S matrix direct
% For each replicate stored as a row of B, fit the line with the GLS
% weights and accumulate the outer products of the parameter error (S3)
% and of the data error (V3); their averages are displayed.
nexp = size(B,1);            % number of replicates (rows of B)
% Both stacks are preallocated with one page per replicate.
V3 = zeros(m,m,nexp);
S3 = zeros(n,n,nexp);
C = C_gls;
u_vec = [5 10]';

for i = 1:nexp
    yvec_n = B(i,:)';

    u_ls = (A'*C*A)\A'*C*yvec_n;

    error_u = u_ls - u_vec;  % parameter error vs. u_vec
    error_b = yvec_n - yvec; % data error vs. yvec

    S3(:,:,i) = error_u*error_u';
    V3(:,:,i) = error_b*error_b';

end

E_S3 = mean(S3,3)
E_V3 = mean(V3,3)
                                        
%% Plotting all together
% Same plot as for the lscov fits, now with the hand-computed OLS, WLS and
% GLS parameters.
ols_m_wls_m_gls_m_plot = figure
xvec = A(:,2);
% Get handles for each variable plotted
h1(1:12) = plot(xvec,B,'xk',xvec,b,'or',xvec,yvec,'sb')
hold on

xext = [0;xvec;8];                      % abscissa padded on both sides
yvec_ols_m = u_ols_m(1) + u_ols_m(2)*xext;
yvec_wls_m = u_wls_m(1) + u_wls_m(2)*xext;
yvec_gls_m = u_gls_m(1) + u_gls_m(2)*xext;
h1(13:15) = plot(xext,yvec_ols_m,'-m',xext,yvec_wls_m,'-r',...
    xext,yvec_gls_m,'-b');

% Select only some handles for the legend
legend(h1(10:15), 'multiple observations','mean data',...
    'real data','OLS solution',...
    'WLS solution ','GLS solution ','Location','best')
xlabel('x')
ylabel('y')

% Observed residuals
% Entries 2:8 of each fitted line are the values at the original xvec.
r_ols_m = yvec_ols_m(2:8) - b
o_mse_ols_m = (r_ols_m'*r_ols_m)/(m-n)
r_wls_m = yvec_wls_m(2:8) - b
o_mse_wls_m = (r_wls_m'*r_wls_m)/(m-n)
r_gls_m = yvec_gls_m(2:8) - b
o_mse_gls_m = (r_gls_m'*r_gls_m)/(m-n)

%% SPECIAL TOPIC: information or precision matrix
% Rebuild the noise-free line and its design matrix A, then approximate
% the Jacobian of the model with respect to its two parameters by central
% finite differences.
xvec = (1:7)';
int = 5;
slope = 10;
A = [ones(7,1) xvec]
yvec = int + slope*xvec;
lsq_plot = figure;
plot(xvec,yvec,'+r');

% Finite-difference step and nominal parameter values.
du = eps^(1/3);
u1 = int;
u2 = slope;

% Model evaluated at the perturbed parameters, one parameter at a time.
line_model = @(c0,c1) c0 + c1*xvec;
u1_plus = u1+du;
u1_minus = u1-du;
u2_plus = u2+du;
u2_minus = u2-du;
yvec1_plus = line_model(u1_plus,u2);
yvec1_minus = line_model(u1_minus,u2);
yvec2_plus = line_model(u1,u2_plus);
yvec2_minus = line_model(u1,u2_minus);

% Central differences: one column per parameter.
dy_du1 = (yvec1_plus-yvec1_minus)/(2*du);
dy_du2 = (yvec2_plus-yvec2_minus)/(2*du);
J = [dy_du1 dy_du2]

%% SPECIAL TOPICS: Multiple Linear Regression
% Regress MPG on Weight and Horsepower from the carsmall data set with
% three nested models; each one is fitted with 'regress' and again with
% backslash on the rows that have no missing values.
clear, clc, close all

load carsmall
y = MPG;
x1 = Weight;
x2 = Horsepower;
x3 = Acceleration;
c0 = ones(size(x1));          % constant column shared by all models

% Use only X1 
X1 = [c0 x1];
% Here we use MATLAB 'regress' function
b = regress(y,X1)
% Here we use standard least squares
ind = ~isnan(y)
b = X1(ind,:)\y(ind)

% Use X1 and X2
X12 = [c0 x1 x2];
b = regress(y,X12)
ind1 = ~isnan(y)
ind2 = ~isnan(x2)
ind = ind1 & ind2
b = X12(ind,:)\y(ind)

% Here we have a least squares plane:
scatter3(x1,x2,y,'filled')
hold on
x1fit = min(x1):100:max(x1);
x2fit = min(x2):10:max(x2);
[X1FIT,X2FIT] = meshgrid(x1fit,x2fit);
YFIT = b(1) + b(2)*X1FIT + b(3)*X2FIT;
mesh(X1FIT,X2FIT,YFIT)
xlabel('Weight'), ylabel('Horsepower'), zlabel('MPG')
view(50,10)

% Use X1 and X2 plus an interaction term
X12i = [c0 x1 x2 x1.*x2];
b = regress(y,X12i)
ind1 = ~isnan(y)
ind2 = ~isnan(x2)
ind = ind1 & ind2
b = X12i(ind,:)\y(ind)

% The interaction term produces a curved least-squares surface:
% (drawn into the current axes, where hold is already on)
scatter3(x1,x2,y,'filled')
hold on
x1fit = min(x1):100:max(x1);
x2fit = min(x2):10:max(x2);
[X1FIT,X2FIT] = meshgrid(x1fit,x2fit);
YFIT = b(1) + b(2)*X1FIT + b(3)*X2FIT + b(4)*X1FIT.*X2FIT;
mesh(X1FIT,X2FIT,YFIT)
xlabel('Weight'), ylabel('Horsepower'), zlabel('MPG')
view(50,10)

% t statistics
% Statistics for the interaction model fitted just above (design matrix
% X12i, coefficients b, valid-row mask ind).
% number of observations
m = sum(ind)
% number of parameters
n = length(b)
% nu value
nu = m-n
e = y(ind,:) - X12i(ind,:)*b;
mse = (e'*e)/nu
% standard error
se = sqrt(diag(mse*inv(X12i(ind,:)'*X12i(ind,:))))
% student T
t = b./se
% p values
p_t = 1-tcdf(t,nu)
p_t = tcdf(t,nu,'upper')
p_t_2 = 2*tcdf(abs(t),nu,'upper') 	% for a 2-tail p-value

% F statistics
sse = e'*e;
y_c = y(ind,:) - mean(y(ind,:));     % centered response
sst = y_c'*y_c;
ssr = sst-sse;
r2 = 1-sse/sst 
r2 = ssr/sst
% Adjusted R^2 = 1 - (sse/(m-n))/(sst/(m-1)); note the parentheses around
% m-1 and the leading "1 -".
adj_r2 = 1 - (sse/(m-n))/(sst/(m-1))

% nu1: total degrees of freedom, nu2: residual degrees of freedom; the
% regression (numerator) degrees of freedom are their difference.
nu1 = m-1
nu2 = m-n
nu_reg = nu1-nu2
msr = ssr/nu_reg
F = ((sst-sse)/nu_reg)/(sse/nu2)
F = msr/mse
% F was built with nu_reg numerator and nu2 denominator degrees of
% freedom, so the same pair is used for its reference distribution.
p_f = 1-fcdf(F,nu_reg,nu2)
p_f = fcdf(F,nu_reg,nu2,'upper')

%% Stepwise fit
% Build a design matrix with all pairwise interactions of the three
% predictors, keep its first five columns, fit it with 'regress' and with
% backslash, and compute the coefficient t statistics.
y = MPG;
x1 = Weight;
x2 = Horsepower;
x3 = Acceleration;

X123 = [x1 x2 x3];
% Here we use the function x2fx to derive a design matrix containing all
% possible interactions:
D = x2fx(X123,'interaction')
Predictors = {'x1','x2','x3','x1x2','x1x3','x2x3'}
[nobs,npred] = size(D);
% Here we select only the three original predictors plus a single
% interaction:
D = D(:,1:5)

% Use X1, X2, X3 and an interaction term X1*X2
[b,bint,r,rint,stats] = regress(y,D)
ind1 = ~isnan(y)
ind2 = ~isnan(x2)
ind3 = ~isnan(x3)
ind = ind1 & ind2 & ind3
% Rows without missing values, used for all the hand computations below.
D_ok = D(ind,:);
y_ok = y(ind);
b = D_ok\y_ok

% Standard multiple regression t statistics
% number of observations
m = sum(ind)
% number of parameters
n = numel(b)
% nu value
nu = m-n
e = y_ok - D_ok*b;
mse = (e'*e)/nu
% standard error
se = sqrt(diag(mse*inv(D_ok'*D_ok)))
% student T
t = b./se
% p values
p_t = 1-tcdf(t,nu)
p_t = tcdf(t,nu,'upper')
p_t_2 = 2*tcdf(abs(t),nu,'upper') 	% for a 2-tail p-value

% Table of the response and the regressors actually used in the fit.
T = table(y_ok,x1(ind),x2(ind),x3(ind),x1(ind).*x2(ind),'VariableNames',...
    {'MPG' 'Weight' 'Horsepower' 'Acceleration' 'WxH'})

% Stepwise fit
% Here we use again the function x2fx to derive a design matrix containing
% all possible interactions:
D = x2fx(X123,'interaction')
[nobs,npred] = size(D);
% Drop the first column of D (stepwisefit reports its own intercept in
% stats.intercept, used below).
D(:,1) = []
% Here we use 'stepwise' regression
[b,se,pval,inmodel,stats,nextstep,history] = stepwisefit(D,y)

% Coefficients of the selected model: intercept first, then the terms that
% stepwisefit kept.
b_in = [stats.intercept;b(inmodel)];
% number of observations
m = sum(ind)
% number of parameters
n = length(b_in)
p_vec = [ones(m,1) D(ind,inmodel)]*b_in;
e = y(ind) - p_vec;
SSE = e'*e;
y_c = y(ind) - mean(y(ind));         % centered response
SST = y_c'*y_c;
SSR = SST - SSE
r2 = 1-SSE/SST
r2 = SSR/SST
% Adjusted R^2 = 1 - (SSE/(m-n))/(SST/(m-1)); note the parentheses around
% m-1 and the leading "1 -".
adj_r2 = 1 - (SSE/(m-n))/(SST/(m-1))

%% Regularization techniques

% Correlations among columns 2:5 of the current design matrix, and between
% the response and those columns (valid rows only).
corr(D(ind,[2:5]))
corr([y(ind) D(ind,[2:5])])

X123 = [x1 x2 x3]

% Here we use the function x2fx to derive a design matrix containing all
% possible interactions:
D = x2fx(X123,'interaction')
[nobs,npred] = size(D);
% Drop the first column of D (stepwisefit reports its own intercept in
% stats.intercept, used below).
D(:,1) = []
% Here we use 'stepwise' regression
[b,se,pval,inmodel,stats,nextstep,history] = stepwisefit(D,y)

% Coefficients of the selected model: intercept first, then the terms that
% stepwisefit kept.
b_in = [stats.intercept;b(inmodel)];
% number of observations
m = sum(ind)
% number of parameters
n = length(b_in)
p_vec = [ones(m,1) D(ind,inmodel)]*b_in;
e = y(ind) - p_vec;
SSE = e'*e;
y_c = y(ind) - mean(y(ind));         % centered response
SST = y_c'*y_c;
SSR = SST - SSE
r2 = 1-SSE/SST
r2 = SSR/SST
% Adjusted R^2 = 1 - (SSE/(m-n))/(SST/(m-1)); note the parentheses around
% m-1 and the leading "1 -".
adj_r2 = 1 - (SSE/(m-n))/(SST/(m-1))


% Here we use 'lasso' keeping the same number of predictors
[b,FitInfo] = lasso(D,y,'PredictorNames',{'x1','x2','x3','x1x2','x1x3','x2x3'},'DFmax',3)

% The first column of b (with its matching intercept FitInfo.Intercept(1))
% is the model evaluated here.
best_predictors_ind = find(b(:,1))
best_predictors = Predictors(best_predictors_ind)
D_best = [ones(nobs,1) D]
p_vec = D_best(ind,:)*[FitInfo.Intercept(1);b(:,1)];
e = y(ind) - p_vec;
% number of observations
m = sum(ind)
% number of parameters
% Non-zero coefficients plus the intercept, so that n is counted the same
% way as in the regress and stepwise sections.
n = sum(b(:,1)~=0) + 1
SSE = e'*e;
y_c = y(ind) - mean(y(ind));         % centered response
SST = y_c'*y_c;
SSR = SST - SSE
r2 = 1-SSE/SST
r2 = SSR/SST
% Adjusted R^2 = 1 - (SSE/(m-n))/(SST/(m-1)); note the parentheses around
% m-1 and the leading "1 -".
adj_r2 = 1 - (SSE/(m-n))/(SST/(m-1))

% Here we use 'lasso' with cross-validation keeping the same number of
% predictors
[b,FitInfo] = lasso(D,y,'CV',10,'PredictorNames',{'x1','x2','x3','x1x2','x1x3','x2x3'},'DFmax',3)
% Column of b (and entry of FitInfo.Intercept) selected by the
% cross-validation minimum-MSE criterion.
k_best = FitInfo.IndexMinMSE;
minMSEModel = FitInfo.PredictorNames(b(:,k_best)~=0)

D_best = [ones(nobs,1) D]
% The intercept must come from the same column as the coefficients;
% pairing Intercept(1) with b(:,k_best) would mix two different models.
p_vec = D_best(ind,:)*[FitInfo.Intercept(k_best);b(:,k_best)];
e = y(ind) - p_vec;
% number of observations
m = sum(ind)
% number of parameters
% Non-zero coefficients of the selected model plus the intercept, so that
% n is counted the same way as in the regress and stepwise sections.
n = sum(b(:,k_best)~=0) + 1
SSE = e'*e;
y_c = y(ind) - mean(y(ind));         % centered response
SST = y_c'*y_c;
SSR = SST - SSE
r2 = 1-SSE/SST
r2 = SSR/SST
% Adjusted R^2 = 1 - (SSE/(m-n))/(SST/(m-1)); note the parentheses around
% m-1 and the leading "1 -".
adj_r2 = 1 - (SSE/(m-n))/(SST/(m-1))
