% Copyright (c) 2017, Domenico L. Gatti
% All rights reserved.
% 
% Redistribution and use in source and binary forms, with or without 
% modification, are permitted provided that the following conditions are 
% met:
% 
%     * Redistributions of source code must retain the above copyright 
%       notice, this list of conditions and the following disclaimer.
%     * Redistributions in binary form must reproduce the above copyright 
%       notice, this list of conditions and the following disclaimer in 
%       the documentation and/or other materials provided with the 
%       distribution
%       
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
% IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
% CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
% EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
%% General dependencies
% We always start from the CODE directory and we add to the path
% subdirectories containing various tools described in the book chapters.
addpath(genpath('../GENERAL_SCRIPTS_FUNCTIONS'));
% addpath(genpath('../DATABASE'));
addpath(genpath('../TOOLBOXES/BFGS'));

%% CHAPTER 17: Unconstrained minimization
clear, clc, close all

% Search bracket and starting point shared by the plot and the optimizers.
lb = -3;
ub = 6;
x = 2;

% One-dimensional test function, sampled over the bracket for plotting.
f = @(x) x.^3 - 3*x.^2 - 12*x
xvec = (lb:0.1:ub)'
yvec = f(xvec);
plot(xvec,yvec,'-r')
xlabel('x');
ylabel('f(x)');
grid on
hold on

% Golden Ratio: roots of -r^2 - r + 1 = 0
p = [-1 -1 1];
r = roots(p)

% Line search over [lb, ub] starting from x.
[x_min,f_min,b_iter,s_iter] = line_search(f,x,lb,ub)
% [x_min,f_min,b_iter,s_iter] = line_search(f,x,lb,ub,0.1)
% for comparison
fminsearch(f,x)

% Mark the abscissa returned by line_search on the plot.
vline(x_min,'b--') 

%% Optimization: minimum of a function.
clear, clc

% Quadratic surface  f(x) = c + b'*x + 1/2*x'*H*x
b = [5 3]';
c = 10;
H = [25 2;1 20];
H = H'*H;               % H'*H is symmetric positive definite
eig(H)                  % both eigenvalues > 0 -> a unique minimum exists
x = [-100:5:100;-100:5:100];
[m,n] = size(x);

func = @(x) c + b'*x +1/2*x'*H*x

% Evaluate the surface on the grid. meshgrid returns X(r,c) = x(1,c) and
% Y(r,c) = x(2,r), so the value for the point [x(1,i); x(2,j)] must be
% stored at row j, column i to line up with X and Y in meshc.
f = zeros(n,n);
for i = 1:n
    for j = 1:n
        f(j,i) = func([x(1,i);x(2,j)]);
    end
end
[X,Y] = meshgrid(x(1,:),x(2,:));
% surf(X,Y,f)
meshc(X,Y,f)

% The gradient b + H*x vanishes at the minimum, i.e. H*x = -b.
minX = -(H\b)
minf = func(minX)

% Unconstrained minimization of a two-variable quadratic objective
func = @(x) 10*x(1)^2 + 3*x(2)^2 -10*x(1)*x(2) + 2*x(1) 
x = [1; 1]

% Powell (project helper), started from x
[xmin,fmin,niter] = powell(func,x,0.01)
% for comparison
fminsearch(func,x)

% CG: the system matrix is built as M'*M, which makes it symmetric
% positive definite.
M = [2 3 1; 3 1 1; 1 1 2];
A = M'*M;
b = [3; 2; 1];
x0 = zeros(3,1)
[x,r,i] = conj_grad(A,x0,b)

[x,r,i] = conj_grad_mod(A,x0,b)

% Fletcher-Reeves on the same quadratic objective and starting point
func = @(x) 10*x(1)^2 + 3*x(2)^2 -10*x(1)*x(2) + 2*x(1) 
x = [1; 1]
[x_min,f_min,niter] = fletcher_reeves(func,x,0.01)

%% Convergence rate

%% Sublinear convergence
clear x Q1 Q2 Q3

% Sequence x(k) = 1/k, built in one vectorised step.
niter = 1000;
k = 1:niter;
x = 1./k;

% Ratio |x(k+1)| / |x(k)|^q for the candidate orders q = 0.5, 1 and 2.
ratio = @(q) abs(x(2:end))./(abs(x(1:end-1)).^q);
Q1 = ratio(0.5);
Q2 = ratio(1);
Q3 = ratio(2);

SubLinear_convergence = figure;
set(gcf,'Unit','Normalized','Position',[0 0.4 0.6 0.6])

% Panel 1: the sequence itself.
subplot(2,2,1);
semilogy(k,x,'-g');
xlabel('Iteration (k)');
ylabel('x(k)')

% Panels 2-4: one ratio per panel.
series = {Q1, Q2, Q3};
styles = {'-b', '-r', '-b'};
for q = 1:3
    subplot(2,2,q+1);
    semilogy(1:niter-1,series{q},styles{q})
    xlabel('Iteration (k)');
    ylabel(sprintf('Q%d(k)',q))
end

%% Linear convergence
clear x Q1 Q2 Q3

% Sequence x(k) = 1/2^k, built in one vectorised step.
niter = 100;
k = 1:niter;
x = 1./(2.^k);

% Ratio |x(k+1)| / |x(k)|^q for the candidate orders q = 0.5, 1 and 2
% (left unsuppressed so the values are echoed to the command window).
ratio = @(q) abs(x(2:end))./(abs(x(1:end-1)).^q);
Q1 = ratio(0.5)
Q2 = ratio(1)
Q3 = ratio(2)

Linear_convergence = figure;
set(gcf,'Unit','Normalized','Position',[0 0.4 0.6 0.6])

% Panel 1: the sequence itself.
subplot(2,2,1);
semilogy(k,x,'-g');
xlabel('Iteration (k)');
ylabel('x(k)')

% Panels 2-4: one ratio per panel.
series = {Q1, Q2, Q3};
styles = {'-b', '-r', '-b'};
for q = 1:3
    subplot(2,2,q+1);
    semilogy(1:niter-1,series{q},styles{q})
    xlabel('Iteration (k)');
    ylabel(sprintf('Q%d(k)',q))
end

%% Quadratic convergence
clear x Q1 Q2 Q3

% Sequence x(k) = 1/2^(2^k): every term is the square of the previous one.
% In double precision 2^(2^k) overflows to Inf from k = 10 on, which turns
% x(k) into exactly 0 and the ratios below into 0 or NaN (and semilogy
% cannot draw them). The sequence is therefore stopped at k = 9, the last
% term that is still representable.
niter = 9;
for k = 1:niter
x(k) = 1/(2^(2^k));
end

% Ratio |x(k+1)| / |x(k)|^q for the candidate orders q = 1, 2 and 3.
% Q2 stays constant (= 1), which identifies the order of convergence as 2.
Q1 = abs(x(2:end))./(abs(x(1:end-1)).^1)
Q2 = abs(x(2:end))./(abs(x(1:end-1)).^2)
Q3 = abs(x(2:end))./(abs(x(1:end-1)).^3)
Quadratic_convergence = figure;
set(gcf,'Unit','Normalized','Position',[0 0.4 0.6 0.6])
subplot(2,2,1);semilogy([1:niter],x,'-g');
xlabel('Iteration (k)');ylabel('x(k)')
subplot(2,2,2);semilogy([1:niter-1],Q1(1:niter-1),'-b')
xlabel('Iteration (k)');ylabel('Q1(k)')
subplot(2,2,3);semilogy([1:niter-1],Q2(1:niter-1),'-r')
xlabel('Iteration (k)');ylabel('Q2(k)')
subplot(2,2,4);semilogy([1:niter-1],Q3(1:niter-1),'-b')
xlabel('Iteration (k)');ylabel('Q3(k)')

%% Practical formula to calculate convergence order
% p is the limit of the formula. It is evaluated on the sequence x built
% in the quadratic-convergence cell above, so p should approach 2.
num = (x(4:end)-x(3:end-1))./(x(3:end-1)-x(2:end-2));
den = (x(3:end-1)-x(2:end-2))./(x(2:end-2)-x(1:end-3));
p = log(abs(num))./log(abs(den))

%% Newton and quasi-Newton methods
% Minimizes two test objectives with the project's bfgs routine, with
% fminunc (quasi-Newton and trust-region) and with fminsearch, so that the
% results (xMin1..xMin8, fMin1..fMin8) can be compared.

% BFGS
% Two-variable quadratic objective, started from [1; 1].
% NOTE(review): the meaning of the numeric arguments passed to bfgs is
% defined in the BFGS toolbox added to the path above - confirm there.
func = @(x) 10*x(1)^2 + 3*x(2)^2 -10*x(1)*x(2) + 2*x(1) 
x = [1 1]'
[xMin1,fMin1,nCyc1] = bfgs(func,x,0.01,1.0e-4,1.0e-8,1.0e-8,2,1.0e4,1.0e4)

% Three-variable objective, started from [5; 7; 9]. From here on func and
% x refer to this problem.
% NOTE(review): log(x(3)) is unbounded below as x(3) -> 0+, so this
% objective has no finite minimum; the values returned below depend on
% each solver's stopping criteria.
func = @(x) sin(x(1)) + x(2)^2 + log(x(3)) -7
x = [5 7 9]'
[xMin2,fMin2,nCyc2] = bfgs(func,x,0.00001,1.0e-4,1.0e-8,1.0e-8,2,1.0e4,1.0e4)

% fminunc with 'quasi-Newton'
% First run: 'GradObj' is 'off', so fminunc receives only func and uses
% central finite differences ('FinDiffType').
options = optimoptions(@fminunc,'Algorithm','quasi-Newton',...
    'MaxFunEvals',5000,'MaxIter',5000,'GradObj','off',...
    'FinDiffType','central','DerivativeCheck','on',...
    'Display','final','TolFun',1e-9,'TolX',1e-9);
[xMin3,fMin3] = fminunc(func,x,options)

% Second run: 'GradObj' is 'on', so the objective handle must also return
% the gradient.
% NOTE(review): presumably gradx(func,x,2) returns the function value and
% its gradient - confirm against gradx.
fg = @(x) gradx(func,x,2);
options = optimoptions(@fminunc,'Algorithm','quasi-Newton',...
    'MaxFunEvals',5000,'MaxIter',5000,'GradObj','on',...
    'FinDiffType','central','DerivativeCheck','on',...
    'Display','final','TolFun',1e-9,'TolX',1e-9);
[xMin4,fMin4] = fminunc(fg,x,options)

% Third run: same options as the second run, but gradx is called with 3 as
% its last argument.
% NOTE(review): hess is displayed here but is not passed to any call in
% the visible remainder of this script.
fg = @(x) gradx(func,x,3);
hess = hessian(func,x)
options = optimoptions(@fminunc,'Algorithm','quasi-Newton',...
    'MaxFunEvals',5000,'MaxIter',5000,'GradObj','on',...
    'FinDiffType','central','DerivativeCheck','on',...
    'Display','final','TolFun',1e-9,'TolX',1e-9);
[xMin5,fMin5] = fminunc(fg,x,options)

% Alternative optimization using fminunc with the 'trust region' algorithm.
% 'SpecifyObjectiveGradient' is true, so fg is expected to supply the
% gradient as well as the function value.
fg = @(x) gradx(func,x,2);
options = optimoptions(@fminunc,'Algorithm','trust-region','SpecifyObjectiveGradient',true)
[xMin6,fMin6] = fminunc(fg,x,options)

% Downhill Simplex
% One fminsearch run from x, followed by 100 restarts, each started from
% the point returned by the previous run. Output is left unsuppressed and
% 'Display' is 'iter', so every run prints its full iteration log.
options = optimset('MaxFunEvals',1000,'MaxIter',1000,'Display','iter','TolFun',1e-8,'TolX',1e-8);
[xMin7,fMin7] = fminsearch(func,x,options)
xMin8 = xMin7
for i = 1:100
    [xMin8,fMin8,exitflag,output] = fminsearch(func,xMin8,options)
end

