No BSD License  

Highlights from
LARS algorithm

from LARS algorithm by Sung Soo Kim
Pure vanilla implementation of LARS algorithm.

lars(yin, xin, XTX, type, stopCriterion, regularization, trace, quiet)
function [history, stopReason] = lars(yin, xin, XTX, type, stopCriterion, regularization, trace, quiet)
% %{
% This program implements "LARS" algorithm and its lasso modification
% introduced by Efron et. al. 2003.  Read the paper to understand codes
% of this function.  Each line of this file has corresponding equation
% number in Efron et. al. 2003 for reader's convenience.
%
%
% *** CAUTION
% history(1).mu_OLS contains original 'yin' to provide convinience in
% writing user defined stop criterion function. Actual mu_OLS of the first
% step should be just mean of yin which is a simple array of copy of
% history(1).mu. So, if history(1).mu_OLS contains that information, it is
% redundant. In fact, this is also contained in history(1).b, which is a
% bias of the output. Therefore, to provide more information to user who
% want to write his/her own stop criterion function, history(1).mu_OLS
% contains yin.
% 
% 
% Example 1: moderate size x.
%     stopCrioterion = {};
%     stopCrioterion{1,1} = 'maxKernels';
%     stopCrioterion{1,2} = 100;
%
%     XTX = lars_getXTX(x_original);      % this takes long time.
%     sol = lars(y, x, 'lasso', XTX, stopCriterion);
% 
% Example 2: very small size x, or a really really big size x
%     stopCrioterion = {};
%     stopCrioterion{1,1} = 'maxKernels';
%     stopCrioterion{1,2} = 100;
%
%     sol = lars(y, x, 'lasso', XTX, stopCriterion);
% 
% Note:
%     Users can add any kind of stop criterion by editing
%     the corresponding portion of this file.  See the code
%     for existing examples.
% 
% Note 2:
%     This m-file does not implement routine for missing data.
% %}
% 

global USING_CLUSTER;
global RESOLUTION_OF_LARS;
global REGULARIZATION_FACTOR;
lars_init();

regularization_factor = REGULARIZATION_FACTOR; % Tikhonov regularization factor (or the ridge regression factor)
                              % -> This should be small enough in this case to get
                              %    a reasonable pseudoinverse.

stopReason = {};

%%% Check parameters
if length(yin)==0 | length(xin)==0
    warning('\nInput or Output has zero length.\n');
    history.active_set = [];
    stopReason{1} = 'Parameter error';
    stopReason{2} = 0;
    return;
end
if size(yin,1) ~= size(xin,1)
    warning('\nSize of y does not match to that of x.\n');
    history.active_set = [];
    stopReason{1} = 'Parameter error';
    stopReason{2} = 0;
    return;
end
if ~strcmp(type, 'lasso') & ~strcmp(type, 'lars') & ~strcmp(type, 'forward_stepwise')
    warning('\nUnknown type of regression.\n');
    history.active_set = [];
    stopReason{1} = 'Parameter error';
    stopReason{2} = 0;
    return;
end
if strcmp(type, 'forward_stepwise')
    warning('\nForward_stepwise is not implemented.\n');
    history.active_set = [];
    stopReason{1} = 'Parameter error';
    stopReason{2} = 0;
    return;
end
    

if exist('regularization','var') & ~isempty(regularization)
    regularization = 10;
else
    regularization  = 0;
end

if ~exist('trace','var') | isempty(trace)
    trace=0;
end

if ~exist('quiet','var') | isempty(quiet)
    quiet=0;
elseif quiet==1
    trace=0;
end



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Data preparation

% Program automatically centers and standardizes predictors.
if ~exist('XTX','var')
    XTX=[];
end
no_xtx = 0;
if ~isempty(XTX)
    if ~quiet & trace >=0
        fprintf('\nLars is using the provided xtx.\n');
    end
elseif size(xin,2)^2 > 10^6
    if ~quiet & trace >=0
        fprintf('Too large matrix (size(x,2)^2 > 10^6). lars will not pre-calculate xtx.\n');
    end
    no_xtx      = 1;
    XTX         = lars_getXTX(xin,no_xtx);
else
%     fprintf('\nCalculating xtx.\n');
    XTX         = lars_getXTX(xin);
end

x               = XTX.x;            % normalized xin
mx              = XTX.mx;           % mean xin
sx              = XTX.sx;           % length of each column of xin
ignores         = XTX.ignores;      % indices for constant terms
all_candidate   = XTX.all_candidate;% indices for all possible columns
if ~no_xtx
    xtx         = XTX.xtx;          % xtx matrix
    dup_columns = XTX.dup_columns;  % duplicated columns which will be automatically ignored.
end

my              = mean(yin);
y               = yin-my;

n               = size(x,1);        % # of samples
m               = size(x,2);        % # of predictors

% Now, we can determine the maximum number of kernels.
%maxKernels = min(maxKernels, min(size(xin,1)-1, length(all_candidate)));
%maxKernels = min(maxKernels, min(rank(xin), length(all_candidate)));
existMaxKernels = 0;
existMSE        = 0;
for is = 1:size(stopCriterion,1)
    if strcmp(stopCriterion{is,1},'maxKernels')
        existMaxKernels = 1;
%        stopCriterion{is,2} = min(stopCriterion{is,2}, min(size(xin,1)-1, length(all_candidate)));
        stopCriterion{is,2} = min(stopCriterion{is,2}, min(rank(xin), length(all_candidate)));
        if stopCriterion{is,2}<1
            warning('Max Kernel is less than 1. It must be larger than 0.\n');
            stopCriterion{is,2} = 1;
        end
    end
    if strcmp(stopCriterion{is,1},'MSE')
        existMSE        = 1;
        if stopCriterion{is,2}<1.0e-10
            warning('Maximum MSE is too small. Automatically set to 1.0e-10\n');
            stopCriterion{is,2} = 1.0e-10;
        end
    end
end
if ~existMaxKernels
    is = size(stopCriterion,1);
    stopCriterion{is+1,1} = 'maxKernels';
%    stopCriterion{is+1,2} = min(size(xin,1)-1, length(all_candidate));  % Stop when size of active set is data.maxKernels.
    stopCriterion{is+1,2} = min(rank(xin), length(all_candidate));  % Stop when size of active set is data.maxKernels.
end
if ~existMSE
    is = size(stopCriterion,1);
    stopCriterion{is+1,1} = 'MSE';
    stopCriterion{is+1,2} = 1.0e-10;
end
    




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialization

active      = [];               % active set
inactive    = all_candidate;    % inactive set

mu_a        = zeros(n,1);       % current estimate (eq. 2.8)
mu_a_plus   = 0;                % next estimate (eq. 2.12)
mu_a_OLS    = 0;                % OLS estimate  (eq. 2.19)

beta        = zeros(1,size(x,2));
beta_new    = beta;
beta_OLS    = beta;

history.active_set          = [];
history.add                 = [];
history.drop                = [];
history.beta_norm           = [];
history.beta                = [];
history.b                   = my;
history.mu                  = my;
history.beta_OLS_norm       = [];
history.beta_OLS            = [];
history.b_OLS               = my;
history.mu_OLS              = my*ones(size(yin));
history.MSE                 = sum(y.^2)/length(y);
history.R_square            = 0;
history.resolution_warning  = [];


if var(yin)==0
    stopReason{1} = 'zeroVarY';
    stopReason{2} = var(yin);
    return;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Main loop
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


c               = 0;                % correlation vector
C_max           = max(abs(c));
C_max_ind       = [];
C_max_ind_pl    = [];
drop            = [];               % used for 'lasso'
k               = 1;                % iteration index
if ~quiet & trace >= 0
    fprintf('Active predictors / total  :::::  Current iteration\n                            ');
end
while 1

    
    
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%           Exit Criterions
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if exist('stopCriterion','var')     % If there is any stop criterion...
        % Any of these is satisfied, algorithm stops.
        %
        % Other criterions: Make your own cost function.
        
        for is = 1:size(stopCriterion,1)  % there can be a number of stop criterions.

            % Default Criterions.
            % Maximum number of consecutive drops or maximum number of drops within a window.
            if strcmp(stopCriterion{is,1},'maxDrops')
                drop_window = stopCriterion{is,2}(1);   % number of backward history to be considerd. 0 means all history.
                if drop_window==0
                    drop_window=k;
                end
                drop_n      = min(drop_window,stopCriterion{is,2}(2));   % number of maximum drops within the window.
                drop_vector = [];
                for z = max(k-drop_window+1,1):k
                    drop_vector=[drop_vector, history(z).drop];
                end
                if length(drop_vector)>=drop_n
                    stopReason{1} = 'maxDrops';
                    stopReason{2} = drop_n;
                    break;
                end
            end
            % Maximum number of kernels.
            if strcmp(stopCriterion{is,1},'maxKernels')
                if length(active) >= min(stopCriterion{is,2}, min(size(xin,1)-1, length(all_candidate)))
                    stopReason{1} = 'maxKernels';
                    stopReason{2} = length(active);
                    break;
                end
            end
            % Maximum number of iterations.
            if strcmp(stopCriterion{is,1},'maxIterations')
                if k >= stopCriterion{is,2}
                    stopReason{1} = 'maxIterations';
                    stopReason{2} = k;
                    break;
                end
            end
            % MSE.
            if strcmp(stopCriterion{is,1},'MSE')
                if history(k).MSE <= stopCriterion{is,2}
                    stopReason{1} = 'MSE';
                    stopReason{2} = history(k).MSE;
                    break;
                end
            end
            
            % User defined stop criterion.
            if strcmp(stopCriterion{is,1},'userDefinedCriterion')
                fhandle = stopCriterion{is,2}.fhandle;
                r_fhandle = fhandle(history, stopCriterion{is,2}.data);
                if r_fhandle.stop
                    stopReason{1} = 'userDefinedCriterion';
                    stopReason{2} = r_fhandle;
                    break;
                end
            end
            
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        
    end % end of stop criterion checking :  if exist('stopCriterion','var')
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    
    if length(stopReason)>0 % if there is any reason of stopping the algorithm, exit loop.
        break;
    end
    
    
    
    
    
    
    
    
    
    
    
    
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%           LARS Algorithm
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    
    %if ~USING_CLUSTER
    %fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%5d/%5d    :::::   %5d',length(active),length(all_candidate),k);
    %end
    % start of algorithm
    c                   = x'*(y-mu_a);                      % eq 2.8
    [C_max,C_max_ind]   = max(abs(c(inactive)));            % eq 2.9
    C_max_ind           = inactive(C_max_ind);
    % active          = sort(union(active,C_max_ind));      % If there is no machine limit, this can be used.
    % But because of machine limit, there can be multiple new predictors.
    % This dramatically improves the overall precision of the result,
    % and speeds up the whole process.
    C_max_ind_pl        = abs(c(inactive))>C_max-RESOLUTION_OF_LARS;
    C_max_ind_pl        = inactive(C_max_ind_pl);
    active          = sort(union(active,C_max_ind_pl));  
    inactive        = setdiff(all_candidate, active);   % eq 2.9
    if strcmp(type,'lasso')
        if ~isempty(drop) & length(find(drop==C_max_ind))==0           % If there is a drop, that must have the maximum correlation in inactive set.
            if ~quiet & trace >=0
                fprintf('\n');
                warning('Dropped item and index of maximum correlation is not the same. But it is being ignored here...');
                fprintf('\n                            ');
            end
            active(find(active==C_max_ind))=[];
        end
        if ~isempty(drop)
            C_max_ind   = [];
            C_max_ind_pl= [];
        end
        active          = setdiff(active,drop);             % eq 3.6
        inactive        = sort(union(inactive,drop));       % eq 3.6
    end

    s       = sign(c(active));              % eq 2.10
    xa      = x(:,active).*repmat(s',n,1);  % eq 2.4
    if ~no_xtx
        ga  = xtx(active,active).*(s*s');   % eq 2.5
    else
        ga  = xa'*xa;                       % eq 2.5
    end
    if regularization > 2
        ga  = ga+eye(length(ga))*regularization_factor; % This routine will make the test below
    end
    invga   = ga\eye(size(ga,1));           % eq 2.5
    aa      = sum(sum(invga))^(-1/2);       % eq 2.5
    wa      = aa*sum(invga,2);              % eq 2.6
    ua      = xa*wa;                        % eq 2.6
    
    % test using eq 2.7
    test_1      = xa'*ua;
    test_2      = aa*ones(size(test_1));
    test_1_2    = sum(sum(abs(test_1-test_2)));
    test_3      = norm(ua) - 1;
    
    history(k+1).resolution_warning=0;
    if test_1_2 > RESOLUTION_OF_LARS*100 | abs(test_3 ) > RESOLUTION_OF_LARS*100
        if regularization <=2
            if ~quiet & trace>0
                fprintf('\n');
                warning('Eq 2.7 test failure.');
                fprintf('\n                            ');
            end
            regularization = regularization + 1;
            if regularization > 2
                if ~quiet & trace>0
                    fprintf('\n');
                    warning('Lots of Eq 2.7 test failure. Regularization will be applied from now on.');
                    fprintf('\n                            ');
                end
            end
        end
        history(k+1).resolution_warning=1;
    end
    


    a               = x'*ua;                % eq 2.11
    tmp_1           = (C_max - c(inactive))./(aa - a(inactive));
    tmp_2           = (C_max + c(inactive))./(aa + a(inactive));
    tmp_3           = [tmp_1, tmp_2];
    tmp             = tmp_3(find(tmp_3>0));
    gamma = min(tmp);                       % eq 2.13
    if length(gamma)==0 % if this is the last step (i.e. length(active)==maxKernels)
        gamma = C_max/aa;                   % eq 2.19, eq 2.21 and 5 lines below eq 2.22
    end
    
    d               = zeros(1,m);
    d(active)       = s.*wa;

    if length(find(d(active)==0))
        fprintf('\n');
        warning('Something wrong with vector d: eq 3.4.');
        fprintf('\n                            ');
    end

    tmp             = zeros(1,m);
    tmp(active)     = -1*beta(active)./d(active);   % eq 3.4
    tmp2            = tmp(find(tmp>0));
    
    drop            = [];
    gamma_tilde = inf;                              % eq 3.5
    if ~isempty(tmp2) & gamma >= min(tmp2)
        gamma_tilde = min(tmp2);                    % eq 3.5
        drop        = find(tmp==gamma_tilde);       % eq 3.6
    end
    
    if strcmp(type, 'lars')
        mu_a_plus   = mu_a + gamma*ua;              % eq 2.12
        beta_new    = beta + gamma*d;               % eq 3.3
        drop        = [];
    elseif strcmp(type, 'lasso')
        mu_a_plus   = mu_a + min(gamma, gamma_tilde)*ua;    % eq 3.6
        beta_new    = beta + min(gamma, gamma_tilde)*d;     % eq 3.3
        active      = setdiff(active,drop);                 % eq 3.6
        inactive    = setdiff(all_candidate,active);
        beta_new(drop) = 0;
    elseif strcmp(type, 'forward_stepwise')
        drop        = [];
        error('forward.stepwise has not been implemented yet.');
        return;
    end
    
    mu_a_OLS    = mu_a + C_max/aa*ua;          % eq 2.19, 2.21
    beta_OLS    = beta + C_max/aa*d;           % eq 2.19, 2.21
    MSE         = sum((y - mu_a_OLS).^2)/length(y);
    
    
    
    
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % update and save
    mu_a = mu_a_plus;
    beta = beta_new;
    
    % history with scale correction
    k = k+1;
    history(k).active_set   = active;
    history(k).drop         = drop;
    history(k).add          = C_max_ind_pl;
    history(k).beta_norm    = beta(active);
    history(k).beta         = beta(active)./sx(active);
    history(k).b            = my - sum(mx./sx.*beta);
    history(k).mu           = xin * (beta./sx)' + history(k).b;
    history(k).beta_OLS_norm= beta_OLS(active);
    history(k).beta_OLS     = beta_OLS(active)./sx(active);
    history(k).b_OLS        = my - sum(mx./sx.*beta_OLS);
    history(k).mu_OLS       = xin * (beta_OLS./sx)' + history(k).b_OLS;
    history(k).MSE          = MSE;
    history(k).R_square     = 1 - var(yin  - history(k).mu_OLS)/var(yin);
    
    
    % exit if exact mathing is achieved.
    if abs(C_max/aa - min(gamma,gamma_tilde)) < RESOLUTION_OF_LARS
        stopReason{1}   = 'ExactMatching';
        stopReason{2}   = 0;
        break;
    end

    
    
    
end % end of while loop
if ~quiet & trace >=0
    fprintf('\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b%5d/%5d    :::::   %5d\n',length(active),length(all_candidate),k);
end    


return;



Contact us at files@mathworks.com