Code covered by the BSD License  

Highlights from
Statistical Learning Toolbox

from Statistical Learning Toolbox by Dahua Lin
Functions for statistical learning, pattern recognition and computer vision, covering many topics.

sllogistreg(X, nums, varargin)
function [A, b, props, info] = sllogistreg(X, nums, varargin)
%SLLOGISTREG Performs Multivariate Logistic Regression
%
% $ Syntax $
%   - [A, b] = sllogistreg(X, nums, ...)
%   - [A, b, props] = sllogistreg(X, nums, ...)
%   - [A, b, props, info] = sllogistreg(X, nums, ...)
%
% $ Arguments $
%   - X:            The input sample matrix
%   - nums:         The numbers of samples in different classes
%   - A:            The solved logistic coefficients
%   - b:            The solved logistic shift value
%   - props:        The probability values under the resultant model
%   - info:         The information of learning process
%                   - exitflag: The exitflag given by fminunc
%                   - numiters: The number of iterations
%                   - fval:     The final objective value
%
% $ Description $
%   - [A, b] = sllogistreg(X, nums, ...) solves the multivariate logistic 
%     regression. Suppose there are n samples, from C classes, then C
%     models are learned for the C classes. The k-th model is characterized
%     by a coefficient vector a_k and a shift value b_k. Then a sample x,
%     the probability that it belongs to the k-th class is given by the
%     following formula:
%           p(x, k) = P_k * h_k(x) / sum_l (P_l * h_l(x))
%     here, we have
%           h_k(x) = a_k^T * x + b_k
%     The objective of logistic regression is to maximize the sum of 
%     the probabilities that the samples belong to its own class:
%           maximize sum_i p(x_i, c_i)
%     here, c_i is the class label corresponding to x_i.
%     In the input arguments, X should be a d x n matrix containing the
%     x samples with the samples from the same class gathering together,
%     nums should be a 1 x C row vector. 
%     In the output arguments, A should be a d x C matrix, and b is a
%     1 x C row vector. Each column in A together with the corresponding
%     shift value in b describe a model for one class.
%
%     You can specify the following parameter to control the numeric 
%     optimization process:
%       - 'weights':        The weights of the samples  (1 x n)
%                           default = []: means all weights are 1
%       - 'priors':         The priors of the classes (1 x C)
%                           default = []: means equal priors
%       - 'maxiter':        The maximum number of iterations
%                           default = 300
%       - 'tolF':           The tolerance of the change of objective func
%                           default = 1e-6
%       - 'tolX':           The tolerance of the change of parameters
%                           default = 1e-6
%       - 'display':        The level of display
%                           default = 'off'
%       - 'init':           A cell array as {A0, b0}
%                           default = {}, using random initialization
%
%   - [A, b, props] = sllogistreg(X, nums, ...) also output the 
%     probability values that the input samples belong to the true class.
%     It would be a 1 x n row vector.
%
%   - [A, b, props, info] = sllogistreg(X, nums, ...) outputs the info
%     about the optimization process
% 
% $ Remarks $
%   - This function is implemented based on the optimization function
%     fminunc and fmincon, and thus requires the optimization toolbox 
%     be installed.
%
% $ History $
%   - Created by Dahua Lin, on Sep 16, 2006
%

%% parse and verify input arguments

if nargin < 2
    raise_lackinput('sllogistreg', 2);
end

[d, n] = size(X);
if ~isvector(nums) || size(nums, 1) ~= 1
    error('sltoolbox:invalidarg', ...
        'nums should be a 1 x C row vector');
end
if sum(nums) ~= n
    error('sltoolbox:sizmismatch', ...
        'The numbers in nums are not consistent with the sample number');
end

C = length(nums);

if C < 2
    error('sltoolbox:invalidarg', ...
        'There should be at least 2 classes.');
end

opts.weights = [];
opts.priors = [];
opts.maxiter = 300;
opts.tolF = 1e-6;
opts.tolX = 1e-6;
opts.display = 'off';
opts.init = {};
opts = slparseprops(opts, varargin{:});

w = opts.weights;
if ~isempty(w)
    if ~isequal(size(w), [1, n])
        error('sltoolbox:sizmismatch', ...
            'w should be a 1 x n row vector');
    end
end

pri = opts.priors;
if ~isempty(pri)
    if ~isequal(size(pri), [1, C])
        error('sltoolbox:sizmismatch', ...
            'pri should be a 1 x C row vector');
    end
end

if isempty(opts.init)
    is_inited = false;
else
    A0 = opts.init{1};
    b0 = opts.init{2};
    if ~isequal(size(A0), [d, C])
        error('sltoolbox:sizmismatch', 'A0 should be a d x C matrix');
    end
    if ~isequal(size(b0), [1, C])
        error('sltoolbox:sizmismatch', 'b0 should be a 1 x C row vector');
    end
    is_inited = true;    
end


%% main

%NOTE: we convert the problem of maximization to minimization of the
%      negative object

% augment problem

Xa = [X; ones(1, n)];

% prepare for optimization
optimfunc = @(v) logistic_objfun(v, Xa, nums, d, n, C, pri, w);
optimopts = optimset(optimset('fminunc'), ...
    'LargeScale', 'on', ...
    'GradObj', 'on', ...
    'MaxIter', opts.maxiter, ...
    'Display', opts.display, ...
    'TolFun', opts.tolF, ...
    'TolX', opts.tolX);
    
% initialization
if ~is_inited
    v0 = rand((d+1)*C, 1);
else
    v0 = reshape([A0; b0], (d+1)*C, 1);
end

% do optimization
[v, fval, exitflag, optimoutput] = fminunc(optimfunc, v0, optimopts);
clear v0;
clear optimfunc;
clear Xa;

% make output
v = reshape(v, d+1, C);
A = v(1:d, :);
b = v(d+1, :);
clear v;

if nargout >= 3
    L = compute_logit(A, X) ;
    L = sladdvec(L, b', 1);
    props = slposterioritrue(L, nums, pri, 'log');            
end

if nargout >= 4
    info.exitflag = exitflag;
    info.numiters = optimoutput.iterations;
    info.fval = -fval; 
end


%% The core functions for objective and gradient evaluation

function [f, g] = logistic_objfun(v, Xa, nums, d, n, C, pri, w)
% v is a reshape version of Aa

% get input
Aa = reshape(v, d+1, C);
L = compute_logit(Aa, Xa);
clear Aa;

% compute f
P = slposteriori(L, pri, 'log');
[sps, eps] = slnums2bounds(nums);
pps = zeros(1, n);
for k = 1 : C
    sk = sps(k); ek = eps(k);
    pps(sk:ek) = P(k, sk:ek);
end

if isempty(w)
    f = -sum(log(pps));
else
    f = -sum(log(pps) .* w);
end

% compute g
M = make_indicatormap(nums, C, n);
M = M - P;
clear P;
if ~isempty(w)
    M = slmulvec(M, w, 2);
end
g = Xa * M';
clear M;
g = -g(:);


function L = compute_logit(Aa, Xa)
% L is the logit values (a_k' * x_i + b_k): C x n matrix
L = Aa' * Xa;


%% Auxiliary functions

function M = make_indicatormap(nums, C, n)
% C x n
% C(i, j) = 1, when sample j belongs to class i

M = zeros(C, n);
I = slexpand(nums);
J = 1:n;
inds = sub2ind([C, n], I, J);
clear I J;
M(inds) = 1;

Contact us at files@mathworks.com