Code covered by the BSD License  

Highlights from
Negative Binomial Regression

from Negative Binomial Regression by Surojit Biswas
NB2 Negative-Binomial regression.

nbreg( x, y, varargin )
function stats = nbreg( x, y, varargin )
% nbreg - Negative binomial regression with estimation of dispersion
% parameter using Chi^2 dampening.
% 
% Author: Surojit Biswas
% Email: sbiswas@live.unc.edu
% Institution: The University of North Carolina at Chapel Hill
%
% References: 
% [1] Hardin, J.W. and Hilbe, J.M. Generalized Linear Models and
% Extensions. 3rd Ed. p. 251-254. 
% 
% INPUT:
% x     =   [n x p] Design matrix. Rows are observations, Columns are
%           variables. (required)*
% y     =   [n x 1] Response matrix. Rows are observations. (required)
% 
% The following inputs are optional. For optional inputs the function call
% should include the input name as a string and then the input itself.
% For example, nbreg(x, y, 'b', myStartingRegCoeffs)
%
% alpha =   Scalar value. Starting estimate for dispersion parameter.
%           (optional, DEFAULT = 0.01)
% b     =   [p x 1] starting vector of regression coefficients. (optional,
%           DEFAULT = estimated with poisson regression)
% offset =  [n x 1] offset vector. (optional, default = 0)**
% trace =   logical value indicating if the algorithm's progress should be
%           printed to the screen. (optional, default = false)
%
% OUTPUT:
% stats.b       =   [p x 1] vector of estimated regression coefficients.
% stats.alpha   =   Estimated dispersion parameter.
% stats.disp    =   Estimated dispersion statistic (Chi^2/df). Should be
%                   close to 1 if the algorithm properly converged.
%
% NOTES:
% *  Supply your own column of ones for an intercept term.
% ** Make sure you have taken the log of the offset vector if necessary.
% If one wants to include an offset to control for exposure time/fit a rate
% model, then typically the offset vector should log-ed before it's
% supplied as an input. The regression routine assumes that E[y|x] ~ X*b +
% log(offset)


n = size(y,1);
p = size(x,2);
assert(n == size(x,1), 'Dimension mismatch between x and y');
df = n - p;

%% Get variable arguments
% Offset.
offset = getVararg(varargin, 'offset');
if isempty(offset)
    offset = 0;
end

% Regression coefficients.
b = getVararg(varargin, 'b');
if isempty(b)
    if offset == 0
        b = glmfit(x, y, 'poisson', 'Constant', 'off');
    else
        b = glmfit(x, y, 'poisson', 'Constant', 'off', 'offset', offset);
    end
end

% Alpha
alpha = getVararg(varargin, 'alpha');
if isempty(alpha)
    alpha = 0.01;
end

% Trace
trace = getVararg(varargin, 'trace');
if isempty(trace)
    trace = false;
end

if trace
    fprintf('alpha\t');
    for i = 1 : p
        fprintf('b_%0.0f\t', i)
    end
    fprintf('disp\n');
end


%% Optimize
eps = 1e-4;
REG = 1e-10*eye(p);
LSOPTS.SYM = true;
LSOPTS.POSDEF = true;

eta = x*b + offset;
mu = exp(eta);
del = Inf;
adel = Inf;

if trace
    fprintf('%0.4f\t', [alpha, b', sum( ((y - mu).^2)./(mu + alpha*(mu.^2)) )/df]);
    fprintf('\n');
end
while adel > eps
    while any(del > eps)
        w =  repmat(mu./(1 + alpha*mu), 1, p)';     % Weight matrix
        z = (eta + (y - mu)./mu) - offset;          % Working response
        xtw = x'.*w;                                % X'*W
        bold = b;           
        b = linsolve(xtw*x + REG, xtw*z, LSOPTS);   % Update
        eta = x*b + offset;                         % Linear predictor
        mu = exp(eta);                              % Mean

        del = abs(bold - b);
    end
    
    % Chi^2 dampening of alpha.
    disp = sum( ((y - mu).^2)./(mu + alpha*(mu.^2)) )/df; % Dispersion statistic
    olda = alpha;
    alpha = disp*alpha;                                   % Update for alpha
    adel = abs(alpha - olda);
    
    % Display an update if trace switch is on.
    if trace
        fprintf('%0.4f\t', [alpha, b', disp]);
        fprintf('\n');
    end
end

stats.alpha = alpha;
stats.b = b;
stats.disp = disp;
    
    
    function a = getVararg(v, param)
        ind = find(strcmpi(v, param)) + 1;
        if isempty(ind)
            a = [];
        else
            a = v{ind};
        end
    end

        
end

Contact us