function stats = nbreg( x, y, varargin )
% nbreg - Negative binomial regression with estimation of dispersion
% parameter using Chi^2 dampening.
%
% Author: Surojit Biswas
% Email: sbiswas@live.unc.edu
% Institution: The University of North Carolina at Chapel Hill
%
% References:
% [1] Hardin, J.W. and Hilbe, J.M. Generalized Linear Models and
% Extensions. 3rd Ed. p. 251-254.
%
% INPUT:
% x = [n x p] Design matrix. Rows are observations, Columns are
% variables. (required)*
% y = [n x 1] Response matrix. Rows are observations. (required)
%
% The following inputs are optional. For optional inputs the function call
% should include the input name as a string and then the input itself.
% For example, nbreg(x, y, 'b', myStartingRegCoeffs)
%
% alpha = Scalar value. Starting estimate for dispersion parameter.
% (optional, DEFAULT = 0.01)
% b = [p x 1] starting vector of regression coefficients. (optional,
% DEFAULT = estimated with poisson regression)
% offset = [n x 1] offset vector. (optional, default = 0)**
% trace = logical value indicating if the algorithm's progress should be
% printed to the screen. (optional, default = false)
%
% OUTPUT:
% stats.b = [p x 1] vector of estimated regression coefficients.
% stats.alpha = Estimated dispersion parameter.
% stats.disp = Estimated dispersion statistic (Chi^2/df). Should be
% close to 1 if the algorithm properly converged.
%
% NOTES:
% * Supply your own column of ones for an intercept term.
% ** Make sure you have taken the log of the offset vector if necessary.
% If one wants to include an offset to control for exposure time/fit a rate
% model, then typically the offset vector should log-ed before it's
% supplied as an input. The regression routine assumes that E[y|x] ~ X*b +
% log(offset)
n = size(y,1);
p = size(x,2);
assert(n == size(x,1), 'Dimension mismatch between x and y');
df = n - p;
%% Get variable arguments
% Offset.
offset = getVararg(varargin, 'offset');
if isempty(offset)
offset = 0;
end
% Regression coefficients.
b = getVararg(varargin, 'b');
if isempty(b)
if offset == 0
b = glmfit(x, y, 'poisson', 'Constant', 'off');
else
b = glmfit(x, y, 'poisson', 'Constant', 'off', 'offset', offset);
end
end
% Alpha
alpha = getVararg(varargin, 'alpha');
if isempty(alpha)
alpha = 0.01;
end
% Trace
trace = getVararg(varargin, 'trace');
if isempty(trace)
trace = false;
end
if trace
fprintf('alpha\t');
for i = 1 : p
fprintf('b_%0.0f\t', i)
end
fprintf('disp\n');
end
%% Optimize
eps = 1e-4;
REG = 1e-10*eye(p);
LSOPTS.SYM = true;
LSOPTS.POSDEF = true;
eta = x*b + offset;
mu = exp(eta);
del = Inf;
adel = Inf;
if trace
fprintf('%0.4f\t', [alpha, b', sum( ((y - mu).^2)./(mu + alpha*(mu.^2)) )/df]);
fprintf('\n');
end
while adel > eps
while any(del > eps)
w = repmat(mu./(1 + alpha*mu), 1, p)'; % Weight matrix
z = (eta + (y - mu)./mu) - offset; % Working response
xtw = x'.*w; % X'*W
bold = b;
b = linsolve(xtw*x + REG, xtw*z, LSOPTS); % Update
eta = x*b + offset; % Linear predictor
mu = exp(eta); % Mean
del = abs(bold - b);
end
% Chi^2 dampening of alpha.
disp = sum( ((y - mu).^2)./(mu + alpha*(mu.^2)) )/df; % Dispersion statistic
olda = alpha;
alpha = disp*alpha; % Update for alpha
adel = abs(alpha - olda);
% Display an update if trace switch is on.
if trace
fprintf('%0.4f\t', [alpha, b', disp]);
fprintf('\n');
end
end
stats.alpha = alpha;
stats.b = b;
stats.disp = disp;
function a = getVararg(v, param)
ind = find(strcmpi(v, param)) + 1;
if isempty(ind)
a = [];
else
a = v{ind};
end
end
end