image thumbnail
from Fit GLM with quadratic penalty by Patrick Mineault
Fits GLM with a quadratic penalty, determines hyperparams through cross-validation or evidence

cvglmfitqp(y,X,qf,folds,opts)
function results = cvglmfitqp(y,X,qf,folds,opts)
    %[thefit] = cvglmfitqp(y,X,qf,folds,opts)
    %
    % Optimizes a GLM model through MAP where the internal variable is generated through:
    % r = X*w
    %
    % Where there is a quadratic penalty on the model parameters:
    %
    % p(w) = 1/2*lambda*w'*qf*w
    %
    % The options are the same as with glmfitqp. folds is a length(y) X
    % kfold matrix which specifies how data is distributed across
    % validation folds. It can be generated by getcvfolds.
    %
    % lambda is optimized by cross-validation. Guesses for lambda are given
    % by a trust region second-order polynomial method.
    %
    % Example use:
    % %%
    % %Figure out optimal strength of prior through cross validation
    % %Assume smoothness of the model parameters
    % qf = blkdiag(qfsmooth1D(16),.01);
    % rg = (-7.5:7.5)';
    % 
    % %Simulate a model with w = Gabor function
    % w = exp(-rg.^2/3^2).*sin(rg*2*pi/6);
    % nobs = 150;
    % X = [randn(nobs,length(w)),ones(nobs,1)];
    % r = 3*X*[w;.01];
    % 
    % %output is binary -> logistic regression
    % r = binornd(1,1./(1+exp(-r)));
    % 
    % %Set up 5-fold CV
    % folds = getcvfolds(length(r),5,1001);
    % 
    % %Fit the data
    % clear opts
    % opts.family = 'binomlogit';
    % opts.lambda0 = 1;
    % results = cvglmfitqp(r,X,qf,folds,opts);
    % 
    % plot(results.w(1:end-1))
    %
    % See also: glmfitqp, getcvfolds

    defaults.precision = .05;
    defaults.stepsize = 5;
    defaults.minDiffSecondRound = 3;
    defaults.maxiter  = 15;
    defaults.familyextra = 1;
    defaults.family = 'binomlogit';
    defaults.baseline = zeros(length(y),1);
    defaults.lambda0 = mean(sum(X.^2))*.01; %finds a reasonable range for lambda0
    defaults.parallel = 0;
    defaults.P0 = sparse(size(X,2),size(X,2));
    defaults.getH = 0;
    defaults.weights = ones(size(y));
    defaults.Display = 'iter';
    
    opts = setdefaults(opts,defaults);
    
    if opts.parallel && matlabpool('size') < 1
        warning('cvglmfitqp:parallel0','matlabpool open must be called beforehand to enable parallel processing on folds');
    end
    
    subopts = rmfield(opts,{'stepsize','maxiter','lambda0','parallel','precision','minDiffSecondRound','P0'});
    
    results = crossValidate(y,(1:length(y))',opts.lambda0,...
                            @(y,I,oldfit,lambda,a) fitAFold(y,I,oldfit,lambda,a,qf,X,opts.P0,subopts),...
                            @(y,I,thefit       ) evalFold(y,I,thefit,X,subopts),...
                            folds,opts);
    results.w = results.finalfit.w;
    results.loglikelihood = results.finalfit.loglikelihood;
    results.logpenalty = results.finalfit.logpenalty;
end

function thefit = fitAFold(y,I,oldfit,lambda,isfinal,qf,X,P0,glmopts)
    X = X(I,:);
    glmopts.baseline = glmopts.baseline(I);
    glmopts.weights  = glmopts.weights(I);
    if ~isfinal
        glmopts.Display = 'off';
    end
    
    if ~isempty(oldfit)
        glmopts.w0 = oldfit.w;
    end

    thefit = glmfitqp(y,X,qf*lambda+P0,glmopts);
end

function cvll = evalFold(y,I,thefit,X,glmopts)
    cvll = evalGlmLikelihood(y,X(I,:),thefit.w,glmopts.baseline(I),glmopts.family,glmopts.familyextra);
end

Contact us at files@mathworks.com