Code covered by the BSD License  

Highlights from
Dynamic Pooled Forecasting

image thumbnail
from Dynamic Pooled Forecasting by Semin Ibisevic
Dynamic combined forecasting and dynamic principal component regression frameworks.

pcafc(X, y, m, J, Zbin, rol, lag, distr)
function [Yp, Ep, coeff, score, latent] = pcafc(X, y, m, J, Zbin, rol, lag, distr)
%PCFAC: Make forecasts with the principal components
%   [Yp, Ep, ...] = PCAFC(X, Y, M, ...) makes forecasts by taking the 
%   principal components of X as predictors and y as the response variable.
%   The estimates of the coefficients are obtained through GLMFIT.
%
%   'X' is a matrix with rows corresponding to observations, and columns to
%   predictor variables. GLMFIT automatically includes a constant term
%   in the model (do not enter a column of ones directly into 'X').
%   'y' is a vector of response values. 'm' refers to the start of the
%   hold-out period and is assumed to be sufficiently larger than 1, to 
%   avoid for unnecessary large estimation errors.
%
%   [Yp, Ep, ...] = PCAFC(X, Y, M, 'val1', 'val2', 'val3', 'val4', ...
%   'val5', 'val6') allows you to to specify additional parameter values to
%   control the model fit and forecasts. Parameter values are:
% 
%       'J' - the number of principal components to include in the pooled 
%       regression. J is set equal to 1 as default.
%
%       'Zbin' - defines whether de principal components should be obtained 
%       through taking the eigenvectors of the correlation matrix of X 
%       accessed through 'Zbin' = 'corr', or on the covariance matrix, set 
%       'Zbin' equal to 'cov'. 
%
%       'rol' - length of the rolling window. In case 'rol' is set equal to 
%       zero, the corresponding individual forecasts and pooled regression 
%       are made through an expanding window framework. Important, it is
%       obligated to set rol > m and it is advisably to set rol >> m.
%
%       'lag' - the number of lags in the regression Y(t) = c1 +
%       c2*X(t-lag)
%
%       'distr' -   Acceptable values for DISTR are 'normal', 'binomial', 
%       'poisson', 'gamma', and 'inverse gaussian'.  The distribution is 
%       fit using the canonical link corresponding to DISTR. See also the
%       GLMFIT function for further explanation.
%
%   GLMFIT treats NaNs in 'X' and 'y' as missing data, and removes the
%   corresponding observations.
%
%   [YIV, EIV, ...] = PCAFC(...) returns the n-by-1 vectors:
%       'Yp'      the forecasts on the base of the J principal components
%       of the explanatory factors.
%       'Ep'      the realized prediction errors
%   
%   [YIV, EIV, COEFF] = PCAFC(...) returns the latest principal component 
%   coefficients per time t, also known as loadings. 
%   Rows of X correspond to observations, columns to variables. 
%   COEFF is a p-by-p-by-t matrix, each column containing coefficients 
%   for one principal component. The columns are in order of decreasing 
%   component variance.
%
%   [YIV, EIV, COEFF, SCORE] = PCAFC(...) returns SCORE, the latest (t)
%   principal component scores; that is, the representation of X in the 
%   principal component space. Rows of SCORE correspond to the components
%   at every point time t.
%
%   [YIV, EIV, COEFF, SCORE, LATENT] = PCAFC(...) returns LATENT, a vector
%   containing the eigenvalues of the covariance matrix of X at every point 
%   of the time t.

%  Semin Ibisevic (2011)
%  $Date: 6/10/2011 20:22:34 $

if nargin < 3
    error('stats:indivfc:TooFewInputs','At least three arguments are required');
end

if nargin < 4 || isempty(J), J = 1; end
if nargin < 5 || isempty(Zbin), Zbin = 'corr'; end
if nargin < 6 || isempty(rol), rol = 0; end
if nargin < 7 || isempty(lag), lag = 1; end
if nargin < 8 || isempty(distr), distr = 'normal'; end

if lag < 0
    error('lag:indivfc:InvalidInput','Lag cannot be negative');
end

if rol > m
    error('rol:indivfc:InvalidInput','m should be always larger than rol');
end


% Initialization
n = size(y,1);
k = size(X,2);
if size(X,1) ~= size(y,1)
    error('y:indivfc:Dimension','The dimensions of X and y do not agree');
end

if isequal(Zbin,'corr') || isequal(Zbin,'cov')
    % correctly specified
else
    error('Zbin:combinefc:InvalidInput','Zbin can be only applied on cov or corr');
end

% retrieve the dynamic pca's
coeff = zeros(k,k,n);
score = zeros(n,J);
latent = zeros(n,k);

iter = 1;
for t = 1:n
    Xtemp = X( iter:t , :);
    switch Zbin
        case 'corr'
            [coeffAll,scoreAll,latentAll] = princomp(zscore(Xtemp));
        case 'cov'
            [coeffAll,scoreAll,latentAll] = princomp(Xtemp);
    end
    coeff(:,:,t) = coeffAll;
    score(t,1:J) = scoreAll(t,1:J);
    latent(t,:) = latentAll;
    
    % in case of a rolling window
    if rol > 0
        iter = iter+1;
    end
    
end

% get the individual forecasts
Yp = zeros(n,1);
Ep = zeros(n,1);
iter = rol+1;
for t=m:n
        Xtemp = score( iter:t-lag , :);
        Ytemp = y( iter+lag:t );
        
        b = glmfit(Xtemp,Ytemp,distr);
        
        Yp(t+1) = [1, score(t,:)]*b;
        
        if t < n
            Ep(t+1) = y(t+1) - Yp(t+1);
        end
    
    % in case of a rolling window
    if rol > 0
        iter = iter+1;
    end
end

Contact us