Code covered by the BSD License  

Highlights from
Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

by

 

16 Sep 2011 (Updated )

My dissertation for the MSc in Finance & Economics from Warwick Business School

realized_var(data,type,scheme,interval,nsamples)
function [rv,rvss,dates] = realized_var(data,type,scheme,interval,nsamples)

% REALIZED_VAR Estimate Realized Variance (RV) and subsampled RV
%
%   ... = REALIZED_VAR(DATA,TYPE,SCHEME,INTERVAL, [NSAMPLES])
%
%       DATA     is an m by 2 single/double matrix:
%                   - column 1      increasing serial dates
%                   - column 2      prices
%
%       TYPE     is a string indicating how data is sampled.
%                   - 'CalendarTime'    sampling on calendar time forms a
%                                     regular intraday grid every INTERVAL
%                                     seconds. The timestamps of the
%                                     observations do not necessarily fall
%                                     on the gridpoints and the datapoints
%                                     are selected according to the SCHEME.
%                   - 'BusinessTime'    sampling on business time forms a
%                                     regular intraday grid every INTERVAL
%                                     ticks/trades. Gridpoints fall exactly
%                                     on observations rather than on
%                                     timestamps.
%                   - 'FixedTime'       sample at specific points in time.
%                                     When using 'FixedTime', INTERVAL must
%                                     be a vector (see INTERVAL below).
%
%       SCHEME   is a string indicating how the datapoints corresponding to
%                the gridpoints are selected.
%
%                'CalendarTime' and 'FixedTime' schemes:
%                   - 'First'       first observation after the previous
%                                 gridpoint.
%                   - 'Last'        last observation after the previous
%                                 gridpoint.
%                   - 'Min'         observation with minimum price in the
%                                 interval (previous, actual] gridpoint.
%                   - 'Max'         observation with maximum price in the
%                                 interval (previous, actual] gridpoint.
%                   - 'Previous'    last available observation up to the
%                                 actual gridpoint (aka last price
%                                 interpolation).
%                   - 'Next'        first available observation after the
%                                 actual gridpoint (aka first price
%                                 interpolation).
%                   - 'Linear'      linear interpolation between datapoints
%                                 selected with 'Previous' and 'Next'.
%                   - 'Nearest'     closest datapoint to the gridpoint.
%                   - 'Uniform'     daily first and last observations are
%                                 included by default and the remaining
%                                 INTERVAL-2 points are selected as
%                                 'Nearest' from a uniformly spread grid.
%                                 Only for 'CalendarTime'.
%
%                'BusinessTime' schemes:
%                   - 'Standard'    the gridpoints fall exactly on the
%                                 datapoints.
%                   - 'Uniform'     daily first and last observations are
%                                 included by default and the remaining
%                                 INTERVAL-2 points are spaced by an equal
%                                 amount of trades.
%
%       INTERVAL is a scalar integer value between [1 86400] indicating the
%                frequency of sampling in seconds. If TYPE 'FixedTime'
%                is used, INTERVAL should be a single/double vector sorted
%                in ascending order. The vector can consist of values
%                between [0 1] representing the intraday points in time of
%                a fixed grid that will be replicated for all days.
%                Otherwise it should contain serial dates, in which case
%                the grid is limited to the values supplied.
%                Note: 1 second = 1/86400. 18:39 = (3600*18 + 39*60)/86400.
%
%       NSAMPLES [optional, default = empty] is a positive integer scalar
%                indicating how many subsampled RVs (RVSS) to average with
%                the original RV. If omitted or empty, RVSS is returned
%                empty. Any other non-conforming value raises the error
%                'realized_var:nsamples'.
%                The RVSS differs from the RV by the starting point of the
%                grid. The starting points are uniformly spaced between the
%                first and second gridpoint of the RV.
%                Example:  NSAMPLES = 4; INTERVAL = 300;
%                          If the day starts at 8:00 then the RV grid is
%                          [8:00 8:05 ...]. The first gridpoint of the
%                          1st, 2nd, 3rd and 4th RVSS will then be
%                          respectively at 8:01, 8:02, 8:03 and 8:04.
%                Note: using NSAMPLES with one output returns the
%                      subsampled version of the realized measure in place
%                      of the original one.
%
%
%   [RV,RVSS,DATES] = REALIZED_VAR...
%
%       RV     double vector of daily realized variances.
%       RVSS   double vector of daily subsampled realized variances.
%              [Empty] if NSAMPLES was not supplied.
%       DATES  vector of daily serial dates
%
%  See also FLTPRICE, MCOLON.

% Based on K. Sheppard's REALIZED_VARIANCE - MFE toolbox v.3 (lastupdate 15 Mar 2011)
% Author: Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. In-between compatibility is assumed.
% 31 aug 2011 - Created

% Ninput
error(nargchk(4,5,nargin))

% Nsamples: an omitted or empty value disables subsampling; anything else
% must be a finite, positive, integer-valued numeric scalar. The tests are
% joined with short-circuit ORs so that a single violation is enough to
% reject the input and so that MOD and the comparisons only ever see a
% numeric scalar.
if nargin < 5 || isempty(nsamples)
    nsamples = [];
    rvss     = [];
elseif ~isnumeric(nsamples) || ~isscalar(nsamples) || ~isfinite(nsamples) || ...
       nsamples <= 0 || mod(nsamples,1) ~= 0
    error('realized_var:nsamples','NSAMPLES should be a positive integer value.')
end

% Engine starts here
% -------------------------------------------------------------------------

% Filter prices: checks on the first 4 inputs are issued by fltprice
[fltdata,opt] = fltprice(data,type,scheme,interval);

% Day change between consecutive filtered observations: a nonzero entry
% marks the last observation of a day, which is also the position of the
% overnight return in the returns computed below.
datedf = diff(fix(fltdata(:,1)));
last   = find(datedf);

% Which obs belongs to which day (one day index per return)
subs = cumsum([1; logical(datedf(2:end))]);

% Log returns, stamped with the date of the later observation
fltdata = [fltdata(2:end,1), diff(log(fltdata(:,2)))];

% Drop the overnight returns (first obs of a day vs last obs of the
% previous day) so that only intraday returns are accumulated
subs(last)      = [];
fltdata(last,:) = [];

% RV: daily sum of squared intraday returns
rv = accumarray(subs,fltdata(:,2).^2);

% RVSS
if ~isempty(nsamples)
    % One column per subsample, one row per day
    rvss = zeros(size(rv,1),nsamples);

    switch opt.type
        case 'BusinessTime'
            % Don't include an increasing part of the beginning of the day
            if interval <= nsamples + 1
                warning('realized_var:businessSubSampling','Ensure INTERVAL > NSAMPLES +1: one (or more) subsampled RV coincides with the original RV.')
            end
            % Offset (in observations) of the initial gridpoint for the subsamples
            df = fix(interval/(nsamples+1));
            % Last observation of each day in the unfiltered data
            last = [0; find(diff(fix(data(:,1)))); size(data,1)];
            for c = 1:nsamples
                % Skip the first df*c observations of every day and resample
                rvss(:,c) = realized_var(data(mcolon(last(1:end-1)+df*c+1,last(2:end),1),:),...
                                         type,scheme,interval);
            end

        case 'CalendarTime'
            % Don't include an increasing part of the beginning of the day
            % Offset of the initial gridpoint, expressed in days
            df   = interval/(nsamples+1)/86400;
            last = [0; find(diff(fix(data(:,1)))); size(data,1)];
            % Timestamps of the first and last observation of each day
            ii   = [data(last(1:end-1)+1,1)...
                    data(last(2:end),1) + 1/(86400*1000)-eps];
            for c = 1:nsamples
                % Shifted grid of serial dates, evaluated as a 'FixedTime' grid
                fxdinter  = mcolon(ii(:,1)+df*c, ii(:,2), interval/86400);
                rvss(:,c) = realized_var(data,'FixedTime',scheme,fxdinter);
            end

        case 'FixedTime'
            % Shift the grid by fractions of the first grid spacing
            df = diff(interval([1 2]))/(nsamples+1);
            for c = 1:nsamples
                rvss(:,c) = realized_var(data,type,scheme,interval + df*c);
            end
    end
    % Average RV with RVSS
    rvss = mean([rv rvss],2);
    if nargout == 1
        rv = rvss;
    end
end

% Dates: day of the last retained return of each day
if nargout == 3
    dates = fix(fltdata(cumsum(accumarray(subs,1)),1));
end
end

Contact us