Code covered by the BSD License  

Highlights from
Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

by

 

16 Sep 2011 (Updated )

My dissertation for the MSc in Finance & Economics from Warwick Business School

realized_tripv(data,type,scheme,interval,nsamples,threshold)
function [tripv,tripvss,dates] = realized_tripv(data,type,scheme,interval,nsamples,threshold)

% REALIZED_TRIPV Estimate simple and threshold Tri Power Variation (TriPV) and subsampled TriPV
%
%   ... = REALIZED_TRIPV(DATA,TYPE,SCHEME,INTERVAL, [NSAMPLES],[THRESHOLD])
%
%       DATA        is an m by 2 single/double matrix:
%                       - column 1      increasing serial dates
%                       - column 2      prices
%
%       TYPE        is a string indicating how data is sampled.
%                       - 'CalendarTime'    sampling on calendar time forms a
%                                         regular intraday grid every INTERVAL
%                                         seconds. The timestamps of the
%                                         observations do not necessary fall on
%                                         the gridpoints and the datapoints
%                                         are selected according to the SCHEME.
%                       - 'BusinessTime'    sampling on business time forms a
%                                         regular intraday grid every INTERVAL
%                                         ticks/trades. Grid-points fall exacly
%                                         on observations rather than on
%                                         timestamps.
%                       - 'FixedTime'       sample at specific points in time.
%                                         When using 'FixedTime', INTERVAL must
%                                         be a vector of values between [0 1].
%
%       SCHEME      is a string indicating how the datapoints corresponding to
%                   the gridpoints are selected.
%
%                   'CalendarTime' and 'FixedTime' schemes:
%                       - 'First'       first observation after the previous
%                                     gridpoint.
%                       - 'Last'        last observation after the previous
%                                     gridpoint.
%                       - 'Min'         observation with minimum price in the
%                                     interval (previous actual] gridpoint.
%                       - 'Max'         observation with maximum price in the
%                                     interval (previous actual] gridpoint.
%                       - 'Previous'    last available observation up to the
%                                     actual gridpoint (aka last price
%                                     interpolation).
%                       - 'Next'        first available observation after the
%                                     actual gridpoint (aka first price
%                                     interpolation).
%                       - 'Linear'      linear interpolation between datapoints
%                                     selected with 'Previous' and 'Next'.
%                       - 'Nearest'     closest datapoint to the gridpoint.
%                       - 'Uniform'     daily first and last observations are
%                                     included by default and the remaining
%                                     INTERVAL-2 points are selected as
%                                     'Nearest' from a uniformly spread grid.
%                                     Only for 'CalendarTime'.
%
%                   'BusinessTime' schemes:
%                       - 'Standard'    the gridpoints fall exactly on the
%                                     datapoints.
%                       - 'Uniform'     daily first and last observations are
%                                     included by default and the remaining
%                                     INTERVAL-2 points intervealed with equal
%                                     equal amount of trades.
%
%       INTERVAL    is a scalar integer value between [1 86400] indicating the
%                   frequency of sampling in seconds. If TYPE 'FixedTime'
%                   is used, INTERVAL should be a single/double vector sorted
%                   in ascending order. The vector can consist of values
%                   between [0 1] representing the intraday points in time of
%                   a fixed grid that will be replicated for all days.
%                   Otherways it should contain serial dates in which case the
%                   grid is limited to the values supplied.
%                   Note: 1 second = 1/86400. 18:39 = (3600* 18+39 *60)/86400.
%
%       NSAMPLES    [optional, default = empty] is a positive integer value
%                   indicating how many subsampled TriPVs (TriPVSS) to average with
%                   the original TriPV. If empty, TriPVSS will be empty if assigned.
%                   The TriPVSS differs from the TriPV by the starting point of
%                   the grid. The starting points are uniformly spaced between
%                   the first and second gridpoint of the TriPV.
%                   Example:  NSAMPLES = 4; INTERVAL = 300;
%                             If the day starts at 8:00 then the TriPVs gridpoint
%                             is [8:00 8:05 ...]. Then the first gridpoint for
%                             1st, 2nd, 3rd and 4th  TriPVSS will be respectively
%                             at 8:01, 8:02, 8:03 and 8:04.
%                   Note: using NSAMPLES with one output returns the 
%                         subsampled version of realized measure in place of
%                         the original one.
%
%       THRESHOLD   [optional, default = false] logical true/false (or 0/1)
%                   indicating if the to compute the threshold version of 
%                   the TriPV. 
%
%
%   [TRIPV,TRIPVSS,DATES] = REALIZED_TRIPV...
%
%       TRIPV     double vector of daily tripower variations.
%               The threshold tripower variation is computed if TRESHOLD is true.
%       TRIPVSS   double vector of daily subsampled tripower variations.
%               The threshold tripower variation is computed if TRESHOLD is true.
%               [Empty] if NSAMPLES was not supplied.
%       DATES   vector of daily serial dates
%
% Examples:
%
%  See also

% Author: Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. In-between compatibility is assumed.
% 13 sep 2011 - Created

% Ninput
error(nargchk(4,6,nargin))

% Nsamples
if nargin > 4
    if isempty(nsamples)
        tripvss   = [];
    elseif ~isnumeric(nsamples) && ~isscalar(nsamples) &&...
       mod(nsamples,1) > 0 && nsamples <= 0
        error('realized_tripv:nsamples','NSAMPLES should be a positive integer value.')
    end
else
    nsamples = [];
end

% Threshold
if nargin == 6
    if threshold == 0 || threshold == 1
        threshold = logical(threshold); 
    elseif ~islogical(threshold)
        error('realized_tripv:iscorrected','ISCORRECTED should be logical or one of 0, 1.')
    end
else
    threshold = false; 
end

% Engine starts here
% -------------------------------------------------------------------------

% Filter prices: checks on the first 4 inputs are issued by fltprice
[fltdata,opt] = fltprice(data,type,scheme,interval);

% Last observation per day
datedf = diff(fix(fltdata(:,1)));

% Which obs belongs to which day
subs = cumsum([1; logical(datedf(3:end))]);

% Returns
fltdata = [fltdata(2:end,1), diff(log(fltdata(:,2)))];
last    = find(datedf);
fltdata(last,:) = [];
subs   (last,:) = [];

last = uint32([0; last-(1:size(last,1)).'; size(fltdata,1)]);

% If threshold filter returns with local variance. Take |ret|^(4/3) in any case
if threshold
    % Bandwidth for local variance
    k2   = fix(diff(last)/4);
    fltdata(:,2) = abs(fltret(fltdata(:,2),last,k2)).^(4/3);
else
    fltdata(:,2) = abs(fltdata(:,2)).^(4/3);
end
fltdata = [fltdata(3:end,1) fltdata(1:end-2,2) fltdata(2:end-1,2) fltdata(3:end,2)];

% Eliminate intersections of firstObs nextDay with lastObs prevDay
fltdata([last(2:end-1); last(2:end-1)-1],:) = [];
subs   ([last(2:end-1); last(2:end  )-1],:) = [];

% TriPV
cons  = (2^(2/3)*gamma(7/6)/gamma(0.5))^(-3) * accumarray(subs,1);
tripv =  cons .* accumarray(subs,prod(fltdata(:,2:end),2));

% TriPVSS
if ~isempty(nsamples)
    tripvss = zeros(size(tripv,1),nsamples);
    
    switch opt.type
        case 'BusinessTime'
            % Don't include an increasing part of the beginning of the day
            if interval <= nsamples + 1
                warning('realized_var:businessSubSampling','Ensure INTERVAL > NSAMPLES +1: one (or more) subsampled RV coincides with the original RV.')
            end
            % Offset of the initial gridpoint for the subsamples
            df = fix(interval/(nsamples+1));
            % Last observation of each day
            last = [0; find(diff(fix(data(:,1)))); size(data,1)];
            for c = 1:nsamples
                tripvss(:,c) = realized_tripv(data(mcolon(last(1:end-1)+df*c+1,last(2:end),1),:),...
                                               type,scheme,interval,[],threshold);
            end
            
        case 'CalendarTime'
            % Don't include an increasing part of the beginning of the day
            df   = interval/(nsamples+1)/86400;
            last = [0; find(diff(fix(data(:,1)))); size(data,1)];
            ii   = [data(last(1:end-1)+1,1)...
                data(last(2:end),1) + 1/(86400*1000)-eps];
            for c = 1:nsamples
                fxdinter    = mcolon(ii(:,1)+df*c, ii(:,2), interval/86400);
                tripvss(:,c) = realized_tripv(data,'FixedTime',scheme,fxdinter,[],threshold);
            end
            
        case 'FixedTime'
            % Shift the grid
            df = diff(interval([1 2]))/(nsamples+1);
            for c = 1:nsamples
                tripvss(:,c) = realized_tripv(data,type,scheme,interval + df*c,[],threshold);
            end
    end
    % Average TBPV with TBPVSS
    tripvss = mean([tripv tripvss],2);
    if nargout == 1
        tripv = tripvss;
    end
end

% Dates
if nargout == 3
    dates = fix(fltdata(cumsum(accumarray(subs,1)),1));
end
end

Contact us