Code covered by the BSD License  

Highlights from
Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

by

 

16 Sep 2011 (Updated )

My dissertation for the MSc in Finance & Economics from Warwick Business School

realized_tbpv(data,type,scheme,interval,nsamples,iscorrected)
function [tbpv,tbpvss,dates] = realized_tbpv(data,type,scheme,interval,nsamples,iscorrected)

% REALIZED_TBPV Estimate Threshold Bipower Variation (TBPV) and subsampled TBPV and their corrected versions
%
%   ... = REALIZED_TBPV(DATA,TYPE,SCHEME,INTERVAL, [NSAMPLES])
%
%       DATA        is an m by 2 single/double matrix:
%                       - column 1      increasing serial dates
%                       - column 2      prices
%
%       TYPE        is a string indicating how data is sampled.
%                       - 'CalendarTime'    sampling on calendar time forms a
%                                         regular intraday grid every INTERVAL
%                                         seconds. The timestamps of the
%                                         observations do not necessary fall on
%                                         the gridpoints and the datapoints
%                                         are selected according to the SCHEME.
%                       - 'BusinessTime'    sampling on business time forms a
%                                         regular intraday grid every INTERVAL
%                                         ticks/trades. Grid-points fall exacly
%                                         on observations rather than on
%                                         timestamps.
%                       - 'FixedTime'       sample at specific points in time.
%                                         When using 'FixedTime', INTERVAL must
%                                         be a vector of values between [0 1].
%
%       SCHEME      is a string indicating how the datapoints corresponding to
%                   the gridpoints are selected.
%
%                   'CalendarTime' and 'FixedTime' schemes:
%                       - 'First'       first observation after the previous
%                                     gridpoint.
%                       - 'Last'        last observation after the previous
%                                     gridpoint.
%                       - 'Min'         observation with minimum price in the
%                                     interval (previous actual] gridpoint.
%                       - 'Max'         observation with maximum price in the
%                                     interval (previous actual] gridpoint.
%                       - 'Previous'    last available observation up to the
%                                     actual gridpoint (aka last price
%                                     interpolation).
%                       - 'Next'        first available observation after the
%                                     actual gridpoint (aka first price
%                                     interpolation).
%                       - 'Linear'      linear interpolation between datapoints
%                                     selected with 'Previous' and 'Next'.
%                       - 'Nearest'     closest datapoint to the gridpoint.
%                       - 'Uniform'     daily first and last observations are
%                                     included by default and the remaining
%                                     INTERVAL-2 points are selected as
%                                     'Nearest' from a uniformly spread grid.
%                                     Only for 'CalendarTime'.
%
%                   'BusinessTime' schemes:
%                       - 'Standard'    the gridpoints fall exactly on the
%                                     datapoints.
%                       - 'Uniform'     daily first and last observations are
%                                     included by default and the remaining
%                                     INTERVAL-2 points intervealed with equal
%                                     equal amount of trades.
%
%       INTERVAL    is a scalar integer value between [1 86400] indicating the
%                   frequency of sampling in seconds. If TYPE 'FixedTime'
%                   is used, INTERVAL should be a single/double vector sorted
%                   in ascending order. The vector can consist of values
%                   between [0 1] representing the intraday points in time of
%                   a fixed grid that will be replicated for all days.
%                   Otherways it should contain serial dates in which case the
%                   grid is limited to the values supplied.
%                   Note: 1 second = 1/86400. 18:39 = (3600* 18+39 *60)/86400.
%
%       NSAMPLES    [optional, default = empty] is a positive integer value
%                   indicating how many subsampled TBPVs (TBPVSS) to average with
%                   the original TBPV. If empty, TBPVSS will be empty if assigned.
%                   The TBPVSS differs from the TBPV by the starting point of
%                   the grid. The starting points are uniformly spaced between
%                   the first and second gridpoint of the TBPV.
%                   Example:  NSAMPLES = 4; INTERVAL = 300;
%                             If the day starts at 8:00 then the TBPVs gridpoint
%                             is [8:00 8:05 ...]. Then the first gridpoint for
%                             1st, 2nd, 3rd and 4th  TBPVSS will be respectively
%                             at 8:01, 8:02, 8:03 and 8:04.
%                   Note: using NSAMPLES with one output returns the 
%                         subsampled version of realized measure in place of
%                         the original one.
%
%       !!WARNING!! doesn't work
%       ISCORRECTED [optional, default = false] logical true/false (or 0/1)
%                   indicating if the corrected version of the TBPV should
%                   be used. Returns exceeding the threshold are replaced
%                   with their expected value.
%
%
%   [TBPV,TBPVSS,DATES] = REALIZED_TBPV...
%
%       TBPV   double vector of daily threshold bipower variations.
%              The corrected version is computed if ISCORRECTED is true.
%       TBPVSS double vector of daily subsampled threshold bipower variations.
%              The corrected version is computed if ISCORRECTED is true.
%              [Empty] if NSAMPLES was not supplied.
%       DATES  vector of daily serial dates
%
% Examples:
%
%  See also

% Author: Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. In-between compatibility is assumed.
% 02 sep 2011 - Created

% Ninput
error(nargchk(4,6,nargin))

% Nsamples
if nargin > 4
    if isempty(nsamples)
        tbpvss   = [];
    elseif ~isnumeric(nsamples) && ~isscalar(nsamples) &&...
       mod(nsamples,1) > 0 && nsamples <= 0
        error('realized_tbpv:nsamples','NSAMPLES should be a positive integer value.')
    end
else
    nsamples = [];
end

% Iscorrected
if nargin == 6
    if iscorrected == 0 || iscorrected == 1
        iscorrected = logical(iscorrected); %#ok:
    elseif ~islogical(iscorrected)
        error('realized_tbpv:iscorrected','ISCORRECTED should be logical or one of 0, 1.')
    end
else
    iscorrected = false; %#ok:
end
iscorrected = false; %#ok: NOTE doesn't work the corrected version!

% Engine starts here
% -------------------------------------------------------------------------

% Filter prices: checks on the first 4 inputs are issued by fltprice
[fltdata,opt] = fltprice(data,type,scheme,interval);

% Last observation per day
datedf = diff(fix(fltdata(:,1)));

% Which obs belongs to which day
subs = cumsum([1; logical(datedf(3:end))]);

% Returns
fltdata = [fltdata(2:end,1), diff(log(fltdata(:,2)))];
last    = find(datedf);
fltdata(last,:) = [];
subs   (last,:) = [];

% Bandwidth for local variance
last = uint32([0; last-(1:size(last,1)).'; size(fltdata,1)]);
k2   = fix(diff(last)/4);

% Filter returns with local variance and take |ret| (absolute)
fltdata(:,2) = abs(fltret(fltdata(:,2),last,k2));
fltdata      = [fltdata(2:end,1) fltdata(1:end-1,2) fltdata(2:end,2)];

% Eliminate intersections of firstObs nextDay with lastObs prevDay
fltdata(last(2:end-1),:) = [];
subs   (last(2:end-1),:) = [];

% TBPV
tbpv = pi/2 * accumarray(subs,prod(fltdata(:,2:3),2));

% TBPVSS
if ~isempty(nsamples)
    tbpvss = zeros(size(tbpv,1),nsamples);
    
    switch opt.type
        case 'BusinessTime'
            % Don't include an increasing part of the beginning of the day
            if interval <= nsamples + 1
                warning('realized_var:businessSubSampling','Ensure INTERVAL > NSAMPLES +1: one (or more) subsampled RV coincides with the original RV.')
            end
            % Offset of the initial gridpoint for the subsamples
            df = fix(interval/(nsamples+1));
            % Last observation of each day
            last = [0; find(diff(fix(data(:,1)))); size(data,1)];
            for c = 1:nsamples
                tbpvss(:,c) = realized_tbpv(data(mcolon(last(1:end-1)+df*c+1,last(2:end),1),:),...
                                            type,scheme,interval);
            end
            
        case 'CalendarTime'
            % Don't include an increasing part of the beginning of the day
            df   = interval/(nsamples+1)/86400;
            last = [0; find(diff(fix(data(:,1)))); size(data,1)];
            ii   = [data(last(1:end-1)+1,1)...
                data(last(2:end),1) + 1/(86400*1000)-eps];
            for c = 1:nsamples
                fxdinter    = mcolon(ii(:,1)+df*c, ii(:,2), interval/86400);
                tbpvss(:,c) = realized_tbpv(data,'FixedTime',scheme,fxdinter);
            end
            
        case 'FixedTime'
            % Shift the grid
            df = diff(interval([1 2]))/(nsamples+1);
            for c = 1:nsamples
                tbpvss(:,c) = realized_tbpv(data,type,scheme,interval + df*c);
            end
    end
    % Average TBPV with TBPVSS
    tbpvss = mean([tbpv tbpvss],2);
    if nargout == 1
        tbpv = tbpvss;
    end
end

% Dates
if nargout == 3
    dates = fix(fltdata(cumsum(accumarray(subs,1)),1));
end
end

Contact us