Code covered by the BSD License  

Highlights from
Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

by

 

16 Sep 2011 (Updated )

My dissertation for the MSc in Finance & Economics from Warwick Business School

realized_bpv(data,type,scheme,interval,nsamples)
function [bpv,bpvss,dates] = realized_bpv(data,type,scheme,interval,nsamples)

% REALIZED_BPV Estimate Bipower Variation (BPV) and subsampled BPV
%
%   ... = REALIZED_BPV(DATA,TYPE,SCHEME,INTERVAL, [NSAMPLES])
%
%       DATA     is an m by 2 single/double matrix:
%                   - column 1      increasing serial dates
%                   - column 2      prices
%
%       TYPE     is a string indicating how data is sampled.
%                   - 'CalendarTime'    sampling on calendar time forms a
%                                     regular intraday grid every INTERVAL
%                                     seconds. The timestamps of the
%                                     observations do not necessary fall on
%                                     the gridpoints and the datapoints
%                                     are selected according to the SCHEME.
%                   - 'BusinessTime'    sampling on business time forms a
%                                     regular intraday grid every INTERVAL
%                                     ticks/trades. Grid-points fall exacly
%                                     on observations rather than on
%                                     timestamps.
%                   - 'FixedTime'       sample at specific points in time.
%                                     When using 'FixedTime', INTERVAL must
%                                     be a vector of values between [0 1].
%
%       SCHEME   is a string indicating how the datapoints corresponding to
%                the gridpoints are selected.
%
%                'CalendarTime' and 'FixedTime' schemes:
%                   - 'First'       first observation after the previous
%                                 gridpoint.
%                   - 'Last'        last observation after the previous
%                                 gridpoint.
%                   - 'Min'         observation with minimum price in the
%                                 interval (previous actual] gridpoint.
%                   - 'Max'         observation with maximum price in the
%                                 interval (previous actual] gridpoint.
%                   - 'Previous'    last available observation up to the
%                                 actual gridpoint (aka last price
%                                 interpolation).
%                   - 'Next'        first available observation after the
%                                 actual gridpoint (aka first price
%                                 interpolation).
%                   - 'Linear'      linear interpolation between datapoints
%                                 selected with 'Previous' and 'Next'.
%                   - 'Nearest'     closest datapoint to the gridpoint.
%                   - 'Uniform'     daily first and last observations are
%                                 included by default and the remaining
%                                 INTERVAL-2 points are selected as
%                                 'Nearest' from a uniformly spread grid.
%                                 Only for 'CalendarTime'.
%
%                'BusinessTime' schemes:
%                   - 'Standard'    the gridpoints fall exactly on the
%                                 datapoints.
%                   - 'Uniform'     daily first and last observations are
%                                 included by default and the remaining
%                                 INTERVAL-2 points intervealed with equal
%                                 equal amount of trades.
%
%       INTERVAL is a scalar integer value between [1 86400] indicating the
%                frequency of sampling in seconds. If TYPE 'FixedTime' 
%                is used, INTERVAL should be a single/double vector sorted 
%                in ascending order. The vector can consist of values 
%                between [0 1] representing the intraday points in time of 
%                a fixed grid that will be replicated for all days.
%                Otherways it should contain serial dates in which case the
%                grid is limited to the values supplied.
%                Note: 1 second = 1/86400. 18:39 = (3600* 18+39 *60)/86400.
%
%       NSAMPLES [optional, default = empty] is a positive integer value
%                indicating how many subsampled BPVs (BPVSS) to average with the 
%                original BPV. If empty, BPVSS will be empty if assigned.
%                The BPVSS differs from the BPV by the starting point of 
%                the grid. The starting points are uniformly spaced between
%                the first and second gridpoint of the BPV. 
%                Example:  NSAMPLES = 4; INTERVAL = 300; 
%                          If the day starts at 8:00 then the BPVs 
%                          gridpoint is [8:00 8:05 ...]. Then the first 
%                          gridpoint for 1st, 2nd, 3rd and 4th  BPVSS will 
%                          be respectively at 8:01, 8:02, 8:03 and 8:04.
%                Note: using NSAMPLES with one output returns the 
%                      subsampled version of realized measure in place of
%                      the original one.
%
%
%   [BPV,BPVSS,DATES] = REALIZED_BPV ...
%
%       BPV    double vector of daily bipower variations.
%       BPVSS  double vector of daily subsampled bipower variations. 
%              [Empty] if NSAMPLES was not supplied.
%       DATES  vector of daily serial dates
%
% Examples:
% 
%  See also 

% Based on K. Sheppard's REALIZED_BIPOWER_VARIATION - MFE toolbox v.3 (lastupdate 15 Mar 2011)
% Author: Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. In-between compatibility is assumed.
% 31 aug 2011 - Created

% Ninput
error(nargchk(4,5,nargin))

% Nsamples
if nargin == 5
    if isempty(nsamples) || ~isnumeric(nsamples) && ~isscalar(nsamples) &&...
       mod(nsamples,1) > 0 && nsamples <= 0
        error('realized_bpv:nsamples','NSAMPLES should be a positive integer value.')
    end
else
    nsamples = [];
    bpvss    = []; 
end

% Engine starts here
% -------------------------------------------------------------------------

% Filter prices: checks on the first 4 inputs are issued by fltprice
[fltdata,opt] = fltprice(data,type,scheme,interval);

% Last observation per day
datedf = diff(fix(fltdata(:,1)));
last   = find(datedf);

% Which obs belongs to which day
subs = cumsum([1; logical(datedf(3:end))]);

% Absolute returns
fltdata = [fltdata(2:end,1), abs(diff(log(fltdata(:,2))))];
fltdata = [fltdata(2:end,1) fltdata(1:end-1,2) fltdata(2:end,2)];
       
% Eliminate intersections of firstObs nextDay with lastObs prevDay 
fltdata([last;last-1],:) = [];
subs   ([last;last-1],:) = [];

% BPV
bpv = pi/2 * accumarray(subs,prod(fltdata(:,2:3),2));

% BPVSS
if ~isempty(nsamples)
    bpvss = zeros(size(bpv,1),nsamples);
    
    switch opt.type
        case 'BusinessTime'
            % Don't include an increasing part of the beginning of the day
            if interval <= nsamples + 1
                warning('realized_var:businessSubSampling','Ensure INTERVAL > NSAMPLES +1: one (or more) subsampled RV coincides with the original RV.')
            end
            % Offset of the initial gridpoint for the subsamples
            df = fix(interval/(nsamples+1));
            % Last observation of each day
            last = [0; find(diff(fix(data(:,1)))); size(data,1)];
            for c = 1:nsamples
                bpvss(:,c) = realized_bpv(data(mcolon(last(1:end-1)+df*c+1,last(2:end),1),:),...
                                         type,scheme,interval);
            end
            
        case 'CalendarTime'
            % Don't include an increasing part of the beginning of the day
            df   = interval/(nsamples+1)/86400;
            last = [0; find(diff(fix(data(:,1)))); size(data,1)];
            ii   = [data(last(1:end-1)+1,1)...
                    data(last(2:end),1) + 1/(86400*1000)-eps];
            for c = 1:nsamples
                fxdinter  = mcolon(ii(:,1)+df*c, ii(:,2), interval/86400);
                bpvss(:,c) = realized_bpv(data,'FixedTime',scheme,fxdinter);
            end
            
        case 'FixedTime'
            % Shift the grid 
            df = diff(interval([1 2]))/(nsamples+1);
            for c = 1:nsamples
                bpvss(:,c) = realized_bpv(data,type,scheme,interval + df*c);
            end
    end
    % Average BPV with BPVSS
    bpvss = mean([bpv bpvss],2);
    if nargout == 1
        bpv = bpvss;
    end
end

% Dates
if nargout == 3
    dates = fix(fltdata(cumsum(accumarray(subs,1)),1));
end
end

Contact us