function [bpv,bpvss,dates] = realized_bpv(data,type,scheme,interval,nsamples)
% REALIZED_BPV Estimate Bipower Variation (BPV) and subsampled BPV
%
% ... = REALIZED_BPV(DATA,TYPE,SCHEME,INTERVAL, [NSAMPLES])
%
% DATA is an m by 2 single/double matrix:
% - column 1 increasing serial dates
% - column 2 prices
%
% TYPE is a string indicating how data is sampled.
% - 'CalendarTime' sampling on calendar time forms a
% regular intraday grid every INTERVAL
% seconds. The timestamps of the
% observations do not necessary fall on
% the gridpoints and the datapoints
% are selected according to the SCHEME.
% - 'BusinessTime' sampling on business time forms a
% regular intraday grid every INTERVAL
% ticks/trades. Grid-points fall exacly
% on observations rather than on
% timestamps.
% - 'FixedTime' sample at specific points in time.
% When using 'FixedTime', INTERVAL must
% be a vector of values between [0 1].
%
% SCHEME is a string indicating how the datapoints corresponding to
% the gridpoints are selected.
%
% 'CalendarTime' and 'FixedTime' schemes:
% - 'First' first observation after the previous
% gridpoint.
% - 'Last' last observation after the previous
% gridpoint.
% - 'Min' observation with minimum price in the
% interval (previous actual] gridpoint.
% - 'Max' observation with maximum price in the
% interval (previous actual] gridpoint.
% - 'Previous' last available observation up to the
% actual gridpoint (aka last price
% interpolation).
% - 'Next' first available observation after the
% actual gridpoint (aka first price
% interpolation).
% - 'Linear' linear interpolation between datapoints
% selected with 'Previous' and 'Next'.
% - 'Nearest' closest datapoint to the gridpoint.
% - 'Uniform' daily first and last observations are
% included by default and the remaining
% INTERVAL-2 points are selected as
% 'Nearest' from a uniformly spread grid.
% Only for 'CalendarTime'.
%
% 'BusinessTime' schemes:
% - 'Standard' the gridpoints fall exactly on the
% datapoints.
% - 'Uniform' daily first and last observations are
% included by default and the remaining
% INTERVAL-2 points intervealed with equal
% equal amount of trades.
%
% INTERVAL is a scalar integer value between [1 86400] indicating the
% frequency of sampling in seconds. If TYPE 'FixedTime'
% is used, INTERVAL should be a single/double vector sorted
% in ascending order. The vector can consist of values
% between [0 1] representing the intraday points in time of
% a fixed grid that will be replicated for all days.
% Otherways it should contain serial dates in which case the
% grid is limited to the values supplied.
% Note: 1 second = 1/86400. 18:39 = (3600* 18+39 *60)/86400.
%
% NSAMPLES [optional, default = empty] is a positive integer value
% indicating how many subsampled BPVs (BPVSS) to average with the
% original BPV. If empty, BPVSS will be empty if assigned.
% The BPVSS differs from the BPV by the starting point of
% the grid. The starting points are uniformly spaced between
% the first and second gridpoint of the BPV.
% Example: NSAMPLES = 4; INTERVAL = 300;
% If the day starts at 8:00 then the BPVs
% gridpoint is [8:00 8:05 ...]. Then the first
% gridpoint for 1st, 2nd, 3rd and 4th BPVSS will
% be respectively at 8:01, 8:02, 8:03 and 8:04.
% Note: using NSAMPLES with one output returns the
% subsampled version of realized measure in place of
% the original one.
%
%
% [BPV,BPVSS,DATES] = REALIZED_BPV ...
%
% BPV double vector of daily bipower variations.
% BPVSS double vector of daily subsampled bipower variations.
% [Empty] if NSAMPLES was not supplied.
% DATES vector of daily serial dates
%
% Examples:
%
% See also
% Based on K. Sheppard's REALIZED_BIPOWER_VARIATION - MFE toolbox v.3 (lastupdate 15 Mar 2011)
% Author: Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. In-between compatibility is assumed.
% 31 aug 2011 - Created
% Ninput
error(nargchk(4,5,nargin))
% Nsamples
if nargin == 5
if isempty(nsamples) || ~isnumeric(nsamples) && ~isscalar(nsamples) &&...
mod(nsamples,1) > 0 && nsamples <= 0
error('realized_bpv:nsamples','NSAMPLES should be a positive integer value.')
end
else
nsamples = [];
bpvss = [];
end
% Engine starts here
% -------------------------------------------------------------------------
% Filter prices: checks on the first 4 inputs are issued by fltprice
[fltdata,opt] = fltprice(data,type,scheme,interval);
% Last observation per day
datedf = diff(fix(fltdata(:,1)));
last = find(datedf);
% Which obs belongs to which day
subs = cumsum([1; logical(datedf(3:end))]);
% Absolute returns
fltdata = [fltdata(2:end,1), abs(diff(log(fltdata(:,2))))];
fltdata = [fltdata(2:end,1) fltdata(1:end-1,2) fltdata(2:end,2)];
% Eliminate intersections of firstObs nextDay with lastObs prevDay
fltdata([last;last-1],:) = [];
subs ([last;last-1],:) = [];
% BPV
bpv = pi/2 * accumarray(subs,prod(fltdata(:,2:3),2));
% BPVSS
if ~isempty(nsamples)
bpvss = zeros(size(bpv,1),nsamples);
switch opt.type
case 'BusinessTime'
% Don't include an increasing part of the beginning of the day
if interval <= nsamples + 1
warning('realized_var:businessSubSampling','Ensure INTERVAL > NSAMPLES +1: one (or more) subsampled RV coincides with the original RV.')
end
% Offset of the initial gridpoint for the subsamples
df = fix(interval/(nsamples+1));
% Last observation of each day
last = [0; find(diff(fix(data(:,1)))); size(data,1)];
for c = 1:nsamples
bpvss(:,c) = realized_bpv(data(mcolon(last(1:end-1)+df*c+1,last(2:end),1),:),...
type,scheme,interval);
end
case 'CalendarTime'
% Don't include an increasing part of the beginning of the day
df = interval/(nsamples+1)/86400;
last = [0; find(diff(fix(data(:,1)))); size(data,1)];
ii = [data(last(1:end-1)+1,1)...
data(last(2:end),1) + 1/(86400*1000)-eps];
for c = 1:nsamples
fxdinter = mcolon(ii(:,1)+df*c, ii(:,2), interval/86400);
bpvss(:,c) = realized_bpv(data,'FixedTime',scheme,fxdinter);
end
case 'FixedTime'
% Shift the grid
df = diff(interval([1 2]))/(nsamples+1);
for c = 1:nsamples
bpvss(:,c) = realized_bpv(data,type,scheme,interval + df*c);
end
end
% Average BPV with BPVSS
bpvss = mean([bpv bpvss],2);
if nargout == 1
bpv = bpvss;
end
end
% Dates
if nargout == 3
dates = fix(fltdata(cumsum(accumarray(subs,1)),1));
end
end