realized_tbpv(data,type,scheme,interval,nsamples,iscorrected)

function [tbpv,tbpvss,dates] = realized_tbpv(data,type,scheme,interval,nsamples,iscorrected)
% REALIZED_TBPV Estimate Threshold Bipower Variation (TBPV) and subsampled TBPV and their corrected versions
%
% ... = REALIZED_TBPV(DATA,TYPE,SCHEME,INTERVAL, [NSAMPLES])
%
% DATA is an m by 2 single/double matrix:
%  column 1 increasing serial dates
%  column 2 prices
%
% TYPE is a string indicating how data is sampled.
%  'CalendarTime' sampling on calendar time forms a
% regular intraday grid every INTERVAL
% seconds. The timestamps of the
% observations do not necessary fall on
% the gridpoints and the datapoints
% are selected according to the SCHEME.
%  'BusinessTime' sampling on business time forms a
% regular intraday grid every INTERVAL
% ticks/trades. Gridpoints fall exacly
% on observations rather than on
% timestamps.
%  'FixedTime' sample at specific points in time.
% When using 'FixedTime', INTERVAL must
% be a vector of values between [0 1].
%
% SCHEME is a string indicating how the datapoints corresponding to
% the gridpoints are selected.
%
% 'CalendarTime' and 'FixedTime' schemes:
%  'First' first observation after the previous
% gridpoint.
%  'Last' last observation after the previous
% gridpoint.
%  'Min' observation with minimum price in the
% interval (previous actual] gridpoint.
%  'Max' observation with maximum price in the
% interval (previous actual] gridpoint.
%  'Previous' last available observation up to the
% actual gridpoint (aka last price
% interpolation).
%  'Next' first available observation after the
% actual gridpoint (aka first price
% interpolation).
%  'Linear' linear interpolation between datapoints
% selected with 'Previous' and 'Next'.
%  'Nearest' closest datapoint to the gridpoint.
%  'Uniform' daily first and last observations are
% included by default and the remaining
% INTERVAL2 points are selected as
% 'Nearest' from a uniformly spread grid.
% Only for 'CalendarTime'.
%
% 'BusinessTime' schemes:
%  'Standard' the gridpoints fall exactly on the
% datapoints.
%  'Uniform' daily first and last observations are
% included by default and the remaining
% INTERVAL2 points intervealed with equal
% equal amount of trades.
%
% INTERVAL is a scalar integer value between [1 86400] indicating the
% frequency of sampling in seconds. If TYPE 'FixedTime'
% is used, INTERVAL should be a single/double vector sorted
% in ascending order. The vector can consist of values
% between [0 1] representing the intraday points in time of
% a fixed grid that will be replicated for all days.
% Otherways it should contain serial dates in which case the
% grid is limited to the values supplied.
% Note: 1 second = 1/86400. 18:39 = (3600* 18+39 *60)/86400.
%
% NSAMPLES [optional, default = empty] is a positive integer value
% indicating how many subsampled TBPVs (TBPVSS) to average with
% the original TBPV. If empty, TBPVSS will be empty if assigned.
% The TBPVSS differs from the TBPV by the starting point of
% the grid. The starting points are uniformly spaced between
% the first and second gridpoint of the TBPV.
% Example: NSAMPLES = 4; INTERVAL = 300;
% If the day starts at 8:00 then the TBPVs gridpoint
% is [8:00 8:05 ...]. Then the first gridpoint for
% 1st, 2nd, 3rd and 4th TBPVSS will be respectively
% at 8:01, 8:02, 8:03 and 8:04.
% Note: using NSAMPLES with one output returns the
% subsampled version of realized measure in place of
% the original one.
%
% !!WARNING!! doesn't work
% ISCORRECTED [optional, default = false] logical true/false (or 0/1)
% indicating if the corrected version of the TBPV should
% be used. Returns exceeding the threshold are replaced
% with their expected value.
%
%
% [TBPV,TBPVSS,DATES] = REALIZED_TBPV...
%
% TBPV double vector of daily threshold bipower variations.
% The corrected version is computed if ISCORRECTED is true.
% TBPVSS double vector of daily subsampled threshold bipower variations.
% The corrected version is computed if ISCORRECTED is true.
% [Empty] if NSAMPLES was not supplied.
% DATES vector of daily serial dates
%
% Examples:
%
% See also
% Author: Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. Inbetween compatibility is assumed.
% 02 sep 2011  Created
% Ninput
error(nargchk(4,6,nargin))
% Nsamples
if nargin > 4
if isempty(nsamples)
tbpvss = [];
elseif ~isnumeric(nsamples) && ~isscalar(nsamples) &&...
mod(nsamples,1) > 0 && nsamples <= 0
error('realized_tbpv:nsamples','NSAMPLES should be a positive integer value.')
end
else
nsamples = [];
end
% Iscorrected
if nargin == 6
if iscorrected == 0  iscorrected == 1
iscorrected = logical(iscorrected); %#ok:
elseif ~islogical(iscorrected)
error('realized_tbpv:iscorrected','ISCORRECTED should be logical or one of 0, 1.')
end
else
iscorrected = false; %#ok:
end
iscorrected = false; %#ok: NOTE doesn't work the corrected version!
% Engine starts here
% 
% Filter prices: checks on the first 4 inputs are issued by fltprice
[fltdata,opt] = fltprice(data,type,scheme,interval);
% Last observation per day
datedf = diff(fix(fltdata(:,1)));
% Which obs belongs to which day
subs = cumsum([1; logical(datedf(3:end))]);
% Returns
fltdata = [fltdata(2:end,1), diff(log(fltdata(:,2)))];
last = find(datedf);
fltdata(last,:) = [];
subs (last,:) = [];
% Bandwidth for local variance
last = uint32([0; last(1:size(last,1)).'; size(fltdata,1)]);
k2 = fix(diff(last)/4);
% Filter returns with local variance and take ret (absolute)
fltdata(:,2) = abs(fltret(fltdata(:,2),last,k2));
fltdata = [fltdata(2:end,1) fltdata(1:end1,2) fltdata(2:end,2)];
% Eliminate intersections of firstObs nextDay with lastObs prevDay
fltdata(last(2:end1),:) = [];
subs (last(2:end1),:) = [];
% TBPV
tbpv = pi/2 * accumarray(subs,prod(fltdata(:,2:3),2));
% TBPVSS
if ~isempty(nsamples)
tbpvss = zeros(size(tbpv,1),nsamples);
switch opt.type
case 'BusinessTime'
% Don't include an increasing part of the beginning of the day
if interval <= nsamples + 1
warning('realized_var:businessSubSampling','Ensure INTERVAL > NSAMPLES +1: one (or more) subsampled RV coincides with the original RV.')
end
% Offset of the initial gridpoint for the subsamples
df = fix(interval/(nsamples+1));
% Last observation of each day
last = [0; find(diff(fix(data(:,1)))); size(data,1)];
for c = 1:nsamples
tbpvss(:,c) = realized_tbpv(data(mcolon(last(1:end1)+df*c+1,last(2:end),1),:),...
type,scheme,interval);
end
case 'CalendarTime'
% Don't include an increasing part of the beginning of the day
df = interval/(nsamples+1)/86400;
last = [0; find(diff(fix(data(:,1)))); size(data,1)];
ii = [data(last(1:end1)+1,1)...
data(last(2:end),1) + 1/(86400*1000)eps];
for c = 1:nsamples
fxdinter = mcolon(ii(:,1)+df*c, ii(:,2), interval/86400);
tbpvss(:,c) = realized_tbpv(data,'FixedTime',scheme,fxdinter);
end
case 'FixedTime'
% Shift the grid
df = diff(interval([1 2]))/(nsamples+1);
for c = 1:nsamples
tbpvss(:,c) = realized_tbpv(data,type,scheme,interval + df*c);
end
end
% Average TBPV with TBPVSS
tbpvss = mean([tbpv tbpvss],2);
if nargout == 1
tbpv = tbpvss;
end
end
% Dates
if nargout == 3
dates = fix(fltdata(cumsum(accumarray(subs,1)),1));
end
end

