realized_tripv(data,type,scheme,interval,nsamples,threshold)

function [tripv,tripvss,dates] = realized_tripv(data,type,scheme,interval,nsamples,threshold)
% REALIZED_TRIPV Estimate simple and threshold Tri Power Variation (TriPV) and subsampled TriPV
%
% ... = REALIZED_TRIPV(DATA,TYPE,SCHEME,INTERVAL, [NSAMPLES],[THRESHOLD])
%
% DATA is an m by 2 single/double matrix:
%  column 1 increasing serial dates
%  column 2 prices
%
% TYPE is a string indicating how data is sampled.
%  'CalendarTime' sampling on calendar time forms a
% regular intraday grid every INTERVAL
% seconds. The timestamps of the
% observations do not necessary fall on
% the gridpoints and the datapoints
% are selected according to the SCHEME.
%  'BusinessTime' sampling on business time forms a
% regular intraday grid every INTERVAL
% ticks/trades. Gridpoints fall exacly
% on observations rather than on
% timestamps.
%  'FixedTime' sample at specific points in time.
% When using 'FixedTime', INTERVAL must
% be a vector of values between [0 1].
%
% SCHEME is a string indicating how the datapoints corresponding to
% the gridpoints are selected.
%
% 'CalendarTime' and 'FixedTime' schemes:
%  'First' first observation after the previous
% gridpoint.
%  'Last' last observation after the previous
% gridpoint.
%  'Min' observation with minimum price in the
% interval (previous actual] gridpoint.
%  'Max' observation with maximum price in the
% interval (previous actual] gridpoint.
%  'Previous' last available observation up to the
% actual gridpoint (aka last price
% interpolation).
%  'Next' first available observation after the
% actual gridpoint (aka first price
% interpolation).
%  'Linear' linear interpolation between datapoints
% selected with 'Previous' and 'Next'.
%  'Nearest' closest datapoint to the gridpoint.
%  'Uniform' daily first and last observations are
% included by default and the remaining
% INTERVAL2 points are selected as
% 'Nearest' from a uniformly spread grid.
% Only for 'CalendarTime'.
%
% 'BusinessTime' schemes:
%  'Standard' the gridpoints fall exactly on the
% datapoints.
%  'Uniform' daily first and last observations are
% included by default and the remaining
% INTERVAL2 points intervealed with equal
% equal amount of trades.
%
% INTERVAL is a scalar integer value between [1 86400] indicating the
% frequency of sampling in seconds. If TYPE 'FixedTime'
% is used, INTERVAL should be a single/double vector sorted
% in ascending order. The vector can consist of values
% between [0 1] representing the intraday points in time of
% a fixed grid that will be replicated for all days.
% Otherways it should contain serial dates in which case the
% grid is limited to the values supplied.
% Note: 1 second = 1/86400. 18:39 = (3600* 18+39 *60)/86400.
%
% NSAMPLES [optional, default = empty] is a positive integer value
% indicating how many subsampled TriPVs (TriPVSS) to average with
% the original TriPV. If empty, TriPVSS will be empty if assigned.
% The TriPVSS differs from the TriPV by the starting point of
% the grid. The starting points are uniformly spaced between
% the first and second gridpoint of the TriPV.
% Example: NSAMPLES = 4; INTERVAL = 300;
% If the day starts at 8:00 then the TriPVs gridpoint
% is [8:00 8:05 ...]. Then the first gridpoint for
% 1st, 2nd, 3rd and 4th TriPVSS will be respectively
% at 8:01, 8:02, 8:03 and 8:04.
% Note: using NSAMPLES with one output returns the
% subsampled version of realized measure in place of
% the original one.
%
% THRESHOLD [optional, default = false] logical true/false (or 0/1)
% indicating if the to compute the threshold version of
% the TriPV.
%
%
% [TRIPV,TRIPVSS,DATES] = REALIZED_TRIPV...
%
% TRIPV double vector of daily tripower variations.
% The threshold tripower variation is computed if TRESHOLD is true.
% TRIPVSS double vector of daily subsampled tripower variations.
% The threshold tripower variation is computed if TRESHOLD is true.
% [Empty] if NSAMPLES was not supplied.
% DATES vector of daily serial dates
%
% Examples:
%
% See also
% Author: Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. Inbetween compatibility is assumed.
% 13 sep 2011  Created
% Ninput
error(nargchk(4,6,nargin))
% Nsamples
if nargin > 4
if isempty(nsamples)
tripvss = [];
elseif ~isnumeric(nsamples) && ~isscalar(nsamples) &&...
mod(nsamples,1) > 0 && nsamples <= 0
error('realized_tripv:nsamples','NSAMPLES should be a positive integer value.')
end
else
nsamples = [];
end
% Threshold
if nargin == 6
if threshold == 0  threshold == 1
threshold = logical(threshold);
elseif ~islogical(threshold)
error('realized_tripv:iscorrected','ISCORRECTED should be logical or one of 0, 1.')
end
else
threshold = false;
end
% Engine starts here
% 
% Filter prices: checks on the first 4 inputs are issued by fltprice
[fltdata,opt] = fltprice(data,type,scheme,interval);
% Last observation per day
datedf = diff(fix(fltdata(:,1)));
% Which obs belongs to which day
subs = cumsum([1; logical(datedf(3:end))]);
% Returns
fltdata = [fltdata(2:end,1), diff(log(fltdata(:,2)))];
last = find(datedf);
fltdata(last,:) = [];
subs (last,:) = [];
last = uint32([0; last(1:size(last,1)).'; size(fltdata,1)]);
% If threshold filter returns with local variance. Take ret^(4/3) in any case
if threshold
% Bandwidth for local variance
k2 = fix(diff(last)/4);
fltdata(:,2) = abs(fltret(fltdata(:,2),last,k2)).^(4/3);
else
fltdata(:,2) = abs(fltdata(:,2)).^(4/3);
end
fltdata = [fltdata(3:end,1) fltdata(1:end2,2) fltdata(2:end1,2) fltdata(3:end,2)];
% Eliminate intersections of firstObs nextDay with lastObs prevDay
fltdata([last(2:end1); last(2:end1)1],:) = [];
subs ([last(2:end1); last(2:end )1],:) = [];
% TriPV
cons = (2^(2/3)*gamma(7/6)/gamma(0.5))^(3) * accumarray(subs,1);
tripv = cons .* accumarray(subs,prod(fltdata(:,2:end),2));
% TriPVSS
if ~isempty(nsamples)
tripvss = zeros(size(tripv,1),nsamples);
switch opt.type
case 'BusinessTime'
% Don't include an increasing part of the beginning of the day
if interval <= nsamples + 1
warning('realized_var:businessSubSampling','Ensure INTERVAL > NSAMPLES +1: one (or more) subsampled RV coincides with the original RV.')
end
% Offset of the initial gridpoint for the subsamples
df = fix(interval/(nsamples+1));
% Last observation of each day
last = [0; find(diff(fix(data(:,1)))); size(data,1)];
for c = 1:nsamples
tripvss(:,c) = realized_tripv(data(mcolon(last(1:end1)+df*c+1,last(2:end),1),:),...
type,scheme,interval,[],threshold);
end
case 'CalendarTime'
% Don't include an increasing part of the beginning of the day
df = interval/(nsamples+1)/86400;
last = [0; find(diff(fix(data(:,1)))); size(data,1)];
ii = [data(last(1:end1)+1,1)...
data(last(2:end),1) + 1/(86400*1000)eps];
for c = 1:nsamples
fxdinter = mcolon(ii(:,1)+df*c, ii(:,2), interval/86400);
tripvss(:,c) = realized_tripv(data,'FixedTime',scheme,fxdinter,[],threshold);
end
case 'FixedTime'
% Shift the grid
df = diff(interval([1 2]))/(nsamples+1);
for c = 1:nsamples
tripvss(:,c) = realized_tripv(data,type,scheme,interval + df*c,[],threshold);
end
end
% Average TBPV with TBPVSS
tripvss = mean([tripv tripvss],2);
if nargout == 1
tripv = tripvss;
end
end
% Dates
if nargout == 3
dates = fix(fltdata(cumsum(accumarray(subs,1)),1));
end
end

