function [out,spec] = fltprice(data,type,scheme,interval)
% FLTPRICE Filter a single intraday time series with a sampling scheme
%
%   ... = FLTPRICE(DATA,TYPE,SCHEME,INTERVAL)
%
%   DATA is an m by 2 single/double matrix:
%       - column 1  increasing serial dates
%       - column 2  prices
%
%   TYPE is a string indicating how data is sampled (case-insensitive,
%   unambiguous leading abbreviations are accepted).
%       - 'CalendarTime'  sampling on calendar time forms a
%                         regular intraday grid every INTERVAL
%                         seconds. The timestamps of the
%                         observations do not necessarily fall on
%                         the gridpoints and the datapoints
%                         are selected according to the SCHEME.
%       - 'BusinessTime'  sampling on business time forms a
%                         regular intraday grid every INTERVAL
%                         ticks/trades. Gridpoints fall exactly
%                         on observations rather than on
%                         timestamps.
%       - 'FixedTime'     sample at specific points in time.
%                         When using 'FixedTime', INTERVAL is either
%                         a vector of intraday values in [0 1] or a
%                         vector of serial dates.
%
%   SCHEME is a string indicating how the datapoints corresponding to
%   the gridpoints are selected (case-insensitive, unambiguous leading
%   abbreviations are accepted).
%
%   'CalendarTime' and 'FixedTime' schemes:
%       - 'First'     first observation after the previous
%                     gridpoint.
%       - 'Last'      last observation after the previous
%                     gridpoint.
%       - 'Min'       observation with minimum price in the
%                     interval (previous, actual] gridpoint.
%       - 'Max'       observation with maximum price in the
%                     interval (previous, actual] gridpoint.
%       - 'Previous'  last available observation up to the
%                     actual gridpoint (aka last price
%                     interpolation).
%       - 'Next'      first available observation after the
%                     actual gridpoint (aka first price
%                     interpolation).
%       - 'Linear'    interpolation between the datapoints
%                     selected with 'Previous' and 'Next'
%                     (implemented as their arithmetic mean).
%       - 'Nearest'   closest datapoint to the gridpoint.
%       - 'Uniform'   daily first and last observations are
%                     included by default and the remaining
%                     INTERVAL-2 points are selected as
%                     'Nearest' from a uniformly spread grid.
%                     Only for 'CalendarTime'.
%
%   'BusinessTime' schemes:
%       - 'Standard'  the gridpoints fall exactly on the
%                     datapoints.
%       - 'Uniform'   daily first and last observations are
%                     included by default and the remaining
%                     INTERVAL-2 points are interleaved with an
%                     equal amount of trades.
%
%   INTERVAL indicates the frequency of sampling depending on the TYPE.
%       'CalendarTime': should be a scalar value between [1 86400].
%                       Sampling in seconds.
%       'BusinessTime': should be a positive integer value.
%                       Sampling in ticks.
%       'FixedTime'   : should be a non-negative single/double scalar
%                       or vector sorted in ascending order. If all the
%                       values fall in the [0 1] range it is an
%                       intraday grid which will be replicated
%                       for all days. Otherwise, INTERVAL should
%                       contain serial dates.
%                       Note: 1 second = 1/86400. 18:39 = (3600*18 + 39*60)/86400.
%
%   [OUT,SPEC] = ...
%
%   OUT   filtered DATA (only prices for the 'Max' and 'Min' schemes).
%   SPEC  a structure with TYPE, SCHEME and INTERVAL chosen.
%
% Examples:
%       % Last observation of every 5-minute (300 seconds) calendar bin
%       out = fltprice(data,'CalendarTime','Last',300);
%
% See also REALIZED_COMPUTE_MEDIAN, REALIZED_VAR, HISTC, MCOLON

% Based on Sheppard's REALIZED_COMPUTE_MEDIAN - MFE toolbox v.3 (lastupdate 15 Mar 2011)
% Author: Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. In-between compatibility is assumed.
% 25 aug 2011 - Created

% Ninput
error(nargchk(4,4,nargin))

% Data
szData = size(data);
if isempty(data) || ~isfloat(data) || szData(2) ~= 2
    error('fltprice:data','DATA should be a single/double m by 2 matrix.')
end
if ~issorted(data(:,1))
    error('fltprice:data1stColumn','DATA''s 1st column (serial dates) should be sorted in ascending order.')
end

% Type
if ~ischar(type) || ~isvector(type)
    error('fltprice:type','TYPE should be a string.')
else
    % Try to match (case-insensitive prefix match; the three names have
    % distinct first letters)
    whichType = {'CalendarTime','BusinessTime','FixedTime'};
    idx = strncmpi(type,whichType,numel(type));
    if any(idx)
        type = whichType{idx};
    else
        error('fltprice:type','TYPE ''%s'' unrecognized.',type)
    end
end

% Scheme
if ~ischar(scheme) || ~isvector(scheme)
    error('fltprice:scheme','SCHEME should be a string.')
else
    % Try to match
    [err,scheme] = getScheme(scheme,type);
    if ~isempty(err)
        % ERR embeds the user supplied SCHEME: pass it as data and not as
        % the format string, so '%' or '\' in it are printed verbatim
        error('fltprice:scheme','%s',err)
    end
end

% Interval
switch type
    case 'FixedTime'
        % Any single violation must be rejected, hence the ORs. The
        % short-circuit order protects ISSORTED and the comparison from
        % non-numeric or empty input.
        if ~isnumeric(interval) || isempty(interval) || ...
                ~issorted(interval(:)) || any(interval(:) < 0)
            error('fltprice:interval','TYPE ''FixedTime'': INTERVAL should be positive and a scalar or a sorted vector.')
        end
    case 'CalendarTime'
        if ~isnumeric(interval) || isempty(interval) || ~isscalar(interval) || ...
                interval < 1 || interval > 86400
            error('fltprice:interval','TYPE ''CalendarTime'': INTERVAL should be a value between [1 86400].')
        end
    case 'BusinessTime'
        if ~isnumeric(interval) || isempty(interval) || ~isscalar(interval) || ...
                mod(interval,1) > 0 || interval < 1
            error('fltprice:interval','TYPE ''BusinessTime'': INTERVAL should be a positive integer value.')
        end
end

% -------------------------------------------------------------------------
% ENGINE: create grid according to the TYPE
% -------------------------------------------------------------------------
% Slightly less than millisecond tolerance
tol = 1/(86400*1000)-eps;

% Position of last observations for each day (with a leading 0, so that
% last(k)+1 is the first observation of day k)
last = [0; find(diff(fix(data(:,1)))); szData(1)];

% Create grid
switch type
    case 'BusinessTime'
        idx = false(szData(1),1);

    case 'CalendarTime'
        % 'Uniform' builds its own grid in the scheme section
        if ~strcmp(scheme,'Uniform')
            tgrid = mcolon(data(last(1:end-1)+1,1),...
                           data(last(2:end)    ,1), interval/86400);
            % 'Max' and 'Min' bin the data themselves (they also need the
            % bin index)
            if ~any(strcmp(scheme,{'Max','Min'}))
                n = histc(data(:,1), tgrid + tol);
            end
        end

    case 'FixedTime'
        % Vector of intraday times [0 1]: replicate grid for every day
        if all(interval <= 1)
            tgrid = bsxfun(@plus,interval(:),fix(data(last(2:end),1)).');
            tgrid = tgrid(:);
        % Grid with serial dates, use as is
        else
            tgrid = interval(:);
        end
        % Add beginning of the day or the very first gridpoint is skipped
        tgrid = [fix(tgrid(1))-tol; tgrid];

        % Trim consecutive days whose datapoints fall before the grid
        % Example: each day data are recorded from 8:00-12:00. If we sample
        % from 13:00-16:00 then we'll trim everything away.
        [n,bin] = histc(data(:,1), tgrid + tol);
        if bin(1) == 0
            from = find(diff(bin == 0) == -1,1,'first');
            bin = bin(from+1:end,:);
        else
            from = 0;
        end

        % Trim gridpoints that don't capture any observation
        idx = [n(1:end-1)~=0; true];
        tgrid = tgrid(idx);
        n = n(idx);

        % Shrink data to the first observation before the second gridpoint
        data = data(from + n(1):end,:);
        last(2:end) = last(2:end)-n(1)+1;
        szData(1) = last(end);

        % Adjust n and tgrid because the schemes always include the first point
        tgrid = tgrid(2:end);
        n = n(2:end);

        if isempty(data)
            warning('fltprice:data','No data fall between INTERVALs.')
            out = [];
            % SPEC has to be assigned on this path too, otherwise a call
            % with two outputs fails on the unassigned output argument
            if nargout == 2
                spec = struct('type',type,'scheme',scheme,'interval',interval);
            end
            return
        end
end

% -------------------------------------------------------------------------
% ENGINE: select observations from the grid according to the SCHEME
% -------------------------------------------------------------------------
switch scheme
    case 'First'
        % If no values are found within a grid interval we don't want to
        % interpolate the first value. This is accomplished with consequent
        % indexing of the same position instead of using position themselves.
        idx = false(szData(1),1);
        n = cumsum([2; n(1:end-1)]);
        % Take care of values that fall to the next day
        n(ismember(n,last+1) | n > szData(1)) = [];
        idx(n) = true;

    case 'Last'
        % Same concept for missing values as in First
        idx = false(szData(1),1);
        idx(cumsum([1; n(1:end-1)])) = true;

    case 'Linear'
        % Calculate Previous and Next and interpolate time and price
        prev = cumsum([1; n(1:end-1)]);
        next = prev + 1;
        % Remove first and last values
        prev(ismember(prev,last+1)) = [];
        [idx,loc] = ismember(prev,last);
        prev = prev(~idx);
        next(ismember(next,[last+1;last+2]) | next > szData(1)) = [];
        % Output: arithmetic mean (midpoint) of the Previous and Next rows,
        % applied to both the date and the price column
        out = (data(prev,:) + data(next,:))/2;
        % Add first/last back and sort back (not efficient but will do for
        % now). SORTROWS on the date column keeps each price attached to
        % its timestamp; a plain SORT would sort the two columns
        % independently of each other.
        out = sortrows([out;
                        data(last(1:end-1)+1,:);
                        data(last(setdiff(loc,0)),:)],1);

    case 'Max'
        if ~strcmp(type,'FixedTime')
            [n,bin] = histc(data(:,1), tgrid + tol);
        end
        % Only prices because there can be multiple max prices per grid
        % interval
        out = accumarray([1;bin(2:end-1)],data(1:sum(n)+1,2),[],@max);

    case 'Min'
        if ~strcmp(type,'FixedTime')
            [n,bin] = histc(data(:,1), tgrid + tol);
        end
        % Only prices because there can be multiple min prices per grid
        % interval
        out = accumarray([1;bin(2:end-1)],data(1:sum(n)+1,2),[],@min);

    case 'Nearest'
        % Previous and next
        n = cumsum([1; n(1:end-1)]);
        n = [n, n+1];
        % If next overshoots the day set to last observation
        if n(end,2) > szData(1)
            n(end,2) = szData(1);
        end
        % Find minimum distance from gridpoint (nearest)
        [~,pos] = min(diff([data(n(:,1),1) tgrid(:)...
                            data(n(:,2),1)],[],2),[],2);
        numn = size(n,1);
        % Select previous or next whichever closer (linear index into the
        % two-column matrix of positions)
        idx = n((1:numn).' + (pos-1)*numn);

    case 'Next'
        % First price interpolation
        % Use positions directly to obtain carry-on interpolation
        idx = cumsum([2; n(1:end-1)]);
        idx(ismembc(idx,last+1) | idx > szData(1)) = [];

    case 'Previous'
        % Last price interpolation
        % Use positions directly to obtain carry-on interpolation
        idx = cumsum([1; n(1:end-1)]);
        idx = idx(~ismembc(idx,last));

    case 'Standard'
        % Every INTERVAL-th observation, restarting on each day
        idx = mcolon(last(1:end-1)+1, last(2:end), interval);

    case 'Uniform'
        if interval == 2
            % Only the daily first and last observations
            idx = [last(1:end-1)+1, last(2:end)].';
            idx = idx(:);
        elseif interval > 2
            % In BusinessTime
            if strcmp(type,'BusinessTime')
                idx = fix(mcolon(last(1:end-1)+1,last(2:end),diff(last)/(interval-1)));
            % In CalendarTime with Nearest interpolation
            else
                % Uniformly spaced grid in time
                tgrid = mcolon(data(last(1:end-1)+1,1),data(last(2:end),1),...
                    diff([data(last(1:end-1)+1,1) data(last(2:end),1)],[],2)/(interval-1));
                n = histc(data(:,1), tgrid + tol);
                % Previous and next
                pn = cumsum([1; n(1:end-1)]);
                pn = [pn, pn+1];
                % If next overshoots the day set to last observation
                if pn(end,2) > szData(1)
                    pn(end,2) = szData(1);
                end
                % Find minimum distance from gridpoint (nearest)
                [~,pos] = min(diff([data(pn(:,1),1) tgrid(:)...
                                    data(pn(:,2),1)],[],2),[],2);
                numn = numel(n);
                % Select previous or next whichever closer
                idx = pn((1:numn).' + (pos-1)*numn);
            end
        else
            error('fltprice:uniformInterval','When SCHEME is ''Uniform'', the INTERVAL should be > 1.')
        end
end

% Return actual prices with dates ('Linear' already built OUT; for 'Max'
% and 'Min' IDX still holds the value assigned while creating the grid)
if ~strcmp(scheme,'Linear')
    out = data(idx,:);
end

% Optionally return specifications
if nargout == 2
    spec = struct('type',type,'scheme',scheme,'interval',interval);
end
end
% getScheme ---------------------------------------------------------------
function [err,scheme] = getScheme(scheme,type)
% GETSCHEME Resolve a (possibly abbreviated) SCHEME name and check it against TYPE
%
%   [ERR,SCHEME] = GETSCHEME(SCHEME,TYPE)
%
%   SCHEME  char vector, matched case-insensitively as a leading
%           abbreviation of one of the available scheme names.
%   TYPE    full sampling type name: 'CalendarTime', 'BusinessTime' or
%           'FixedTime'.
%
%   ERR     empty char on success, otherwise the message describing why
%           SCHEME was rejected (unrecognized, ambiguous, or not allowed
%           with TYPE).
%   SCHEME  the full scheme name when exactly one name matched, otherwise
%           the input returned unchanged.

% Initialize error
err = '';

% Available schemes
whichScheme = {'First','Last','Linear','Max','Min','Nearest','Next','Previous','Standard','Uniform'};

% Try to match
idx = strncmpi(scheme,whichScheme,numel(scheme));

% # of matches
nidx = nnz(idx);

% No match
if nidx == 0
    err = sprintf('SCHEME ''%s'' unrecognized.',scheme);
    return
end

% Ambiguous scheme: any number of candidates above one, all of them listed
if nidx > 1
    matches = whichScheme(idx);
    if nidx > 2
        head = sprintf('''%s'', ',matches{1:end-2});
    else
        head = '';
    end
    candidates = [head, sprintf('''%s'' or ''%s''',matches{end-1:end})];
    err = sprintf('SCHEME ''%s'' is ambiguous. Did you mean %s?',scheme,candidates);
    return
end

% Regular match
scheme = whichScheme{idx};

% Check limited scheme-type
switch scheme
    case {'First','Last','Linear','Max','Min','Nearest','Next','Previous'}
        if strcmp(type,'BusinessTime')
            err = sprintf('SCHEME ''%s'' not allowed with TYPE ''BusinessTime''.',scheme);
        end
    case 'Uniform'
        if strcmp(type, 'FixedTime')
            err = sprintf('SCHEME ''%s'' not allowed with TYPE ''FixedTime''.',scheme);
        end
    case 'Standard'
        if ~strcmp(type,'BusinessTime')
            err = 'SCHEME ''Standard'' is limited to TYPE ''BusinessTime''.';
        end
end
end