Code covered by the BSD License

# Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures

### Oleg Komarov (view profile)

16 Sep 2011 (Updated )

My dissertation for the MSc in Finance & Economics from Warwick Business School

fltmedian(data,volume)
```function [out,totVol,nObs] = fltmedian(data,volume)

% FLTMEDIAN Compute the median price for time stamps with multiple prices
%
%   ... = FLTMEDIAN(DATA,VOLUME)
%
%       DATA    is an m by 2 single/double matrix:
%                   - column 1      increasing serial dates
%                   - column 2      prices
%
%       VOLUME  [optional] m-element single/double vector of volume data.
%               An empty VOLUME is treated as if it was not supplied.
%
%   [OUT, TOTVOL, NOBS] = ...
%
%       OUT     n by 2 aggregated data with unique time stamps and median
%               prices.
%
%       TOTVOL  n by 1 vector with the sum of VOLUME at each time stamp.
%               [Empty] if no VOLUME was supplied.
%
%       NOBS    n by 1 vector with the number of prices recorded at each
%               time stamp.
%
%   Errors:
%       fltmedian:data           DATA is empty, not floating point or does
%                                not have exactly 2 columns.
%       fltmedian:data1stColumn  the serial dates are not sorted ascending.
%       fltmedian:volume         VOLUME is not a floating point vector with
%                                one element per row of DATA.

% Oleg Komarov (oleg.komarov@hotmail.it)
% Tested on R14SP3 (7.1) and on R2011a. In-between compatibility is assumed.
% 28 aug 2011 - Created.

% Ninput
error(nargchk(1,2,nargin))

% Data
szData = size(data);
if isempty(data) || ~isfloat(data) || szData(2) ~= 2
    error('fltmedian:data','DATA should be a single/double m by 2 matrix.')
end
if ~issorted(data(:,1))
    error('fltmedian:data1stColumn','DATA''s 1st column (serial dates) should be sorted in ascending order.')
end

% Volume
% The function accepts at most two inputs, so VOLUME is present when
% nargin == 2. All three requirements must hold at the same time.
hasVolume = nargin == 2 && ~isempty(volume);
if hasVolume && ...
        ~(isfloat(volume) && isvector(volume) && length(volume) == szData(1))
    error('fltmedian:volume','VOLUME must be a double/single vector the same length as DATA.')
end

% Engine starts here
% -------------------------------------------------------------------------

% Flag the first row of every run of identical time stamps (m by 1)
pl = [true; logical(diff(data(:,1)))];

% Group number (1..n) of every row of DATA. It is computed before any row
% is dropped so that TOTVOL and NOBS cover every time stamp.
grp = cumsum(pl);

% Preallocate
out = zeros(nnz(pl),2);

% Median time
out(:,1) = data(pl,1);

% A row holds a single price when it opens a run and the following row
% opens another one (or when it is the last row of DATA).
isSingle   = pl & [pl(2:end); true];
idxSingles = isSingle(pl);                  % same flag, one per row of OUT
out(idxSingles,2) = data(isSingle,2);

% Median price for the time stamps with more than one price
if ~all(idxSingles)
    prices = data(~isSingle,2);
    g      = cumsum(pl(~isSingle));         % renumber remaining runs 1..k

    % Sort 'once' and call fast median.
    % Prices are sorted first, then the group numbers are sorted (sort is
    % stable), so every group is contiguous AND its prices stay sorted.
    % Sorted subscripts also make accumarray hand the values over in order.
    [prices,idx] = sort(prices);
    [g,idx]      = sort(g(idx));
    prices       = prices(idx);
    out(~idxSingles,2) = accumarray(g,prices,[],@mymedian);
end

% Total volume per time stamp
if nargout > 1
    if hasVolume
        totVol = accumarray(grp,volume(:));
    else
        totVol = [];
    end
end

% nObs
if nargout > 2
    nObs = accumarray(grp,1);
end
end

% mymedian ----------------------------------------------------------------
function out = mymedian(in)
% MYMEDIAN Median of a vector that is ALREADY sorted in ascending order.
%   Odd number of elements : the middle element is returned as is.
%   Even number of elements: the midpoint of the two central elements,
%                            computed as a + (b-a)/2.
count = numel(in);
half  = count/2;
if rem(count,2) == 1
    % Odd length: pick the central element directly
    out = in(ceil(half));
    return
end
% Even length: halfway between the two central elements
lower = in(half);
upper = in(half+1);
out   = lower + (upper-lower)/2;
end```