% Code covered by the BSD License
%
% Highlights from
% Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures
%
% Forecasting the FTSE 100 with high-frequency data: A comparison of realized measures
%
% by
%
%
%
% 16 Sep 2011 (Updated)
%
% My dissertation for the MSc in Finance & Economics from Warwick Business School
%
% 20110916_Dissertation.m
%% NOTE: block processing
% Since the data uses ~470 MB of RAM and it runs almost immediately OUT of
% MEMORY, the script processes the input in blocks.
% I created the function bp (block-processing) to execute this operation.
% I lose 3x in performance.
%% Import FTSE100 (82 sec)
% Reads the raw CSV in chunks of 1e6 rows, converts every row to
% [serial date number, last numeric column], saves the result to raw.mat,
% then splits it into 12 date-delimited blocks that are stored as separate
% variables of dataset.mat (plus the list of block names in 'fields').
R.d = 'C:\Users\Oleg\Desktop\Dissertation\Data\';
csvFile = [R.d 'fe10ok@mail.wbs.ac.uk-FTSE100-N22887676.csv'];
[fid,errmsg] = fopen(csvFile);
if fid == -1
    % Fail here with a readable message rather than later inside feof
    error('Dissertation:cannotOpenFile','Cannot open %s: %s',csvFile,errmsg);
end
% Import 1e6 rows at a time
c = 0;
while ~feof(fid)
    c = c+1;
    % Columns kept: date as yyyymmdd, three time fields (split on ':') and
    % the last numeric column; the two %*s columns are skipped.
    % NOTE(review): 'Headerlines',1 is passed on every call, not only on
    % the first one - confirm that no data row is lost when scanning resumes.
    tmp = textscan(fid,'%*s%f%f%f%f%*s%f',1e6,'Delimiter',',:','Headerlines',1);
    % Decompose yyyymmdd into year, month, day
    y = fix(tmp{1}/1e4);
    m = fix(mod(tmp{1},1e4)/1e2);
    d = mod(tmp{1},1e2);
    data(c,1) = {[datenummx(y,m,d,tmp{2},tmp{3},tmp{4}), tmp{end}]}; %#ok
end
fclose(fid);
data = cat(1,data{:});

% Save raw data
save([R.d 'raw.mat'],'data')
% load('C:\Users\Oleg\Desktop\Dissertation\Data\raw.mat')

% Part in blocks due to RAM constraints
% 13 date edges -> 12 blocks; each block is named after its closing edge.
time    = datenum([1996; 2003; 2008; ones(10,1)*2010],[1; 1; 6; 2; (5:2:19).';22],1);
mcounts = histc(data(:,1), time);
% The last histc count (timestamps equal to the last edge) is dropped, so
% mat2cell only succeeds if all rows fall in [time(1), time(end)).
data    = mat2cell(data,mcounts(1:end-1));
fields  = cellstr(datestr(time(2:end),'tyyyymm'));
data    = cell2struct(data,fields,1);
data.fields = fields;

% Save
save([R.d 'dataset.mat'],'-struct','data','-v6')

clear all
%% [Rule 1] only 8:00-16:30
% Removes every observation stamped before 08:00 or after 16:30, plots the
% removed timestamps (figure f1.eps) and records observation counts in R.

% Colour palette, one row per colour: {name, RGB, alternative RGB}
R.clrs = {'green',[.18 .74 .40],[.18 .74 .55]
          'red'  ,[.83 .08 .02],[]
          'blue' ,[.2 .3 .5],[]};

% Number of observations before the rule is applied
nObs = @(in)numel(in(:,1));
R.totcounts(1) = sum(cell2mat(bp(nObs)));

% First and last calendar day over all blocks
dayRange = @(in) [fix(min(in(:,1))) fix(max(in(:,1)))];
perBlock = cell2mat(bp(dayRange));
R.minmax = [min(perBlock(:,1)) max(perBlock(:,2))];

% Tick positions and labels (y: time of day as day fraction, x: dates)
ylbl  = {'00:00','08:00','16:30','23:59'};
ynlim = mod(datenum(ylbl,'HH:MM'),1);
xnlim = linspace(R.minmax(1),R.minmax(2),9);
xlbl  = datestr(xnlim(2:end),'mmm yy');

% Timestamps falling outside the 08:00-16:30 window
tOpen  = ynlim(2);
tClose = ynlim(3);
isOut  = @(in) in(mod(in(:,1),1) < tOpen | mod(in(:,1),1) > tClose,1);
tOut   = cell2mat(bp(isOut));

% Figure and axes (pixel units while laying out)
figProps = {'color','w','un','pix','pos',[360 150 450 230],...
            'PaperPositionM','auto','Invert','off','rend','Painters'};
axProps  = {'un','pix','pos',[50,20,350,190],'fonts',8,...
            'Ylim',[0,1],'Ytick',ynlim,'YtickL',ylbl,...
            'Xlim',xnlim([1,end]),'Xtick',xnlim(2:end),...
            'XtickL',xlbl,'layer','top','box','on'};
h.f = figure(figProps{:});
h.a = axes(axProps{:});
set(h.a,'units','norm') % for automatic resize

% Removed observations as crosses, then the two boundary lines
h.l(1) = line(tOut,mod(tOut,1),'LineS','none','Col',R.clrs{1,2},...
              'Marker','x','MarkerS',5,'LineW',.75);
for b = 2:3
    h.l(b) = line(xnlim([1,end]),ynlim([b,b]),'LineS','-.','Col',R.clrs{2,2},'LineW',.75);
end

% Export to EPS
print(h.f, '-depsc2', '-r150','-painters', '-loose', 'f1.eps')
fix_lines('f1.eps','f1.eps')

% Drop the flagged timestamps from every block and rewrite the dataset
% (ismembc needs its second argument sorted - presumably guaranteed by the
% chronological order of the data; verify)
keepRows = @(in) in(~ismembc(in(:,1),tOut),:);
load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','fields');
data = cell2struct(bp(keepRows),fields,1);
data.fields = fields;
save('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','-struct','data','-v6')

% Number of observations after the rule
R.totcounts(2) = sum(cell2mat(bp(nObs)));

% Keep only the results structure and store it
clearvars -except R
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
%% Fig2: daily counts
% Builds daily observation counts (R.dc) and their half-hour breakdown
% (R.dch), then draws figure f2.eps: daily counts on a log scale (top) and
% the normalised intraday profile of three selected days (bottom).
% Expects R (with R.clrs) to be in the workspace.

% Daily counts of obs
% Column 1: every calendar day spanned by the block, column 2: number of
% observations on that day. The day vector is built from the truncated
% first/last day so that it always has as many rows as the accumarray
% output, whatever the time of day of the first and last observation.
f = @(in) [(fix(min(in(:,1))):fix(max(in(:,1)))).',...
           accumarray(fix(in(:,1)) - fix(min(in(:,1))) + 1,1)];
R.dc = cell2mat(bp(f)); R.dc(R.dc(:,2) == 0,2) = NaN; 

% Subs row: incremental index to which day it belongs
f    = @(in) fix(in(:,1)) - fix(min(in(:,1))) + 1;
subs = bp(f);

% Subs column: to which half hour it belongs
% 18 half-hour edges from 08:00 (16/48) to 16:30 (33/48)
hh   = 1/48*16:1/48:1/48*33;
f    = @(in) histc(mod(in(:,1),1),hh);
% bp is asked for two outputs of histc: counts go to column 3, bin indices
% to column 2 (assumes bp returns one cell column per output - confirm)
subs(:,[3,2]) = bp(f,2); 

% Daily counts with half-hour breakdown
R.dch = cellfun(@(x,y) accumarray([x y],1),subs(:,1),subs(:,2),'un',0);
R.dch = [R.dc(:,1) cat(1,R.dch{:})]; 

% Create figure and first axis
szDc  = size(R.dc);
xnlim = R.dc(round(linspace(1,szDc(1),9)),1);
xlbl  = datestr(xnlim(2:end),'mmm yy');
h.f = figure('color','w','un','pix','pos',[360 150 450 340],...
             'PaperPositionM','auto','Invert','off','rend','Painters');
h.a(1) = axes('un','pix','pos',[50,190,350,130],'fonts',8,...
              'Xlim',xnlim([1,end]),'Xtick',xnlim(2:end),...
               'XtickL',xlbl,'Yscale','log','Ylim',[10,1.2e5],...
               'Ytick',10.^(1:5),'box','on');
% Line
h.l(1) = line(R.dc(:,1),R.dc(:,2),'Color',R.clrs{3,2},'LineW',.75); 
% Circles on four hand-picked rows of R.dc
xshift = [781 784 5076 5079];
h.l(2) = line(R.dc(xshift,1),R.dc(xshift,2),'Color',R.clrs{2,2},'LineS','none',...
              'Marker','o','LineW',.75);
% Annotations (positions are in data units of the first axes)
trades = sep1000(R.dc(xshift,2),'%.0f',' ');
dates  = datestr(R.dc(xshift,1),'dd mmm yy');
pos    = [729600 60; 729200 8e3; 733300,300; 732600,4e4];
for n = 1:4
h.t(n) = text(pos(n,1),pos(n,2),{sprintf('# trades: %s',trades{n}),...
                                 sprintf('date: %s',dates(n,:))},'FontS',8);
end

% Create second axes
xlbl = ['08:30';'10:22';'12:15';'14:22';'16:30'];          
h.a(2) = axes('un','pix','pos',[50,20,350,130],'fonts',8,...
              'Xlim',[0,18],'Xtick',1:4:17,'XtickLabel',xlbl,'YGrid','on',...
           'GridL','-','Ylim',[0 3],'Ytick',1:2,'YtickLabel',[],'box','on');          
set(h.a,'units','norm','layer','top') % for automatic resize 

% Annotations: the three days whose intraday profile is shown
sel   = [729638;731321;734263];
dates = datestr(sel,'mmm dd, yyyy');
        
% Plot  
% Each profile is scaled by its own maximum and shifted vertically so the
% three curves are stacked; the code below expects exactly three matching
% rows with 17 half-hour columns.
tmp = R.dch(ismembc(R.dch(:,1),sel),2:end);
tmp = bsxfun(@plus,bsxfun(@rdivide,tmp,max(tmp,[],2)),[1.4;.4;-.3]); tmp(1) = NaN;
for n = 1:3
    % Note: reuses (overwrites) the first three entries of h.l and h.t
    h.l(n) = line(1:17,tmp(n,:),'Color',R.clrs{3,2},'LineW',.75);
    h.t(n) = text(9,3.7-n,dates(n,:),'FontS',8,'Hor','center');
end

% Print
print(h.f, '-depsc2', '-r150','-painters', '-loose','f2.eps')
fix_lines('f2.eps','f2.eps')

% Save
clearvars -except R
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
%% [Rule 2] delete after 9:30 AM - 10 days
% Drops every day whose first observation is stamped later than 09:30.
% Check opening time for each day: ...         - 17 Jul 1998 --> 8:30
%                                  20 Jul 1998 - 17 Sep 1999 --> 9:00
%                                  22 Sep 1999 - ...         --> 8:00

% Per block: [calendar day, earliest time of day]; days without any
% observation get 0 in the second column (accumarray fill value)
dayOf     = @(in) fix(in(:,1));
firstTick = @(in) [(min(dayOf(in)):max(dayOf(in))).'...
                   accumarray(dayOf(in) - min(dayOf(in)) + 1, mod(in(:,1),1),[],@min)];
openT  = bp(firstTick);
openT  = vertcat(openT{:});

% Visual check of the opening times on days that have observations
traded = openT(:,2) > 0;
plot(openT(traded,1),openT(traded,2))
datetickdyn

% Days opening after 09:30 are stored in R.rule2 and removed from the data
cutoff  = mod(datenum('09:30', 'HH:MM'),1);
R.rule2 = openT(openT(:,2) > cutoff,1);
lateDay = R.rule2;
keepRows = @(in) in(~ismembc(fix(in(:,1)),lateDay),:);
load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','fields');
data = cell2struct(bp(keepRows),fields,1);
data.fields = fields;
save('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','-struct','data','-v6')

% Number of observations left
nObs = @(in)numel(in(:,1));
R.totcounts(3) = sum(cell2mat(bp(nObs)));

% Store results only
clearvars -except R
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
%% [Rule 3] Delete days < 4h   
% Removes the days whose span between first and last observation falls in
% one of the first four one-hour bins. Expects R in the workspace.
load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','fields');

% Intraday range (last minus first timestamp of each day) binned on
% 10 edges between 0 and 36*9/864 days (= 9 hours), i.e. one-hour bins
edges  = linspace(0,36*9/864,10);
lastOf = @(t) [find(diff(fix(t))); numel(t)];   % row of the last tick of each day
firstOf = @(t) [1; find(diff(fix(t)))+1];       % row of the first tick of each day
rangeHist = @(in) histc(in(lastOf(in(:,1)),1) - in(firstOf(in(:,1)),1), edges);
% Two outputs requested from histc: counts and bin index of every day
rn = bp(rangeHist,2);

% Calendar days present in each block, aligned with the bin indices
dayList = @(in) unique(fix(in(:,1)));
dates   = bp(dayList);

% Number of days with a bin index below 5
R.rule3 = sum(cellfun(@(b) nnz(b < 5),rn(:,2)));

% Keep only the rows of days whose bin index is not 1, 2, 3 or 4
keepRows = @(in,bin,day) in(ismembc(fix(in(:,1)), day(~ismember(bin,1:4))),:);
data = cell2struct(bp(keepRows,1,rn(:,2),dates),fields,1);
data.fields = fields;
save('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','-struct','data','-v6')

% Number of observations left
R.totcounts(4) = sum(cell2mat(bp(@(in)numel(in(:,1)))));

% Store results only
clearvars -except R
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
%% [Rule 4] same timestamp
% Counts the timestamps that carry more than one price, collapses each of
% them with fltmedian and updates the daily/total observation counts.
% Expects R (with R.dc from the Fig2 cell) in the workspace.

% Count timestamps with multiple prices
% A run of equal timestamps is counted at its rising edge. The leading
% 'false' makes a run that starts on the very first row of a block count
% as well (without it that run has no rising edge and is missed).
f = @(x) sum(diff([false; (diff(x(:,1)) == 0)]) == 1);
R.multiprice = bp(f); sum([R.multiprice{:}])

% Collapse into median
data = bp(@(in) fltmedian(in),1); 
load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','fields');
data = cell2struct(data,fields,1);
data.fields = fields;
save('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','-struct','data','-v6')

% Daily counts of obs after median
% NOTE(review): the assignment requires each block to span the same first
% and last day as when R.dc was first built - confirm Rules 2-3 never
% removed the first or last day of a block.
f = @(in) accumarray(fix(in(:,1)) - fix(min(in(:,1))) + 1,1);
R.dc(:,3) = cell2mat(bp(f)); R.dc(R.dc(:,3) == 0,3) = NaN; 

% Count after change
R.totcounts(5) = sum(cell2mat(bp(@(in)numel(in(:,1)))));

% Save
clearvars -except R
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
%% [Rule 5] filter outliers
% Runs fltout on three consecutive periods with different (k, mult)
% settings, stores the flagged row indices per block in R.otl, removes
% those rows from the dataset and updates the counts kept in R.
% The period boundaries (729809 and 734105) fall inside blocks t200301 and
% t201002, so those two blocks are filtered in two parts; indices returned
% for the second part are shifted by 'offs' to refer to the whole block.
% NOTE(review): k and mult are passed straight to fltout; their meaning is
% defined there (presumably window size and threshold multiplier).
load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','fields');
load('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat')

% Select first period (47 outliers - 525 days)
data = load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','t200301');
idx  = data.t200301(:,1) < 729809;
R.ndays(1) = numel(unique(fix(data.t200301(idx,1))));
offs = sum(idx);
k    = 10; 
mult = 10;
R.otl{1} = fltout(data.t200301(idx,1),data.t200301(idx,2),k,mult);

% Select second period ( 581+1076+310 = 1967 outl; 
%                       1204+1360+381 = 2945 days)
idx = ~idx;
R.ndays(2) = numel(unique(fix(data.t200301(idx,1))));
k = 20;
R.otl{1} = cat(1,R.otl{1}, fltout(data.t200301(idx,1),data.t200301(idx,2),k,mult)+offs);

data       = load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','t200806');
R.ndays(2) = numel(unique(fix(data.t200806(:,1)))) + R.ndays(2);
R.otl{2}   = fltout(data.t200806(:,1),data.t200806(:,2),k,mult);

data = load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','t201002');
idx  = data.t201002(:,1) < 734105;
R.ndays(2) = numel(unique(fix(data.t201002(idx,1)))) + R.ndays(2);
offs = sum(idx); 
% Leave in cell 3!
R.otl{3} = fltout(data.t201002(idx,1),data.t201002(idx,2),k,mult);

% Third period (500+652+635+489+421+453+556+450+341+58 = 4555 outl;
%                42+ 63+ 41+ 43+ 43+ 43+ 40+ 41+ 42+ 8 =  406)
mult = 5;
k    = 50;
idx  = ~idx;
R.ndays(3) = numel(unique(fix(data.t201002(idx,1))));
R.otl{3} = cat(1,R.otl{3},...
               fltout(data.t201002(idx,1),data.t201002(idx,2),k,mult)+offs);

% Remaining blocks belong entirely to the third period
for n = 4:numel(fields)
    data = load('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat',fields{n});
    R.ndays(3) = numel(unique(fix(data.(fields{n})(:,1)))) + R.ndays(3);
    R.otl{n} = fltout(data.(fields{n})(:,1),data.(fields{n})(:,2),k,mult);
    disp(numel(R.otl{n}))
end

% Delete them
R.d = 'C:\Users\Oleg\Desktop\Dissertation\Data\';
% accumarray turns the flagged row indices into a per-row hit count, which
% is negated to select the rows to keep
f    = @(in,x) in(~ logical(accumarray(x,1,[size(in,1), 1])),:);
data = cell2struct(bp(f,1,R.otl.'),fields,1);
data.fields = fields;
save('C:\Users\Oleg\Desktop\Dissertation\Data\dataset.mat','-struct','data','-v6')

% Daily counts of obs after filter
f = @(in) accumarray(fix(in(:,1)) - fix(min(in(:,1))) + 1,1);
R.dc(:,4) = cell2mat(bp(f)); R.dc(R.dc(:,4) == 0,4) = NaN; 

% Percentages of observations lost from median aggregation and outliers
R.dcper = abs(diff(R.dc(:,2:end),[],2)./R.dc(:,2:3)*100);

% Count after change
R.totcounts(6) = sum(cell2mat(bp(@(in)numel(in(:,1)))));
% Replace the index lists by the number of outliers per block (column
% vector), whatever the number of blocks
R.otl = cellfun(@length,R.otl(:));

% Save
clearvars -except R
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
%% Plot index and RV5m
% Figure f3.eps: last price of each day (top) and the 5-minute realized
% variance converted with sqrt(RV*252)*100 (bottom).

% Whole dataset in one matrix
blocks = bp(@(in) in);
data   = cat(1,blocks{:});

% 5-minute realized variance on a fixed grid from 08:00 to 16:30
fxdinter = (3600*8:300:16.5*3600)/86400;
[RV5m,~,dates] = realized_var(data,'Fixed','Near',fxdinter); 

% Last observation of every day
dayEnd = [find(diff(fix(data(:,1)))); size(data,1)];
FTSE   = data(dayEnd,:);

% Date axis shared by both panels
load('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat')
xnlim = linspace(R.minmax(1),R.minmax(2),9);
xlbl  = datestr(xnlim(2:end),'mmm yy');

% Figure and the two stacked axes
h.f = figure('color','w','un','pix','pos',[360 150 450 340],...
             'PaperPositionM','auto','Invert','off','rend','Painters');
xProps = {'fonts',8,'Xlim',xnlim([1,end]),'Xtick',xnlim(2:end),'XtickL',xlbl};
h.a(1) = axes('un','pix','pos',[50,190,350,130],xProps{:},...
              'Ylim',[3e3,7e3],'box','on');
h.a(2) = axes('un','pix','pos',[50,20,350,130],xProps{:},...
              'Ylim',[0,160],'Ytick',0:40:160,'box','on');
set(h.a,'units','norm','layer','top') % for automatic resize 

% Series
annVol = sqrt(RV5m*252)*100;
h.l(1) = line(FTSE(:,1),FTSE(:,2),'Color',R.clrs{3,2},'LineW',.75,'Parent',h.a(1));
h.l(2) = line(dates,annVol,'Color',R.clrs{2,2},'LineW',.75,'Parent',h.a(2));
% h.l(2) = line(dates,sqrt(RV5mSS*252)*100,'Color',R.clrs{3,2},'LineW',.75,'Parent',h.a(2));

% Panel titles, centred just above each axes
ttl = {'FTSE 100 price index','Annualized realized volatility '};
for p = 1:2
    text(.5,1.075,ttl{p},...
         'Parent',h.a(p),'Units','norm','Horiz','center','Fonts',8)
end

% Export to EPS
print(h.f, '-depsc2', '-r150','-painters', '-loose','f3.eps')
fix_lines('f3.eps','f3.eps')   
%% Battery of RM (12 min)
% Computes 46 realized-measure series and stores them in the struct array
% R.RM: rows 1-10 use the whole sample, rows 11-46 only observations with
% timestamp >= 734107. The row numbers are relied upon by later cells
% (index vectors and n-offsets), so rows must not be reordered.
load('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat')

tic
% Intervals: cal 1s, 5s, 1m, 5m, ~1h; bus 1m, 5m.
% Entries 1-5 are time-of-day grids (day fractions, 08:00 to 16:30);
% entries 6-7 are scalars handed to the 'Busin' sampling scheme
% (presumably the number of sampling points per day - confirm in realized_*).
hr = (3600*8.5-300)/9;
R.fxdint = {(3600* 8:   1 :16.5 *3600)/86400
            (3600* 8:   5 :16.5 *3600)/86400
            (3600* 8:  60 :16.5 *3600)/86400
            (3600* 8: 300 :16.5 *3600)/86400
            (3600* 8:  hr :16.5 *3600)/86400
                      511
                      103};
% -------------------------------------------------------------------------      
% ALL PERIODS Calendar
% -------------------------------------------------------------------------
data = bp(@(in) in);
data = cat(1,data{:});
% Record constructor: measure name, frequency, type, period and the series
% multiplied by 1e4
f = @(RM,FQ,TP,PER,SER) struct('measure',RM,'type',TP,'freq',FQ,'period',PER,'series',SER*1e4);
% - RV 1m, 5m, ~1h, 1d
% --------------------
R.RM( 1,1) = f('RV'  ,'1 min','cal','all',realized_var    (data,'Fixed','Near',R.fxdint{3}));
R.RM( 2,1) = f('RV'  ,'5 min','cal','all',realized_var    (data,'Fixed','Near',R.fxdint{4}));
R.RM( 3,1) = f('RV'  ,'1 hrs','cal','all',realized_var    (data,'Fixed','Near',R.fxdint{5}));
R.RM( 4,1) = f('RV'  ,'1 day','cal','all',realized_var    (data,'Calen','Unif',2          ));
% - RSV 1m, 5m
% ------------
R.RM( 5,1) = f('RSV' ,'1 min','cal','all',realized_semivar(data,'Fixed','Near',R.fxdint{3}));
R.RM( 6,1) = f('RSV' ,'5 min','cal','all',realized_semivar(data,'Fixed','Near',R.fxdint{4}));
% - BPV 1m, 5m
% ------------
R.RM( 7,1) = f('BPV' ,'1 min','cal','all',realized_bpv    (data,'Fixed','Near',R.fxdint{3}));
R.RM( 8,1) = f('BPV' ,'5 min','cal','all',realized_bpv    (data,'Fixed','Near',R.fxdint{4}));
% - TBPV 1m, 5m
% -------------
R.RM( 9,1) = f('TBPV','1 min','cal','all',realized_tbpv   (data,'Fixed','Near',R.fxdint{3}));
R.RM(10,1) = f('TBPV','5 min','cal','all',realized_tbpv   (data,'Fixed','Near',R.fxdint{4}));

% -------------------------------------------------------------------------
% 3RD PERIOD
% -------------------------------------------------------------------------
data = bp(@(in) in(in(:,1) >= 734107,:));
data = cat(1,data{:});
% Rows whose measure name ends in 'ss' pass an extra trailing argument 10
% to the estimator (presumably the number of subsamples - confirm).
% - RV 1s, 5s, 1m, 5m, ~1h, 1d 
% ----------------------------
R.RM(11,1) = f('RV'    ,'1 sec','cal','3rd',realized_var    (data,'Fixed','Near',R.fxdint{1}   ));
R.RM(12,1) = f('RV'    ,'5 sec','cal','3rd',realized_var    (data,'Fixed','Near',R.fxdint{2}   ));
R.RM(13,1) = f('RV'    ,'1 min','cal','3rd',realized_var    (data,'Fixed','Near',R.fxdint{3}   ));
R.RM(14,1) = f('RVss'  ,'1 min','cal','3rd',realized_var    (data,'Fixed','Near',R.fxdint{3},10));
R.RM(15,1) = f('RV'    ,'1 min','bus','3rd',realized_var    (data,'Busin','Unif',R.fxdint{6}   ));
R.RM(16,1) = f('RVss'  ,'1 min','bus','3rd',realized_var    (data,'Busin','Unif',R.fxdint{6},10));
R.RM(17,1) = f('RV'    ,'5 min','cal','3rd',realized_var    (data,'Fixed','Near',R.fxdint{4}   ));
R.RM(18,1) = f('RVss'  ,'5 min','cal','3rd',realized_var    (data,'Fixed','Near',R.fxdint{4},10));
R.RM(19,1) = f('RV'    ,'5 min','bus','3rd',realized_var    (data,'Busin','Unif',R.fxdint{7}   ));
R.RM(20,1) = f('RVss'  ,'5 min','bus','3rd',realized_var    (data,'Busin','Unif',R.fxdint{7},10));
R.RM(21,1) = f('RV'    ,'1 hrs','cal','3rd',realized_var    (data,'Fixed','Near',R.fxdint{5}   ));
R.RM(22,1) = f('RV'    ,'1 day','cal','3rd',realized_var    (data,'Calen','Unif',2             ));
% - RSV 1m, 5m 
% ------------
R.RM(23,1) = f('RSV'   ,'1 min','cal','3rd',realized_semivar(data,'Fixed','Near',R.fxdint{3}   ));
R.RM(24,1) = f('RSVss' ,'1 min','cal','3rd',realized_semivar(data,'Fixed','Near',R.fxdint{3},10));
R.RM(25,1) = f('RSV'   ,'1 min','bus','3rd',realized_semivar(data,'Busin','Unif',R.fxdint{6}   ));
R.RM(26,1) = f('RSVss' ,'1 min','bus','3rd',realized_semivar(data,'Busin','Unif',R.fxdint{6},10));
R.RM(27,1) = f('RSV'   ,'5 min','cal','3rd',realized_semivar(data,'Fixed','Near',R.fxdint{4}   ));
R.RM(28,1) = f('RSVss' ,'5 min','cal','3rd',realized_semivar(data,'Fixed','Near',R.fxdint{4},10));
R.RM(29,1) = f('RSV'   ,'5 min','bus','3rd',realized_semivar(data,'Busin','Unif',R.fxdint{7}   ));
R.RM(30,1) = f('RSVss' ,'5 min','bus','3rd',realized_semivar(data,'Busin','Unif',R.fxdint{7},10));
% - BPV 1m, 5m 
% ------------
R.RM(31,1) = f('BPV'   ,'1 min','cal','3rd',realized_bpv    (data,'Fixed','Near',R.fxdint{3}   ));
R.RM(32,1) = f('BPVss' ,'1 min','cal','3rd',realized_bpv    (data,'Fixed','Near',R.fxdint{3},10));
R.RM(33,1) = f('BPV'   ,'1 min','bus','3rd',realized_bpv    (data,'Busin','Unif',R.fxdint{6}   ));
R.RM(34,1) = f('BPVss' ,'1 min','bus','3rd',realized_bpv    (data,'Busin','Unif',R.fxdint{6},10));
R.RM(35,1) = f('BPV'   ,'5 min','cal','3rd',realized_bpv    (data,'Fixed','Near',R.fxdint{4}   ));
R.RM(36,1) = f('BPVss' ,'5 min','cal','3rd',realized_bpv    (data,'Fixed','Near',R.fxdint{4},10));
R.RM(37,1) = f('BPV'   ,'5 min','bus','3rd',realized_bpv    (data,'Busin','Unif',R.fxdint{7}   ));
R.RM(38,1) = f('BPVss' ,'5 min','bus','3rd',realized_bpv    (data,'Busin','Unif',R.fxdint{7},10));
% - TBPV 1m, 5m 
% -------------
R.RM(39,1) = f('TBPV'  ,'1 min','cal','3rd',realized_tbpv   (data,'Fixed','Near',R.fxdint{3}   ));
R.RM(40,1) = f('TBPVss','1 min','cal','3rd',realized_tbpv   (data,'Fixed','Near',R.fxdint{3},10));
R.RM(41,1) = f('TBPV'  ,'1 min','bus','3rd',realized_tbpv   (data,'Busin','Unif',R.fxdint{6}   ));
R.RM(42,1) = f('TBPVss','1 min','bus','3rd',realized_tbpv   (data,'Busin','Unif',R.fxdint{6},10));
R.RM(43,1) = f('TBPV'  ,'5 min','cal','3rd',realized_tbpv   (data,'Fixed','Near',R.fxdint{4}   ));
R.RM(44,1) = f('TBPVss','5 min','cal','3rd',realized_tbpv   (data,'Fixed','Near',R.fxdint{4},10));
R.RM(45,1) = f('TBPV'  ,'5 min','bus','3rd',realized_tbpv   (data,'Busin','Unif',R.fxdint{7}   ));
R.RM(46,1) = f('TBPVss','5 min','bus','3rd',realized_tbpv   (data,'Busin','Unif',R.fxdint{7},10));

% Save results
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
toc

% Labels all periods
% Builds one LaTeX label per series column: '$\rm{<measure>^{(<freq>)}}$'.
% Measures starting with 'RSV' get two labels, one with a '-' and one with
% a '+' prefix in the superscript. Blanks in the frequency string are
% replaced by the LaTeX thin space '\,'.
lblAll = cell(1);   % dummy first cell, removed after the loop
for l = 1:10
    frq = regexprep(R.RM(l).freq,' ','\\,');
    sup = ['^{(' frq ')}']; m = R.RM(l).measure;
    if strncmp('RSV',R.RM(l).measure,3)
        sup = {['^{-~(' frq ')}']
               ['^{+~(' frq ')}']}; 
        m = {R.RM([l;l]).measure}.';
    end
    lblAll = [lblAll; strcat('$\rm{',m,sup,'}$')]; %#ok
end
lblAll(1) = [];

% Labels 3rd period
% Same as above plus a subscript with the sampling type ('cal' or 'bus').
% The '+' superscript uses '~' like every other RSV label (a plain blank
% is ignored in LaTeX math mode, so '+' used to render without the spacer).
lblPer = cell(1);   % dummy first cell, removed after the loop
for l = 11:numel(R.RM)
    frq = regexprep(R.RM(l).freq,' ','\\,');
    sub = ['_{' R.RM(l).type '}'];
    sup = ['^{(' frq ')}']; m = R.RM(l).measure;
    if strncmp('RSV',R.RM(l).measure,3)
        sup = {['^{-~(' frq ')}']
               ['^{+~(' frq ')}']}; 
        m = {R.RM([l;l]).measure}.';
    end
    lblPer = [lblPer; strcat('$\rm{',m,sup,sub,'}$')]; %#ok
end
lblPer(1) = [];

% Stats for annualized realized volatilities
% NOTE(review): 'stats' is only left in the workspace; it is not stored in R.
stats = [AllStats([R.RM( 1:10).series]) 
         AllStats([R.RM(11:46).series])];
stats = stats(:,[7 1:6]);

R.lblAll = lblAll;
R.lblPer = lblPer;
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
%% TriPV and TTriPV (8.5 min)
% Adds the field 'tripv' (realized_tripv output times 1e4) to the BPV and
% TBPV rows of R.RM: rows 7-10 on the whole sample, rows 31-46 on the
% observations with timestamp >= 734107.
load('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat')
tic
% Intervals: cal 1s, 5s, 1m, 5m, ~1h; bus 1m, 5m.
hr   = (3600*8.5-300)/9;
grid = @(step) (3600*8:step:16.5*3600)/86400;
R.fxdint = {grid(1); grid(5); grid(60); grid(300); grid(hr); 511; 103};

% Job tables: {row of R.RM, scheme, sampling, entry of R.fxdint, trailing
% arguments}. Trailing arguments are forwarded unchanged: {} none,
% {10} for the 'ss' rows, {[],true} / {10,true} for the TBPV rows.
jobsAll = {  7, 'Fixed','Near', 3, {}
             8, 'Fixed','Near', 4, {}
             9, 'Fixed','Near', 3, {[],true}
            10, 'Fixed','Near', 4, {[],true} };
jobs3rd = { 31, 'Fixed','Near', 3, {}
            32, 'Fixed','Near', 3, {10}
            33, 'Busin','Unif', 6, {}
            34, 'Busin','Unif', 6, {10}
            35, 'Fixed','Near', 4, {}
            36, 'Fixed','Near', 4, {10}
            37, 'Busin','Unif', 7, {}
            38, 'Busin','Unif', 7, {10}
            39, 'Fixed','Near', 3, {[],true}
            40, 'Fixed','Near', 3, {10,true}
            41, 'Busin','Unif', 6, {[],true}
            42, 'Busin','Unif', 6, {10,true}
            43, 'Fixed','Near', 4, {[],true}
            44, 'Fixed','Near', 4, {10,true}
            45, 'Busin','Unif', 7, {[],true}
            46, 'Busin','Unif', 7, {10,true} };

% -------------------------------------------------------------------------      
% ALL PERIODS Calendar (BPV 1m, 5m then TBPV 1m, 5m)
% -------------------------------------------------------------------------
data = bp(@(in) in);
data = cat(1,data{:});
for j = 1:size(jobsAll,1)
    [n,scheme,sampling,iv,extra] = jobsAll{j,:};
    R.RM(n,1).tripv = realized_tripv(data,scheme,sampling,R.fxdint{iv},extra{:})*1e4;
end

% -------------------------------------------------------------------------
% 3RD PERIOD (BPV rows 31-38, TBPV rows 39-46)
% -------------------------------------------------------------------------
data = bp(@(in) in(in(:,1) >= 734107,:));
data = cat(1,data{:});
for j = 1:size(jobs3rd,1)
    [n,scheme,sampling,iv,extra] = jobs3rd{j,:};
    R.RM(n,1).tripv = realized_tripv(data,scheme,sampling,R.fxdint{iv},extra{:})*1e4;
end

% Save results
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 
toc

%% JZT, thresh, regressors
% For every BPV/TBPV row: splits the paired RV series into a significant
% jump part J and a continuous part C using a z statistic at the 99.9%
% level, and builds lagged 1/5/22-day averages of both as regressors (X).
% For the RV and RSV rows: builds lagged 1/5/22-day averages (X) and, for
% RSV rows, the difference between the two series columns (dJ).
load('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat')

% Constant of the z statistic and standard normal quantile at 99.9%
theta  = pi^2/4 + pi - 5;
calpha = norminv(0.999,0,1);
% Row of the RV series paired with each BPV/TBPV row
rvidx  = [1:2,1:2,13:20,13:20];
bpvidx = [7:10,31:46];

% Trailing average over h rows (the first h-1 outputs are partial sums/h)
avg = @(x,h) filter(ones(1,h)/h, 1, x);

% Continuous and jump average components for lagged BPV and TBPV
for p = 1:numel(bpvidx)
    iRV = rvidx(p);
    iBV = bpvidx(p);
    rv  = R.RM(iRV).series;
    bpv = R.RM(iBV).series;
    % Raw jump and its positive part
    jump = rv - bpv;
    % z statistic
    % NOTE(review): the scale factor is the square root of the number of
    % rows of the series; the usual ratio test scales by the number of
    % intraday returns per day - confirm which one is intended.
    % NOTE(review): series and tripv are both scaled by 1e4, so the ratio
    % tripv./bpv.^2 is not scale free unless realized_tripv compensates -
    % confirm.
    scale = sqrt(numel(jump));
    den   = sqrt(theta*max(1,R.RM(iBV).tripv ./ bpv.^2));
    z     = scale * (jump./rv) ./ den;
    % Keep the positive jump only where z exceeds the critical value
    J = max(0,jump).*(z > calpha);
    % Continuous remainder, then the jump itself
    R.RM(iBV).C = rv - J;
    R.RM(iBV).J = J;
    % Regressors for mod. HAR: drop the last row so that row t is aligned
    % with the dependent variable at t+1
    Clag = R.RM(iBV).C(1:end-1);
    Jlag = J(1:end-1);
    R.RM(iBV).X = [Clag, avg(Clag,5), avg(Clag,22), Jlag, avg(Jlag,5), avg(Jlag,22)];
end

% RV and full decomposition for RSV lagged (1,5,22) averages
rsvRows = [5:6 23:30];
for n = [1:6 11:30]
    lagged = R.RM(n).series(1:end-1,:);
    R.RM(n).X = [lagged, avg(lagged,5), avg(lagged,22)];
    % Signed jump variation (second minus first series column), followed
    % by its negative and positive parts
    if any(n == rsvRows)
        dJ = diff(lagged,[],2);
        R.RM(n).dJ = [dJ, dJ.*(dJ < 0) dJ.*(dJ > 0)]; 
    end
end

save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 

%% HAR and modified whole
% Whole-sample regressions for rows 1-10 of R.RM. For each model the
% dependent variable is a RV series from row 23 on and the regressors are
% the lagged quantities built in the previous cell (rows 22:end).
% Results go to R.t1 (19 x 13): coefficients in odd rows, HAC t-stats in
% the even row below, and rsquare / mse / qlike of the regression of y on
% the fitted values in rows 17-19. Columns 5, 8 and 11 are left NaN.

%{ 
    1  2  3  4  5  6  7  8  9 10
RM  R  R  R  R  S  S  B  B  T  T
FR  1  5  1  1  1  5  1  5  1  5
    m  m  h  d  m  m  m  m  m  m
%}

load('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat')
% Whole horizon ('tbl' rather than 'table', which shadows a MATLAB built-in)
tbl = NaN(19,13);
% RV
for n = 1:4
    y = R.RM(n).series(23:end);
    % Sanity check: replace zero daily RV by the previous value.
    % Done sequentially so that consecutive zeros are all filled with the
    % last non-zero value; a zero in the first position has no predecessor
    % and is left as is (instead of raising an index error).
    if n == 4
        for k = find(y == 0).'
            if k > 1
                y(k) = y(k-1);
            end
        end
    end
    % Regressors (intercept,RV1bar, RV5bar, RV22bar)
    x = R.RM(n).X(22:end,:);
    % HAR
    tmp = regstats2(y,x,'linear',{'beta','yhat','hac'});
    % Mincer-Zarnowitz
    tmp2 = regstats2(y,tmp.yhat,'linear',{'rsquare','mse','qlike'});
    % Assign to table
    tbl(1:2:7,n) = tmp.beta;
    tbl(2:2:8,n) = tmp.hac.t;
    tbl(17:19,n) = [tmp2.rsquare; tmp2.mse; tmp2.qlike];
end

% RSV
for n = 5:6
    % Dependent variable: RV at the same frequency (rows 1-2)
    y = R.RM(n-4).series(23:end);
    
    % Regressors (intercept,dJ, BV1bar, RV5bar,RV22bar)
    x = [R.RM(n).dJ(22:end,1), R.RM(n+2).series(22:end-1,1) R.RM(n-4).X(22:end,2:end)];
    % HAR-RSV-dJ
    tmp = regstats2(y,x,'linear',{'beta','yhat','hac'});
    % Mincer-Zarnowitz
    tmp2 = regstats2(y,tmp.yhat,'linear',{'rsquare','mse','qlike'});
    % Assign to table (the dJ coefficient goes to rows 15-16)
    tbl([1 15, 3:2:7],n+1) = tmp.beta;
    tbl([2 16, 4:2:8],n+1) = tmp.hac.t;
    tbl(17:19        ,n+1) = [tmp2.rsquare; tmp2.mse; tmp2.qlike];
end

% BPV
for n = 7:8
    y = R.RM(n-6).series(23:end);
    % Regressors (intercept, C1bar, C5bar, C22bar, J1bar, J5bar, J22bar)
    x = R.RM(n).X(22:end,:);
    % HAR
    tmp = regstats2(y,x,'linear',{'beta','yhat','hac'});
    % Mincer-Zarnowitz
    tmp2 = regstats2(y,tmp.yhat,'linear',{'rsquare','mse','qlike'});
    % Assign to table
    tbl(1:2:13,n+2) = tmp.beta;
    tbl(2:2:14,n+2) = tmp.hac.t;
    tbl(17:19 ,n+2) = [tmp2.rsquare; tmp2.mse; tmp2.qlike];
end

% TBPV
for n = 9:10
    y = R.RM(n-8).series(23:end);
    % Regressors (intercept, C1bar, C5bar, C22bar, J1bar, J5bar, J22bar)
    x = R.RM(n).X(22:end,:);
    % HAR
    tmp = regstats2(y,x,'linear',{'beta','yhat','hac'});
    % Mincer-Zarnowitz
    tmp2 = regstats2(y,tmp.yhat,'linear',{'rsquare','mse','qlike'});
    % Assign to table
    tbl(1:2:13,n+3) = tmp.beta;
    tbl(2:2:14,n+3) = tmp.hac.t;
    tbl(17:19 ,n+3) = [tmp2.rsquare; tmp2.mse; tmp2.qlike];
end
R.t1 = tbl;
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 

%% HAR and modified 3rd
% Same regressions as the whole-sample cell, for rows 11-46 of R.RM.
% Results go to R.t2 (19 x 40): coefficients in odd rows, HAC t-stats in
% the even row below, rsquare / mse / qlike of the regression of y on the
% fitted values in rows 17-19. Columns 13, 22, 31 and 40 stay NaN.

%{
   11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46        
RM  R  R  R  R  R  R  R  R  R  R  R  R  S  S  S  S  S  S  S  S  B  B  B  B  B  B  B  B  T  T  T  T  T  T  T  T        
FR  1  5  1  1  1  1  5  5  5  5  1  1  1  1  1  1  5  5  5  5  1  1  1  1  5  5  5  5  1  1  1  1  5  5  5  5        
    s  s  m  m  m  m  m  m  m  m  h  d  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m  m        
bus             b  b        b  b              b  b        b  b        b  b        b  b        b  b        b  b        
ss           x     x     x     x           x     x     x     x     x     x     x     x     x     x     x     x        
%}

load('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat')

% Requested outputs of the two regressions
harOut = {'beta','yhat','hac'};
mzOut  = {'rsquare','mse','qlike'};

% Result matrix
out = NaN(19,40);

% RV: regress each RV series on its own lagged 1/5/22 averages
% NOTE(review): unlike the whole-sample cell, the '1 day' row (22) gets no
% zero-value fix here - confirm no zeros occur in this period.
for n = 11:22
    col = n-10;
    y   = R.RM(n).series(23:end);
    x   = R.RM(n).X(22:end,:);
    fit = regstats2(y,x,'linear',harOut);
    mz  = regstats2(y,fit.yhat,'linear',mzOut);
    out(1:2:7,col) = fit.beta;
    out(2:2:8,col) = fit.hac.t;
    out(17:19,col) = [mz.rsquare; mz.mse; mz.qlike];
end

% RSV: regressors are (intercept, dJ, BV1bar, RV5bar, RV22bar)
for n = 23:30
    col   = n-9;
    iRV   = n-10;                               % matching RV row
    y     = R.RM(iRV).series(23:end);
    dJlag = R.RM(n).dJ(22:end,1);
    bvLag = R.RM(n+8).series(22:end-1,1);       % matching BPV row
    rvAvg = R.RM(iRV).X(22:end,2:end);
    x     = [dJlag, bvLag rvAvg];
    fit = regstats2(y,x,'linear',harOut);
    mz  = regstats2(y,fit.yhat,'linear',mzOut);
    % The dJ coefficient and t-stat are stored in rows 15-16
    out([1 15, 3:2:7],col) = fit.beta;
    out([2 16, 4:2:8],col) = fit.hac.t;
    out(17:19        ,col) = [mz.rsquare; mz.mse; mz.qlike];
end

% BPV (rows 31-38) and TBPV (rows 39-46): both are paired with RV rows
% 13-20; the TBPV results are shifted one extra column to the right
for n = 31:46
    isT = n >= 39;
    y   = R.RM(n - 18 - 8*isT).series(23:end);
    x   = R.RM(n).X(22:end,:);
    fit = regstats2(y,x,'linear',harOut);
    mz  = regstats2(y,fit.yhat,'linear',mzOut);
    col = n - 8 + isT;
    out(1:2:13,col) = fit.beta;
    out(2:2:14,col) = fit.hac.t;
    out(17:19 ,col) = [mz.rsquare; mz.mse; mz.qlike];
end

R.t2 = out;
save('C:\Users\Oleg\Desktop\Dissertation\Data\Res.mat','R') 

% Contact us