%% Connect to MySQL
mym('open','localhost','root','zhir')
mym('use test')
%% Increase selected MySQL buffers in a bid to boost performance
mym(['set ' ...
'bulk_insert_buffer_size = 500000000, ' ...
'global key_buffer_size = 500000000, ' ...
'myisam_sort_buffer_size = 150000000, ' ...
'sort_buffer_size = 150000000'])
%% Read download data into a MySQL table, retaining unique records
mym('drop table if exists temp')
mym('drop table if exists fex')
mym(['create table temp (' ...
'time datetime, ' ...
'id mediumint unsigned, ' ...
'rank smallint unsigned, ' ...
'file smallint unsigned, ' ...
'fcat tinyint unsigned, ' ...
'down smallint unsigned)'])
mym(['load data infile ''D:/Program Files/Matlab/work/fexstat/fex.txt'' ' ...
'into table temp lines terminated by ''\n'' ' ...
'(@time,id,rank,file,fcat,down) ' ...
'set time = str_to_date(@time,''%d-%b-%Y %H:%i:%s'')'])
mym(['create table fex as ' ...
'select distinct * from temp ' ...
'where file > 0 ' ...
'order by file, time'])
%% Read category list into a MySQL table, retaining unique records
mym('drop table if exists temp')
mym('drop table if exists cat')
mym(['create table temp (' ...
'fcat tinyint unsigned, ' ...
'drilldown tinyint unsigned, ' ...
'catname varchar(70))'])
mym(['load data infile ''D:/Program Files/Matlab/work/fexstat/cat.txt'' ' ...
'into table temp lines terminated by ''\n'' '])
mym('create table cat as select distinct * from temp order by fcat')
%% Switch from 'stock' to 'flow' downloads and change time accounting
mym('alter table fex add row int auto_increment primary key')
mym('drop table if exists filestats')
mym(['create table filestats as ' ...
'select a.id, a.file, a.fcat, ' ...
'date(a.time) as date, ' ...
'hour(a.time) as hour, ' ...
'(b.down - a.down) as down ' ...
'from fex a left join fex b ' ...
'on a.file = b.file ' ...
'and b.row = a.row + 1 ' ...
'and timestampdiff(minute,a.time,b.time) between 40 and 80']);
mym('alter table filestats add scan int')
mym('update filestats set scan = 24*datediff(date,''2007-11-16'') + hour')
%% Index resulting table
mym('create index i on filestats(id)')
mym('create index j on filestats(file)')
mym('create index k on filestats(scan)')
%% Check completeness of author cross-sections, tracking the number of distinct ids found at any time
mym('drop table if exists temp')
mym('drop table if exists badscans')
mym(['create table temp as ' ...
'select scan, count(distinct id) as n ' ...
'from filestats ' ...
'group by scan'])
mym(['create table badscans as ' ....
'select scan ' ...
'from temp ' ...
'where n < 2560'])
%% Calculate and plot downloads-per-hour totals for 'cleared' cross-sections
[t,x] = mym(['select distinct scan, sum(down) as n ' ...
'from filestats ' ...
'where scan not in (select scan from badscans) ' ...
'group by scan ' ...
'having n is not null']);
prepplot(t,x)
plot(t,x,'LineWidth',1,'Color',[.8 .8 .8],'LineStyle','.')
plot(t,hpfilter(x,4),'LineWidth',2,'Color',[.9 0 0],'LineStyle','-')
title('\bfDownloads of FEX submissions, November 16-22, 2007')
xlabel({'Number of downloads per hour, all available submissions', ...
'(Local time GMT; solid line plots a Hodrick-Prescott smoothed series)'})
%% Examine the distribution of weekly downloads-per-author (ignoring incomplete cross-sections)
mym('drop table if exists authorstats')
mym('drop table if exists authortotals')
mym(['create table authorstats as ' ...
'select distinct id, scan, sum(down) as down ' ...
'from filestats ' ...
'where scan not in (select scan from badscans) ' ...
'group by id, scan'])
mym(['create table authortotals as ' ...
'select distinct id, sum(down) as down ' ...
'from authorstats ' ...
'group by id ' ...
'order by down desc'])
mym('alter table authortotals add rank smallint auto_increment primary key')
%% Plot download history for the contributor with most downloads over the period
[t,x] = mym(['select scan, down ' ...
'from authorstats ' ...
'where id in (select id from authortotals where rank = 1) ' ...
'and down is not null']);
prepplot(t,x)
bar(t,x,'r')
title({'\bfDownloads of submissions by a selected contributor, November 16-22, 2007', ...
'\rm(Contributor with most downloads over the observation period)'})
xlabel({'Number of downloads per hour','(Local time GMT)'})
%% Plot download history for the contributor with most downloads of a file in a single hour
[d,n] = mym(['select distinct down, '...
'count(*) as n ' ...
'from filestats ' ...
'group by down ' ...
'order by down desc']);
[d n] %#ok
mym('drop table if exists temp1')
mym(['create table temp1 as ' ...
'select distinct id' ...
'from filestats ' ...
'where down > 300'])
[t,x] = mym(['select scan, down ' ...
'from authorstats ' ...
'where id in (select id from temp1) ' ...
'and down is not null']);
prepplot(t,x)
bar(t,x,'r')
title({'\bfDownloads of submissions by a selected contributor, November 16-22, 2007', ...
'\rm(Contributor with most downloads of a file in a single hour)'})
xlabel({'Number of downloads per hour','(Local time GMT)'})
%% Calculate and plot downloads per category
mym('drop table if exists temp')
mym('drop table if exists cattotals')
mym(['create table temp as ' ...
'select distinct fcat, sum(down) as down ' ...
'from filestats ' ...
'where scan not in (select scan from badscans) ' ...
'group by fcat ' ...
'order by down'])
mym(['create table cattotals as '...
'select temp.*, cat.catname, cat.drilldown ' ...
'from temp left join cat using (fcat) ' ...
'order by down desc'])
[x,n] = mym('select down, catname from cattotals');
n = deblank(n);
c = x/sum(x) < .009;
x = [x(~c); sum(x(c))];
n = [n(~c); {'Other'}];
n = strcat('\fontsize{7}',n);
for i = 1:length(n)
n{i} = [n{i} ', ' num2str(x(i))];
end
figure
set(gcf,'Color','w')
pie(x,n)
colormap autumn
title('\bfDownloads per category, November 16-22, 2007')
%%
% Example: none (Oh, the FEX code metrics..)