% Script: load the contest data and put it into a handy form.
% contest_data.mat is expected to supply the struct array d
% (NOTE(review): allLineList, used further down, presumably comes from the
% same file - confirm).
load('contest_data.mat');
n = numel(d);

% Build id2i, a lookup column that converts an entry id (as used in
% d.parent) into an index into d. Ids that belong to no entry map to 0.
% The Inf / -Inf seeds are the starting values of a running min / max, so
% idmin and idmax are defined even when d is empty.
idmin = min([Inf, d.id]);
idmax = max([-Inf, d.id]);
id2i = zeros(idmax, 1);
for i = 1:n
    id2i(d(i).id) = i;
end
% Declared parent of every entry. The raw values are entry ids; every
% positive one is translated to an index into d through id2i, while
% non-positive values are left untouched.
parent = [d.parent];
parent(parent > 0) = id2i(parent(parent > 0));
% Number of unique lines in each entry, as a 1-by-n row vector.
% Built in a single call so that nULines is always exactly n long: growing
% it element by element would keep stale trailing values if a longer
% nULines was already present in the workspace from an earlier run.
nULines = arrayfun(@(k) numel(unique(d(k).lines)), 1:n);
% filter out entries with no code - don't count them as parents
% (filterBlanks is defined elsewhere; presumably blanks identifies the
% entries without code and parent comes back with those entries bypassed -
% confirm against filterBlanks)
[blanks,parent] = filterBlanks(parent,nULines);
% Just reassign parents - too tricky to delete them
% parent(blanks)=[];
% d(blanks)=[];
% nULines(blanks)=[];
% n = numel(parent);
% Author name of every entry (1-by-n cell array) and the list of distinct
% author names.
authors = {d.author};
% authorid(i) is the position in uAuthors of the author of entry i.
% The third output of unique is exactly this mapping
% (uAuthors(authorid) reproduces authors), so no per-entry search through
% uAuthors is needed.
[uAuthors, ~, authorid] = unique(authors);
authorid = authorid(:).';   % keep authorid as a 1-by-n row vector
% set up children lists (findChildren is defined elsewhere)
children = findChildren(parent);
% Number of descendants of each entry, as a 1-by-n row vector.
% Built in a single call so that nDesc is always exactly n long, even if a
% longer nDesc is left over in the workspace from an earlier run.
nDesc = arrayfun(@(k) findNdesc(parent,children,k), 1:n);
% Submission times measured relative to the first element of d.
t0 = d(1).timestamp; % reference time
t = [d.timestamp]-t0;
% Line/entry incidence matrix: A(i,j) is true if line i appears in entry j,
% i.e. if i is one of the indices listed in d(j).lines.
% (allLineList is not defined in this script; presumably it is loaded from
% contest_data.mat - confirm.)
n = numel(d);
nL = numel(allLineList);
A = false(nL, n);
for j = 1:n
    A(d(j).lines, j) = true;
end
% Generic lines (like end, return) say nothing about ancestry, so their
% rows are cleared. The indices are hard-coded positions in the line list.
generic = [1:5, 24, 26, 42, 69, 71, 480];
A(generic, :) = false;
% nshare(i,j) is the number of lines which entries i and j have in common
if exist('nshare.mat','file') == 2
    % Reuse the cached result.
    % NOTE(review): the cache is not checked against the current data;
    % delete nshare.mat whenever d or the generic-line list changes.
    load('nshare.mat');
else
    % nshare(i,j) = sum over lines k of A(k,i) & A(k,j), which is the
    % (i,j) element of A'*A. A sparse matrix product gives the same exact
    % integer counts as comparing column i against every column in a loop,
    % without the hour-long run time.
    nshare = double(sparse(A));
    nshare = full(nshare' * nshare);
    save('nshare.mat','nshare');
end
% Score and pass flag of every entry.
score = [d.score];
passed = [d.passed];
% Entries picked out by findleaders (defined elsewhere) and their times;
% bestIndexList is used as an index into t.
bestIndexList = findleaders(d);
tlead = t(bestIndexList);
% Linear rescaling of the scores: min(score) maps to 1 and
% max(score(passed)) maps to 10^3.
scoreN = 1 + (10^3-1)*((score-min(score))/(max(score(passed))-min(score)));