No BSD License  

Highlights from
meet the family

image thumbnail
from meet the family by nathan q
Visualisation of a family tree in the programming contest.

prepData.m



% Script: load the contest data and reshape it into convenient workspace
% variables.

% d is not defined in this script; presumably it comes from this MAT-file
% (TODO confirm).
load contest_data.mat

n = numel(d);


% Build id2i, a lookup vector that converts an entry id (as used in
% d.parent) into the index of that entry in d.
% Step 1: track the smallest and largest entry id.
idmin = Inf;
idmax = -Inf;
for i = 1:n
    idmin = min(idmin, d(i).id);
    idmax = max(idmax, d(i).id);
end

% Step 2: one slot per possible id up to the largest one; ids that belong
% to no entry keep the value 0.
id2i = zeros(idmax,1);
id2i([d.id]) = 1:n;

% Gather every entry's declared parent id into one vector, then translate
% each positive id into an index into d through the id2i lookup.
% Non-positive values are left exactly as they are.
parent = [d.parent];
hasParent = parent > 0;
parent(hasParent) = id2i(parent(hasParent));

% Number of unique lines in each entry.
% Preallocate so the result is always exactly 1-by-n: without this, a
% longer nULines left in the workspace by a previous run would keep its
% stale trailing elements and be passed on to filterBlanks.
nULines = zeros(1,n);
for i=1:n
    nULines(i) = numel(unique(d(i).lines));
end



% filter out entries with no code - don't count them as parents
% (filterBlanks is defined outside this script; it returns the blank
% entries and an updated parent vector.)
[blanks,parent] = filterBlanks(parent,nULines);
% Just reassign parents - too tricky to delete them
% parent(blanks)=[];
% d(blanks)=[];
% nULines(blanks)=[];
% n = numel(parent);

% Author bookkeeping:
%   authors  - 1-by-n cell array, author of each entry
%   uAuthors - the distinct authors, as returned by unique
%   authorid - 1-by-n vector with uAuthors{authorid(i)} equal to authors{i}
authors = reshape({d.author}, 1, n);
% The third output of unique is exactly the index of each element of
% authors within uAuthors, which replaces a hand-written linear search
% (that search also indexed past the end of uAuthors if nothing matched).
[uAuthors, ~, authorid] = unique(authors);
% Orientation of the index output differs between MATLAB releases; force a
% row vector so authorid is always 1-by-n.
authorid = reshape(authorid, 1, []);



% set up children lists
% (findChildren is defined outside this script.)
children = findChildren(parent);

% number of descendants of each entry
% (findNdesc is defined outside this script.)
% Preallocate so nDesc is always exactly 1-by-n, even when a longer nDesc
% is still in the workspace from an earlier run.
nDesc = zeros(1,n);
for i=1:n
    nDesc(i)=findNdesc(parent,children,i);
end


% Submission times relative to the first entry in d.
t0 = d(1).timestamp;        % reference time
t = [d.timestamp]-t0;


% A(i,j) is true if line i appears in entry j.
% allLineList is not defined in this script; presumably it is loaded from
% contest_data.mat (TODO confirm). d(j).lines is used as a list of row
% indices into A.
nL=numel(allLineList);
n=numel(d);
A=false(nL,n);
for j=1:n
    A(d(j).lines,j)=true;
end
% ignore generic lines like end, return for determining ancestry....
generic = [1 2 3 4 5 24 26  42 69 71 480];
% Only clear rows that exist: assigning to a row index beyond nL would
% silently grow A instead of raising an error.
A(generic(generic<=nL),:)=false;

% nshare(i,j) is the number of lines which entries i and j have in common.
% The result is cached in nshare.mat. The cache is only trusted when it
% holds an n-by-n nshare; otherwise (e.g. it was built from a different
% data set) it is recomputed and overwritten.
nshareCached = exist('nshare.mat','file') == 2;   % 2 means "is a file"
if nshareCached
    load nshare
    nshareCached = exist('nshare','var') == 1 && isequal(size(nshare),[n n]);
end
if ~nshareCached
    % For a 0/1 matrix, (A'*A)(i,j) = sum over lines k of A(k,i)*A(k,j),
    % i.e. the count of rows where both columns are true. The sparse
    % product yields the same integer counts as comparing column pairs
    % one at a time, without the long-running loop.
    Asp = double(sparse(A));
    nshare = full(Asp'*Asp);
    save nshare.mat nshare;
end



% Entries returned by findleaders (defined outside this script) and the
% relative times of those entries.
bestIndexList = findleaders(d);
tlead = t(bestIndexList);

% Rescale the scores linearly so that the smallest score overall maps to 1
% and the largest score among the entries selected by `passed` maps to
% 10^3.
score  = [d.score];
passed = [d.passed];
scoreLow  = min(score);
scoreSpan = max(score(passed)) - scoreLow;
scoreN = (score - scoreLow) / scoreSpan * (10^3-1) + 1;

Contact us at files@mathworks.com