Code covered by the BSD License  

Highlights from
CODIS

image thumbnail

CODIS

by

 

14 May 2009 (Updated )

Combined DNA Index System: a GUI tool for forensic paternity lawsuit

out=tpi(alleles)
function out=tpi(alleles)

%remove all alleles with null frequency
alleles(alleles==0)=[];
%N=number of alleles
N=length(alleles);
%G=number of possible genotypes given N alleles
G=0.5*N*(N+1); 
%c=number of possible combinations of Father's and Mother's genotypes
C=G^2; 
%P=number of possible child genotype (and Paternity Index) given C combinations
CG=4*C;

%start to costruct all possible combinations of N alleles
%for example if N=3
%[J I]=ndgrid(1:1:N, 1:1:N);
%J= 1 1 1      I= 1 2 3
%   2 2 2         1 2 3 
%   3 3 3         1 2 3
%I=tril(I); J=tril(J); 
%J= 1 0 0      I= 1 0 0
%   2 2 0         1 2 0 
%   3 3 3         1 2 3
%I=I(:); J=J(:);
%J=[1 2 3 0 2 3 0 0 3]; I=[1 1 1 0 2 2 0 0 3]
%I(I==0)=[]; J(J==0)=[];
%J=[1 2 3 2 3 3]; I=[1 1 1 2 2 3];
%
%All possible genotypes G will be A=[I J]
% A= 1 1
%    1 2
%    1 3
%    2 2
%    2 3
%    3 3
[J I]=ndgrid(1:1:N, 1:1:N);
%we don't need N anymore
clear N
I=tril(I); J=tril(J); 
I=I(:); J=J(:);
I(I==0)=[]; J(J==0)=[];

%G genotypes can be combine in C=G^2 ways each other. So each row must be
%expanded G times.
%vectors preallocation to boost execution
tmp=zeros(C,4); 
%expand each I and J element G times (Father's genotypes)
tmp(:,1)=rldecode(I,repmat(G,G,1));
tmp(:,2)=rldecode(J,repmat(G,G,1));
%replicate I and G vectors G times (Mother's genotypes)
tmp(:,3)=repmat(I,G,1);
tmp(:,4)=repmat(J,G,1);
% tmp =
%      1     1     1     1
%      1     1     1     2
%      1     1     1     3
%      1     1     2     2
%      1     1     2     3
%      1     1     3     3
%      1     2     1     1
%      1     2     1     2
%      1     2     1     3
%      1     2     2     2
%      1     2     2     3
%      1     2     3     3
%      1     3     1     1
%      1     3     1     2
%      1     3     1     3
%      1     3     2     2
%      1     3     2     3
%      1     3     3     3
%      2     2     1     1
%      2     2     1     2
%      2     2     1     3
%      2     2     2     2
%      2     2     2     3
%      2     2     3     3
%      2     3     1     1
%      2     3     1     2
%      2     3     1     3
%      2     3     2     2
%      2     3     2     3
%      2     3     3     3
%      3     3     1     1
%      3     3     1     2
%      3     3     1     3
%      3     3     2     2
%      3     3     2     3
%      3     3     3     3
%we don't need I J anymore
clear I J 

%Now for each Father-Mother genotypes combination there are 4 possible Child's
%genotypes. So expand each tmp row 4 times.
%vectors preallocation to boost execution
gs=zeros(CG,6);
%expand Father's genotypes 4 times
gs(:,1)=rldecode(tmp(:,1),repmat(4,C,1));
gs(:,2)=rldecode(tmp(:,2),repmat(4,C,1));
%expand Mother's genotypes 4 times
gs(:,3)=rldecode(tmp(:,3),repmat(4,C,1));
gs(:,4)=rldecode(tmp(:,4),repmat(4,C,1));
%To construct the Child's genotype use a "chess board" approach
%Father's Alleles
gs(1:4:CG,5)=gs(1:4:CG,1);
gs(2:4:CG,5)=gs(2:4:CG,1);
gs(3:4:CG,5)=gs(3:4:CG,2);
gs(4:4:CG,5)=gs(4:4:CG,2);
%Mother's Alleles
gs(1:4:CG,6)=gs(1:4:CG,3);
gs(2:4:CG,6)=gs(2:4:CG,4);
gs(3:4:CG,6)=gs(3:4:CG,3);
gs(4:4:CG,6)=gs(4:4:CG,4);

%Now we can calculate all possible Paternity Index (PI).
%The PI can be calculated in this way:
%C=PP & F=PP => PI=1/p (p is the frequency of the Paternal allele)
%C=PP & F=PX => PI=1/2p
%M=PP|PX & C=PQ & F=QQ =>PI=1/q (q is the frequency of the Paternal allele)
%M=PP|PX & C=PQ & F=QX =>PI=1/2q 
%M=PQ C=PQ & F=PP|QQ|PQ => PI=1/(p+q) (sum of the frequencies of both alleles)
%M=PQ C=PQ & F=PX|QX => PI=1/(2(p+q))
%We can use a trick to use one formula for all occasion.
%HF=1 if Father is homozygous; 0 if heterozygous;
%HM=1 if Mother is homozygous; 0 if heterozygous;
%HC=1 if Child  is homozygous; 0 if heterozygous;
HF=gs(:,1)==gs(:,2); 
HM=gs(:,3)==gs(:,4);
HC=gs(:,5)==gs(:,6);
%If M=PQ and C=PQ in the denominator of PI there is the sum of the frequencies
%of both child's alleles; else there is only the frequencypaternal transmitted
%allele. So Mother's and Child's loci must be heterozygous and equal.
%To compare Child's and Mother's loci, the Child's locus must be sorted
SC=sort(gs(:,[5 6]),2); MCequality=all(gs(:,[3 4])==SC,2);
EqFlag= ~HM & ~HC & MCequality;
%The Paternal trasmitted allele is the gs column 5. 
%The Maternal trasmitted allele is the gs column 6. 
%If EqFlag is 1 the frequencies of both allele are summed; else only paternal
%allele frequency is considered.
Den=sum(alleles(gs(:,[5 6])).*[ones(CG,1) EqFlag],2);
%we don't need EqFlag anymore
clear EqFlag
%When M=C=F=PQ PI=1/(p+q) and not PI=1/(2*(p+q)). So, this is corrected using
%(1+Amb) as nominator.
%Amb is 1 if Father's, Mother's and Child's loci are heterozygous and equal.
Amb= ~HF & ~HM & ~HC & all(gs(:,[1 2])==gs(:,[3 4]),2) & all(gs(:,[1 2])==SC,2) & MCequality;
%we don't need HM HC MCequality SC gs anymore
clear HM HC MCequality SC gs
%Finally PI=(1+Amb)/((2-HF)*Den);
%vectors preallocation to boost execution
PItrio=zeros(CG,2);
PItrio(:,1)=(1+Amb)./((2-HF).*Den);
%we don't need HF Amb Den CG anymore
clear HF Amb Den CG

%Compute the probabilies to see all possible genotypes under Hardy-Weinberg
%Equilibrium
%column vector x row vector = G x G matrix
%on the diagonal p^2 (probabilities to see homozygotes)
%on the other cells pq (half probabilities to see heterozygotes)
X=alleles'*alleles; 
%sum the upper triangular matrix with the lower triangular matrix, so pq became
%2pq
X=tril(X)+triu(X,1)';
%trasform the matrix into a column vector
Y=X(:); 
%delete zeros of the triangular matrix: now we have a vector with all the
%probabilities to see a particular genotype (sum(Y)=1)
Y(Y==0)=[]; 
%we don't need X anymore
clear X  
%Now compute the probability to observe each Father-Mother genotypes combination
%vectors preallocation to boost execution
ProbMF=zeros(C,3);
%expand each Y element G times (Father's genotype probability)
ProbMF(:,1)=rldecode(Y,repmat(G,G,1));
%replicate Y vectors G times (Mother's genotype probability)
ProbMF(:,2)=repmat(Y,G,1);
%we don't need G Y anymore
clear G Y
%multiplicate probabilities row by row 
ProbMF(:,3)=prod(ProbMF(:,1:2),2);
%Finally to compute the probability to observe each PI we must expand each
%ProbMF row 4 times and divide each probability by 4 because for each
%Father-Mother combination the probability to observe a 1 of 4 Child's
%genotypes is equal.
PItrio(:,2)=rldecode(ProbMF(:,3),repmat(4,C,1))./4;
%we don't need C ProbMF anymore
clear C ProbMF
%to compute the TPI by Brenner TPI=1/(2*Hom). Hom=Homozygosity=sum(alleles^2)
out.PItrio=PItrio;
out.TPItrio.Brenner=1/(2*sum(alleles.^2));
out.TPItrio.Cardillo=median(PItrio(PItrio(:,2)==max(PItrio(:,2)),1));
%we don't need PItrio anymore
clear PItrio

%Now compute all possible PI for a Father-Child duo
Idx=tmp(:,1)==tmp(:,3) | tmp(:,1)==tmp(:,4) | tmp(:,2)==tmp(:,3) |tmp(:,2)==tmp(:,4);
tmp(Idx==0,:)=[]; L=length(tmp);
clear Idx
%The PI can be calculated in this way:
%C=PP & F=PP => PI=1/p (p is the frequency of the P allele)
%C=PP & F=PX => PI=1/2p
%C=QP & F=PP => PI=1/2p
%C=PQ & F=QQ => PI=1/2q (q is the frequency of the Q allele)
%C=QP & F=PX => PI=1/4p
%C=PQ & F=QX => PI=1/4q
%C=PQ & F=PQ => PI=(p+q)/4*(p*q)
%So, with the exception of the last case, given:
%NC alleles of the child's locus (1 if homozygous - 2 if heterozygous);
%NF alleles of the alleged father's locus (1 if homozygous - 2 if heterozygous);
%f frequency of the shared allele
%PI(I)=1/(NC*NF*f)
HF=tmp(:,1)==tmp(:,2); HC=tmp(:,3)==tmp(:,4);
FCequality=all(tmp(:,[1 2])==tmp(:,[3 4]),2);
%we don't need FCequality anymore
Flag=~HF & ~HC & FCequality;
clear FCequality
PIduo=zeros(L,1);
PIduo(Flag,1)=sum(alleles(tmp(Flag,[3 4])),2)./prod(alleles(tmp(Flag,[3 4])),2)./4;
PA1=tmp(:,1)==tmp(:,3) & ~Flag;
PA2= ~PA1 & tmp(:,1)==tmp(:,4) & ~Flag;
TA1=any([PA1 PA2],2);
clear PA1 PA2 L
PIduo(TA1,1)=1./((2-HF(TA1)).*(2-HC(TA1)).*(alleles(tmp(TA1,1)))');
TA2=~any([Flag TA1],2);
clear Flag TA1
PIduo(TA2,1)=1./((2-HF(TA2)).*(2-HC(TA2)).*(alleles(tmp(TA2,1)))');
clear TA2 
PIduo(:,2)=((2-HF).*prod(alleles(tmp(:,[1 2])),2)).*((2-HC).*prod(alleles(tmp(:,[3 4])),2));
clear HC HF tmp
out.PIduo=PIduo;
out.TPIduo.Cardillo=median(PIduo(PIduo(:,2)==max(PIduo(:,2)),1));
%we don't need PIduo anymore
clear PIduo

function x=rldecode(val,len)
%RLDECODE Run-length decoding of run-length encode data.
%
%   X = RLDECODE(VAL, LEN) returns a vector XLEN with the length of each run
%   and a vector VAL with the corresponding values.  LEN and VAL must have the
%   same lengths.
%
%   Example: rldecode([ 6 4 5 8 7 ], [ 2 3 1 2 4 ]) will return
%
%      x = [ 6 6 4 4 4 5 8 8 7 7 7 7 ];
%
%   See also RLENCODE.

%   Author:      Peter John Acklam
%   Time-stamp:  2002-03-03 13:50:38 +0100
%   E-mail:      pjacklam@online.no
%   URL:         http://home.online.no/~pjacklam

% keep only runs whose length is positive
K = len > 0;
len = len(K);
val = val(K);
clear K
I = cumsum(len);
J = zeros(1, I(end));
J(I(1:end-1)+1) = 1;
J(1) = 1;
x = val(cumsum(J));

Contact us