function PI2=CODIS2(KC,x,y,Z)
%CODIS2 is a CODIS ancillary routine.
%This function is recalled by CODIS3 to compute the Paternity Index, the
%Matching Probability and the Random Man Not Excluded probability when a duo is
%considered (only child's and alleged father's STRs are available or if, in a
%particular locus, there is a mismatch between the mother and the child)
%
% Syntax: CODIS2
%
% Inputs:
% all needed variables are global, so CODIS2 directly access to the
% RAM
% Outputs:
% - ordered loci, PI, MP, RMNE and all needed flags
%
% Example:
% run CODIS and select the DEMO menu->Duo attribution or Duo exclusion
%
% Created by Giuseppe Cardillo
% giuseppe.cardillo-edta@poste.it
%each variable is inheredited by CODIS3
global I C F HC HF MP EP matF Amb Fmut
%I is the for...end variable inheredited by CODIS3;
%C and F are the matrices of child's and alleged father's STRs data;
%HC and HF are the flag matrices of homozygosity (HC - child; HF - alleged
%father) HC(I) or HF(I) is 1 if the I-th locus is homozygous, 0 otherwise;
%MP, RMNE, PI are the matrices of the probabilities: Matching probability,
%Random Man Not Excluded proportion and Paternity Index;
%matF is the match-flag matrix: matF(I)=1 if there is a match between child's
%and alleged father's locus, 0 otherwise;
%Amb is the ambigous match-flag matrix: if C=PQ and F=PQ there is a match, but
%it is impossible to extablish which is the inheredited allele (Amb(I)=1);
%for the I-th STR and selected population dataset:
%x is the alleles frequencies array. Remember that in the CODIS GUI you can
%select the 0 allele (when the STR was not tested); so if you selected the J-th
%allele from the popupmenu, the correct frequency will be J-1.
%y is the number of the observed alleles;
%Fmut is the paternal mutation rate;
%Z is the alleles size array.
%KC Kinship coefficient
%The Matching probability is the probability that 2 persons share the same
%genotype. For each locus, the MP(I) is p^2 if the locus is homozygous or 2pq if
%the locus is heterozygous. The overall MP = prod(MP(I)).
%It is necessary to update the alleles frequecies because it is possible that
%the alleged father bring an allele that was not previously observed in the
%selected population: in this case all the indices (MP, RMNE, PI) will be 0.
z=round(x.*y); y=y+2; z(F(I,:)-1)=z(F(I,:)-1)+1+HF(I); x=z./y;
%There is a MatLab trick to use one formula for both cases:
%MP(I)=(2-HF(I))*prod(x(F(I,:)-1));
%if the locus homozygous:
% HF(I)=1; 2-HF(I)=1; F(I,1)==F(I,2) => prod(x(F(I,:)-1))=pp=p^2 => MP=p^2
%if the locus heterozygous:
% HF(I)=0; 2-HF(I)=2; F(I,1)~=F(I,2) => prod(x(F(I,:)-1))=pq => MP=2pq
MP(I)=(2-HF(I))*prod(x(F(I,:)-1));
%Now we can calculate the Paternity Index (PI).
%The PI is a likelihood ratio. It mesures the ratio between two probabilities:
%PI=P(C|F)/P(C|RM)
%P(C|F) is the probability to observe the child's genotype given the alleged
%father's genotype;
%P(C|RM) is the probability to observe the child's genotype given a random man
%(in this case, we consider all the compatible genotypes)
%If there is a match between the child's and the alleged father's loci the PI
%can be calculated in this way:
%C=PP & F=PP => PI=1/p (p is the frequency of the P allele)
%C=PP & F=PX => PI=1/2p
%C=QP & F=PP => PI=1/2p
%C=PQ & F=QQ => PI=1/2q (q is the frequency of the Q allele)
%C=QP & F=PX => PI=1/4p
%C=PQ & F=QX => PI=1/4q
%C=PQ & F=PQ => PI=(p+q)/4*(p*q)
%So, with the exception of the last case, given:
%NC alleles of the child's locus (1 if homozygous - 2 if heterozygous);
%NF alleles of the alleged father's locus (1 if homozygous - 2 if heterozygous);
%f frequency of the shared allele
%PI(I)=1/(NC*NF*f) and the overall PI = prod(PI(I))
%If there is not a match, storically PI(I)=0. With this assumption, even if
%there is only 1 mismatch, the overall PI will be 0. The mismatch could be the
%result of a mutational event: in this view, the PI must be corrected for the
%probability to observe a mutation.
%I have adopted the model described by Brenner.
%P(genotypes | true trio) is proportional to
%P(Q'R man transmits Q) = P(Q' is transmitted) x P(mutation) x P(mutation
%increases length) x P(s steps) = (1/2) ยต (1/2) (1/10)^s-1, where s =|Q'-Q|
%remember that the loci were sorted by CODIS3 (see it)
if (~HC(I) && ~HF(I)) && isequal(C(I,:),F(I,:)) %C=PQ & F=PQ
matF(I)=1; Amb(I)=1; %switch on the Match and the Ambigous flags
%Update the frequencies
%In this case, we have previously added the alleged father's alleles. Now we
%don't know which allele is transmitted by the alleged father to the child
%and we must add only one allele. I think that a salomonic solution is to
%add half allele each.
z=round(x.*y); y=y+1; z(C(I,:)-1)=z(C(I,:)-1)+0.5; x=z./y;
%compute the paternity index
PItmp=sum(x(C(I,:)-1))/prod(x(C(I,:)-1))/4;
if KC==0.5
PI2=PItmp;
else
PI2=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
end
else % we are not in the condition C=PQ & F=PQ
if any(ismember(C(I,:),F(I,:))) %there is a match
matF(I)=1; %switch on the Match flag
%check if swaps are required
if HC(I) %If the child's locus is homozygous...
%swap the alleged father's locus only if C=PP & F=XP
if ~HF(I) && C(I,2)==F(I,2)
CODISswap(0,0,1) %C=PP & F=PX
end
else %the child's locus is heterozygous...
if HF(I) %If the alleged father's locus is homozygous...
%swap the child's locus only if C=PQ & F=PP
if C(I,1)==F(I,1)
CODISswap(0,1,0) %C=QP & F=PP
end
else %the alleged father's locus is heterozygous...
if C(I,1)==F(I,1) %if C=PQ & F=PX
CODISswap(0,1,0) %C=QP & F=PX
elseif C(I,1)==F(I,2) %if C=PQ & F=XP
CODISswap(0,1,1) %C=QP & F=PX
elseif C(I,2)==F(I,2) %if C=QP & F=XP
CODISswap(0,0,1) %C=QP & F=PX
end
end
end
%Update the frequencies
%In this case, we know which allele is transmitted by the alleged father
%to the child and so we can add the untransmitted allele
z=round(x.*y); y=y+1; z(C(I,1)-1)=z(C(I,1)-1)+1; x=z./y;
PItmp=1/((2-HC(I))*(2-HF(I))*x(C(I,2)-1)); %there is a match and so the correction factor is not required
if KC==0.5
PI2=PItmp;
else
PI2=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
end
else %there is not a match between the child's and the alleged father's allele
%Update the frequencies
%In this case, we must add both child's alleles
z=round(x.*y); y=y+2; z(C(I,:)-1)=z(C(I,:)-1)+1; x=z./y;
if KC==0.5
%Now we must compute the correction factor
if HC(I) %child's locus is homozygous C=PP
%If the alleged father's locus is homozygous, F=RR, the quantity
%|R-P|-1 will be summed twice, but the denominator will be 2 and so
%the proper quantity will be computed.
%On the contrary, if the locus is heterozygous, F=RS, the quantities
%|R-P|-1 and |S-P|-1 will be both computed and the denominator will
%be 1
if HF(I)
corr=Fmut*(1/2)*(1/10)^(abs(Z(C(I,1)-1)-Z(F(I,1)-1))-1);
else
corr=Fmut*(1/2)*(1/10)^(abs(Z(C(I,1)-1)-Z(F(I,1)-1))-1)+Fmut*(1/2)*(1/10)^(abs(Z(C(I,1)-1)-Z(F(I,2)-1))-1);
end
PItmp=corr/x(C(I,1)-1)^2;
else %child's locus is heterozygous C=PQ
%also in this case the denominator will insure that the quantities
%will not be summed twice
if HF(I)
corr=Fmut*(1/2)*(1/10)^(abs(Z(C(I,1)-1)-Z(F(I,1)-1))-1)+Fmut*(1/2)*(1/10)^(abs(Z(C(I,2)-1)-Z(F(I,1)-1))-1);
else
corr=Fmut*(1/4)*(1/10)^(abs(Z(C(I,1)-1)-Z(F(I,1)-1))-1)+Fmut*(1/4)*(1/10)^(abs(Z(C(I,2)-1)-Z(F(I,1)-1))-1)+...
Fmut*(1/4)*(1/10)^(abs(Z(C(I,1)-1)-Z(F(I,2)-1))-1)+Fmut*(1/4)*(1/10)^(abs(Z(C(I,2)-1)-Z(F(I,2)-1))-1);
end
PItmp=corr/(2*x(C(I,1)-1)*x(C(I,2)-1));
end
else
PItmp=0;
end
%Finally, compute the PI and eventually transform PI in AI (Avuncular
%index)
PI2=PItmp*2*KC+(1-2*KC);
end
end
%Lastly, compute the Random Man Not Excluded percentage (RMNE).
%The RMNE is an estimation of the percentage of random men that can be
%compatible with paternity. Pratically, if the child's locus is PP a man that
%brings the P allele cannot be excluded as the father of the child (RM=PP or
%RM=PX); if the child's locus is PQ all the men that bring the P and the Q
%alleles cannot be excluded (RM=PP, QQ, PX, QX, PQ)
%So, this index only depends by the child's locus configuration
%RMNE=1-overall exclusion probability
%EP(I)=1-(1-p)^2 if child is homozygous
%EP(I)=1-(1-p-q)^2 if child is heterozygous
%Also in this case, the denominator (1+HC(I)) insures that p is not subtracted
%twice if the locus is homozygous.
EP(I)=(1-sum(x(C(I,:)-1))/(1+HC(I)))^2;