function PI=CODIS3(handles,KC)
%CODIS3 is a CODIS ancillary routine.
%This function is recalled by CODIS to compute the Paternity Index, the
%Matching Probability and the Random Man Not Excluded probability when a trio is
%considered (mother's, child's and alleged father's STRs are available)
%
% Syntax: CODIS3
%
% Inputs:
% all needed variables are global, so CODIS3 directly access to the
% RAM
% Outputs:
% - ordered loci, PI, MP, RMNE and all needed flags
%
% Example:
% run CODIS and select the DEMO menu->Trio attribution or Trio exclusion
%
% Created by Giuseppe Cardillo
% giuseppe.cardillo-edta@poste.it
%each variable is inheredited by CODIS
global I M C F HM HC HF MP RMNE matF matM Amb pmatF CODISpop Fmut Mmut FHp EP loci NL
%I is the for...end variable that can be passed to CODIS2
%M, C and F are the matrices of mother's, child's and alleged father's STRs data;
%HM, HC and HF are the flag matrices of homozygosity (HM - mother; HC - child;
%HF - alleged father) HX(I) is 1 if the I-th locus is homozygous, 0 otherwise
%MP, RMNE, PI are the matrices of the probabilities: Matching probability,
%Random Man Not Excluded proportion and Paternity Index;
%matM matF are the match-flag matrices: matF(I)=1 if there is a match between child's
%and alleged father's locus, 0 otherwise; the same for matM(I) for the mother
%match;
%Amb is the ambigous match-flag matrix: if M=PQ C=PQ F=PQ there is a match, but
%it is impossible to extablish which allele is inheredited allele from the
%mother and which from the father, so (Amb(I)=1);
%pmatF: there are some particular cases in which there is a probable match: i.e.
%M=XP C=PQ F=PP. In this case, the alleged father doesn't bring the Q allele so
%there is a mismatch; anyway it is possible the alleged father gave the P allele
%and the mother gave X or P allele and it was mutated in Q.
%for the I-th STR and selected population dataset (CODISpop matrix):
%x is the alleles frequencies array. Remember that in the CODIS GUI you can
%select the 0 allele (when the STR was not tested); so if you selected the J-th
%allele from the popupmenu, the correct frequency will be J-1.
%y is the number of the observed alleles;
%Fmut and Mmut are the paternal and maternal mutation rates;
%Z is the alleles size array.
%KC Kinship coefficient
%sort M C and F vectors
M=sort(M,2); C=sort(C,2); F=sort(F,2);
%Highlight homozigosity
HM=M(:,1)==M(:,2); HC=C(:,1)==C(:,2); HF=F(:,1)==F(:,2);
%Set the match-flag vectors:
%starting from the latin assumption "Mater semper certa est, pater numquam"
%(there's no doubt about the mother, but the father), the matM array is all
%switched on, the matF is all switched off.
matM=ones(18,1); matF=zeros(18,1); pmatF=matF; Amb=matF;
%set the Paternity Index, Probability of exclusion and Matching Probability arrays
PI=matF; MP=matF; EP=matM;
%if there is the mother and we have a standard paternity lawsuit
if FHp==1 && ~isequal(M,zeros(size(M)))
% check for possible child-mother swapping.
PICC=CODISCHECK(handles,M,F,HM,HF,1);
[Y,I]=max(PICC);
if Y>1 && I==1
stringa=sprintf('Mother and Proband could be related as Parent-Child (%0.2f%%).\n Eventually, check for Mother-Child swapped samples.\nDo you want to swap Mother and Child?',(1/(1+1/Y))*100);
ButtonName = questdlg(stringa, 'Mother and Proband kinship', 'Yes', 'No', 'No');
if strcmp(ButtonName,'Yes')
Mtmp=M; M=C; C=Mtmp;
HMtmp=HM; HM=HC; HC=HMtmp;
end
end
%Check if "mater certa est"
NMflag=0;
PICC=CODISCHECK(handles,C,M,HC,HM,0); Y=(1/(1+1/PICC));
if Y<0.999
stringa=sprintf('Mother and Child could not be related as Parent-Child\nProbability of maternity: %0.2f%%.\nDo you want to exclude Mother from computation?',Y*100);
ButtonName = questdlg(stringa, 'Mother and Child kinship', 'Yes', 'No', 'No');
if strcmp(ButtonName,'Yes')
NMflag=1; matM=zeros(size(matM));
end
end
else
NMflag=0;
end
for I=1:18
% Check for errors: the child's and the alleged father's loci are required
if (all(C(I,:)) && all(F(I,:)))
eval(sprintf('loci{I}=get(handles.text%i,''String'');',I))
TF=sum(strcmp(loci{I},CODISpop(:,7)).*(1:1:NL)');
%set the frequencies of the STRs
x=CODISpop{TF,1}; y=CODISpop{TF,2};
%Set the mutation rate and the STRs label array
Fmut=CODISpop{TF,3}; Mmut=CODISpop{TF,4}; Z=CODISpop{TF,5};
if isequal(M(I,:),[0 0]) || NMflag %if the mother is absent or she is not the mother...
PI(I)=CODIS2(KC,x,y,Z); %...switch on CODIS2
else %if the mother is present...
%if there isn't a match between the mother and the child (a very rare event)
if sum(ismember(M(I,:),C(I,:)))==0
matM(I)=0; PI(I)=CODIS2(KC,x,y,Z); %switch off the flag and switch on CODIS2
else %if there is the match between mother and child...
%tidy up the loci considering only the mother and the child
%Check if M=PQ & C=PQ
eqflag=(~HM(I) && ~HC(I) && isequal(M(I,:),C(I,:)));
if HC(I) %the child's locus is homozygous
%the mother's locus is heterozygous and it needs to be swapped
if ~HM(I) && M(I,1)==C(I,1) %M=PQ C=PP
CODISswap(1,0,0) %M=QP C=PP
end
else %the child's locus is heterozygous
if HM(I) %the mother's locus is homozygous
%and the child's locus needs to be swapped
if C(I,2)==M(I,2) %M=PP C=QP
CODISswap(0,1,0) %M=PP C=PQ
end
else %the mother's locus is heterozygous
if ~eqflag %if we are not in the situation M=PQ C=PQ
if M(I,1)==C(I,1)
CODISswap(1,0,0) %swap only the mother's locus
elseif M(I,1)==C(I,2)
CODISswap(1,1,0) %swap both loci
elseif M(I,2)==C(I,2)
CODISswap(0,1,0) %swap only the child's locus
end
end
end
end
%... update the STR frequencies and compute the Matching Probability
z=round(x.*y);
z(M(I,:)-1)=z(M(I,:)-1)+1+HM(I);
z(C(I,2)-1)=z(C(I,2)-1)+1;
y=y+3;
x=z./y;
%compute the Random Man Not Excluded percentage (RMNE)
%The RMNE is an estimation of the percentage of random men that can be
%compatible with paternity. Pratically, if the child's locus is PP a man that
%brings the P allele cannot be excluded as the father of the child (RM=PP or
%RM=PX). This index only depends by the mother's and child's loci configuration:
%it depends by the allele not inherited from the mother.
%RMNE=1-overall exclusion probability
%EP(I)=1-(1-Freq Allele1-freq Allele 2)^2 if eqflag=1 (M=PQ C=PQ)
%EP(I)=1-(1-Freq Allele2)^2 in all the other cases
al=sum(x(C(I,:)-1).*[eqflag 1]);
EP(I)=(1-al)^2;
%Now check if there is a match between C(I,2) and alleged
%father's locus and compute the Paternity Index (PI)
%check if M=PQ C=PQ F=PQ
Amb(I)=(~HM(I) && ~HC(I) && ~HF(I)) && isequal(M(I,:),C(I,:),F(I,:));
%Now we can calculate a temporary Paternity Index (PI).
%The PI is a likelihood ratio. It mesures the ratio between two probabilities:
%PI=P(C|M,F)/P(C|M,RM)
%P(C|M,F) is the probability to observe the child's genotype given the mother
%and the alleged father's genotype;
%P(C|M, RM) is the probability to observe the child's genotype given the mother
%and a random man (in this case, we consider all the compatible genotypes)
%If there is a match between the child's and the alleged father's loci the PI
%can be calculated in this way:
%C=PP & F=PP => PI=1/p (p is the frequency of the P allele)
%C=PP & F=PX => PI=1/2p
%M=PP|PX & C=PQ & F=QQ =>PI=1/q (q is the frequency of the Q allele)
%M=PP|PX & C=PQ & F=QX =>PI=1/2q
%M=PQ C=PQ & F=PP|QQ|PQ => PI=1/(p+q)
%M=PQ C=PQ & F=PX|QX => PI=1/(2(p+q))
%Also in this case, we can use a trick to use one formula for all occasion.
%PI=(1+Amb(I))/((2-HF(I))*al);
%In fact, the PI depends:
% 1) by the frequency of transmitted allele. If M=PQ and C=PQ we don't know
% which allele is inherited from which and so we use both frequencies. This was
% just computed for the RMNE index and is the quantity al;
% 2) by the alleged father's locus: if it is heterozygous there is a factor 2.
% This is computed by (2-HF(I))
% 3) When M=C=F=PQ PI=1/(p+q) and not PI=1/(2*(p+q)). So, this is corrected
% using (1+Amb(I))
%The overall PI = prod(PI(I))
%If there is not a match, storically PI(I)=0. With this assumption, even if
%there is only 1 mismatch, the overall PI will be 0. The mismatch could be the
%result of a mutational event: in this view, the PI must be corrected for the
%probability to observe a mutation.
%I have adopted the model described by Brenner.
%P(genotypes | true trio) is proportional to
%P(Q'R man transmits Q) = P(Q' is transmitted) x P(mutation) x P(mutation
%increases length) x P(s steps) = (1/2) ยต (1/2) (1/10)^s-1, where s =|Q'-Q|.
%Anyway, there are some particular cases in which it is possible to hypotize
%also a mutational event in the mother: in this cases P(C|M,F) and P(C|M,RM)
%must be modified both.
PItmp=(1+Amb(I))/((2-HF(I))*al); %compute the possible PI
if ~eqflag %if we are not in the case M=PQ C=PQ...
%.. the alleged father can transmit only the C(I,2) allele
if ismember(C(I,2),F(I,:)) %there is a match between child's and alleged father's loci
%The PI doesn't need correction and the match-flag is
%switched on
if KC==0.5
PI(I)=PItmp;
else
PI(I)=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
end
matF(I)=1;
%check if a swap of the alleged father's locus is needed
if C(I,2)==F(I,2)
CODISswap(0,0,1)
end
else %there isn't a match between child's and alleged father's loci
if KC==0.5
if HC(I) %the child's locus is homozygous
c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1); %c is always computed
d=(1-HF(I))*(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1); %d is only computed when the father is heterozygous
PI(I)=PItmp*Fmut*(c+d);
else %the child's locus is heterozygous
%check if could be a possible match between child's allele 1 and the alleged father's alleles
pmatch=ismember(F(I,:),C(I,1));
if any(pmatch) %mutation in the mother? Consider it!
pmatF(I)=1; %swith on the possible match flag
%check is a swap is needed
if ~HF(I) && find(pmatch)==2
CODISswap(0,0,1)
end
c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1); %c is always computed
if HM(I) %the mother is homozygous
d=(1-HF(I))*(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1); %d is only computed when the father is heterozygous
D=1/c+Mmut*(x(C(I,1)-1)/x(C(I,2)-1));
N=Mmut+Fmut*(1+(1-HF(I))*(d/c));
PI(I)=PItmp*N/D;
else %the mother is heterozygous
d=(1/2)*(1/10)^(abs(Z(M(I,1)-1)-Z(C(I,2)-1))-1); %in this case, d is always computed
if HF(I) %the alleged father is homozygous
D=1/c+(x(C(I,1)-1)/x(C(I,2)-1))*Mmut*(1+d/c);
N=Fmut+Mmut*(1+d/c);
PI(I)=PItmp*N/D;
else %the alleged father is heterozygous
e=(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1);
N=Fmut*(c+e)+Mmut*(c+d);
D=1+(x(C(I,1)-1)/x(C(I,2)-1))*Mmut*(c+d);
PI(I)=PItmp*N/D;
end
end
else %there isn't any match between child and alleged father
c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1); %c is always computed
d=(1-HF(I))*(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1); %d is only computed when the father is heterozygous
PI(I)=PItmp*Fmut*(c+d);
end
end
else
PI(I)=(1-2*KC);
end
end
else %M=PQ C=PQ
if Amb(I) %M=PQ C=PQ F=PQ
if KC==0.5
PI(I)=PItmp;
else
PI(I)=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
end
matF(I)=1;
else %M=PQ C=PQ F=...
switch HF(I) %check the alleged father's locus
case 0 %The alleged father's locus is heterozygous
if any(ismember(C(I,:),F(I,:))) %there is a match between child's and alleged father's loci
if KC==0.5
PI(I)=PItmp;
else
PI(I)=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
end
matF(I)=1;
%check if a swap is needed
if C(I,1)==F(I,1)
CODISswap(0,1,0)
elseif C(I,1)==F(I,2)
CODISswap(0,1,1)
elseif C(I,2)==F(I,1)
CODISswap(1,0,0)
elseif C(I,2)==F(I,2)
CODISswap(1,0,1)
end
else %there isn't a match between child's and alleged father's loci
if KC==0.5
c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,1)-1))-1);
d=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1);
e=(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,1)-1))-1);
f=(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1);
PI(I)=PItmp*Fmut*(c+d+e+f);
Amb(I)=1;
else
PI(I)=(1-2*KC);
end
end
case 1 %The alleged father's locus is homozygous
if any(ismember(C(I,:),F(I,1))) %there is a match between child's and alleged father's loci
if KC==0.5
PI(I)=PItmp;
else
PI(I)=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
end
matF(I)=1;
%check if a swap is needed
if C(I,1)==F(I,1)
CODISswap(0,1,0) %swap the child's locus
else
CODISswap(1,0,0) %swap the mother's locus
end
else %there isn't a match between child's and alleged father's loci
if KC==0.5
c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,1)-1))-1);
d=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1);
PI(I)=PItmp*Fmut*(c+d);
Amb(I)=1;
else
PI(I)=(1-2*FHp);
end
end
end
end
end
end
end
%The Matching probability is the probability that 2 persons share the same
%genotype. For each locus, the MP(I) is p^2 if the locus is homozygous or 2pq if
%the locus is heterozygous. The overall MP = prod(MP(I)).
%It is necessary to update the alleles frequecies because it is possible that
%the alleged father bring an allele that was not previously observed in the
%selected population: in this case all the indices (MP, RMNE, PI) will be 0.
%Under the condition that mother and alleged father are unrelated, we can add
%to the dataset all the 4 observed alleles
z=round(x.*y);
if matF(I)==1
if Amb(I)==1
z(C(I,1)-1)=z(C(I,1)-1)+1;
else
z(F(I,2)-1)=z(F(I,2)-1)+1;
end
y=y+1;
else
z(F(I,:)-1)=z(F(I,:)-1)+1+HF(I);
y=y+2;
end
x=z./y;
%There is a MatLab trick to use one formula for both cases:
%MP(I)=(2-HF(I))*prod(x(F(I,:)-1));
%if the locus homozygous:
% HF(I)=1; 2-HF(I)=1; F(I,1)==F(I,2) => prod(x(F(I,:)-1))=pp=p^2 => MP=p^2
%if the locus heterozygous:
% HF(I)=0; 2-HF(I)=2; F(I,1)~=F(I,2) => prod(x(F(I,:)-1))=pq => MP=2pq
MP(I)=(2-HF(I))*prod(x(F(I,:)-1));
end
end
%OEP is the overall exclusion probability
OEP=1-prod(EP(EP~=0));
RMNE=1-OEP;