Code covered by the BSD License  

Highlights from
CODIS

image thumbnail

CODIS

by

 

14 May 2009 (Updated )

Combined DNA Index System: a GUI tool for forensic paternity lawsuit

PI=CODIS3(handles,KC)
function PI=CODIS3(handles,KC) 
%CODIS3 is a CODIS ancillary routine.
%This function is recalled by CODIS to compute the Paternity Index, the 
%Matching Probability and the Random Man Not Excluded probability when a trio is
%considered (mother's, child's and alleged father's STRs are available)
%
% Syntax: 	CODIS3
%      
%     Inputs:
%           all needed variables are global, so CODIS3 directly access to the
%           RAM
%     Outputs:
%           - ordered loci, PI, MP, RMNE and all needed flags
%
%      Example: 
%           run CODIS and select the DEMO menu->Trio attribution or Trio exclusion
%
%           Created by Giuseppe Cardillo
%           giuseppe.cardillo-edta@poste.it

%each variable is inheredited by CODIS
global I M C F HM HC HF MP RMNE matF matM Amb pmatF CODISpop Fmut Mmut FHp EP loci NL
%I is the for...end variable that can be passed to CODIS2
%M, C and F are the matrices of mother's, child's and alleged father's STRs data;
%HM, HC and HF are the flag matrices of homozygosity (HM - mother; HC - child; 
%HF - alleged father) HX(I) is 1 if the I-th locus is homozygous, 0 otherwise
%MP, RMNE, PI are the matrices of the probabilities: Matching probability,
%Random Man Not Excluded proportion and Paternity Index;
%matM matF are the match-flag matrices: matF(I)=1 if there is a match between child's
%and alleged father's locus, 0 otherwise; the same for matM(I) for the mother
%match;
%Amb is the ambigous match-flag matrix: if M=PQ C=PQ F=PQ there is a match, but
%it is impossible to extablish which allele is inheredited allele from the 
%mother and which from the father, so (Amb(I)=1);
%pmatF: there are some particular cases in which there is a probable match: i.e.
%M=XP C=PQ F=PP. In this case, the alleged father doesn't bring the Q allele so
%there is a mismatch; anyway it is possible the alleged father gave the P allele
%and the mother gave X or P allele and it was mutated in Q.
%for the I-th STR and selected population dataset (CODISpop matrix):
%x is the alleles frequencies array. Remember that in the CODIS GUI you can
%select the 0 allele (when the STR was not tested); so if you selected the J-th
%allele from the popupmenu, the correct frequency will be J-1.
%y is the number of the observed alleles;
%Fmut and Mmut are the paternal and maternal mutation rates;
%Z is the alleles size array.
%KC Kinship coefficient

%sort M C and F vectors
M=sort(M,2); C=sort(C,2); F=sort(F,2);
%Highlight homozigosity
HM=M(:,1)==M(:,2); HC=C(:,1)==C(:,2); HF=F(:,1)==F(:,2);
%Set the match-flag vectors:
%starting from the latin assumption "Mater semper certa est, pater numquam"
%(there's no doubt about the mother, but the father), the matM array is all 
%switched on, the matF is all switched off.
matM=ones(18,1); matF=zeros(18,1); pmatF=matF; Amb=matF;
%set the Paternity Index, Probability of exclusion and Matching Probability arrays
PI=matF; MP=matF; EP=matM;

%if there is the mother and we have a standard paternity lawsuit
if FHp==1 && ~isequal(M,zeros(size(M)))
    % check for possible child-mother swapping.
    PICC=CODISCHECK(handles,M,F,HM,HF,1);
    [Y,I]=max(PICC);
    if Y>1 && I==1
        stringa=sprintf('Mother and Proband could be related as Parent-Child (%0.2f%%).\n Eventually, check for Mother-Child swapped samples.\nDo you want to swap Mother and Child?',(1/(1+1/Y))*100);
        ButtonName = questdlg(stringa, 'Mother and Proband kinship', 'Yes', 'No', 'No');
        if strcmp(ButtonName,'Yes')
            Mtmp=M; M=C; C=Mtmp;
            HMtmp=HM; HM=HC; HC=HMtmp;            
        end 
    end
    %Check if "mater certa est"
    NMflag=0;
    PICC=CODISCHECK(handles,C,M,HC,HM,0); Y=(1/(1+1/PICC));
    if Y<0.999
        stringa=sprintf('Mother and Child could not be related as Parent-Child\nProbability of maternity: %0.2f%%.\nDo you want to exclude Mother from computation?',Y*100);        
        ButtonName = questdlg(stringa, 'Mother and Child kinship', 'Yes', 'No', 'No');
        if strcmp(ButtonName,'Yes')
            NMflag=1; matM=zeros(size(matM));
        end
    end
else
    NMflag=0;
end

for I=1:18
    % Check for errors: the child's and the alleged father's loci are required
    if (all(C(I,:)) && all(F(I,:))) 
        eval(sprintf('loci{I}=get(handles.text%i,''String'');',I))
        TF=sum(strcmp(loci{I},CODISpop(:,7)).*(1:1:NL)');
        %set the frequencies of the STRs
        x=CODISpop{TF,1}; y=CODISpop{TF,2};
        %Set the mutation rate and the STRs label array
        Fmut=CODISpop{TF,3}; Mmut=CODISpop{TF,4}; Z=CODISpop{TF,5};
        if isequal(M(I,:),[0 0]) || NMflag %if the mother is absent or she is not the mother...
            PI(I)=CODIS2(KC,x,y,Z); %...switch on CODIS2
        else %if the mother is present...
        %if there isn't a match between the mother and the child (a very rare event)
            if sum(ismember(M(I,:),C(I,:)))==0 
                matM(I)=0; PI(I)=CODIS2(KC,x,y,Z); %switch off the flag and switch on CODIS2
            else %if there is the match between mother and child...
                %tidy up the loci considering only the mother and the child              
                %Check if M=PQ & C=PQ
                eqflag=(~HM(I) && ~HC(I) && isequal(M(I,:),C(I,:)));
                if HC(I) %the child's locus is homozygous
                    %the mother's locus is heterozygous and it needs to be swapped
                    if ~HM(I) && M(I,1)==C(I,1) %M=PQ C=PP
                        CODISswap(1,0,0) %M=QP C=PP
                    end
                else %the child's locus is heterozygous
                    if HM(I) %the mother's locus is homozygous
                        %and the child's locus needs to be swapped
                        if C(I,2)==M(I,2) %M=PP C=QP
                            CODISswap(0,1,0) %M=PP C=PQ
                        end
                    else %the mother's locus is heterozygous
                        if ~eqflag %if we are not in the situation M=PQ C=PQ
                            if M(I,1)==C(I,1)
                                CODISswap(1,0,0) %swap only the mother's locus
                            elseif M(I,1)==C(I,2)
                                CODISswap(1,1,0) %swap both loci
                            elseif M(I,2)==C(I,2) 
                                CODISswap(0,1,0) %swap only the child's locus
                            end
                        end
                    end
                end
%... update the STR frequencies and compute the Matching Probability
                z=round(x.*y); 
                z(M(I,:)-1)=z(M(I,:)-1)+1+HM(I); 
                z(C(I,2)-1)=z(C(I,2)-1)+1;
                y=y+3;
                x=z./y;
%compute the Random Man Not Excluded percentage (RMNE)
%The RMNE is an estimation of the percentage of random men that can be
%compatible with paternity. Pratically, if the child's locus is PP a man that
%brings the P allele cannot be excluded as the father of the child (RM=PP or
%RM=PX). This index only depends by the mother's and child's loci configuration:
%it depends by the allele not inherited from the mother.
%RMNE=1-overall exclusion probability
%EP(I)=1-(1-Freq Allele1-freq Allele 2)^2 if eqflag=1 (M=PQ C=PQ)
%EP(I)=1-(1-Freq Allele2)^2 in all the other cases
                al=sum(x(C(I,:)-1).*[eqflag 1]);
                EP(I)=(1-al)^2;

                %Now check if there is a match between C(I,2) and alleged
                %father's locus and compute the Paternity Index (PI)
                %check if M=PQ C=PQ F=PQ 
                Amb(I)=(~HM(I) && ~HC(I) && ~HF(I)) && isequal(M(I,:),C(I,:),F(I,:));
                
%Now we can calculate a temporary Paternity Index (PI).
%The PI is a likelihood ratio. It mesures the ratio between two probabilities:
%PI=P(C|M,F)/P(C|M,RM)
%P(C|M,F) is the probability to observe the child's genotype given the mother
%and the alleged father's genotype;
%P(C|M, RM) is the probability to observe the child's genotype given the mother
%and a random man (in this case, we consider all the compatible genotypes)

%If there is a match between the child's and the alleged father's loci the PI
%can be calculated in this way:
%C=PP & F=PP => PI=1/p (p is the frequency of the P allele)
%C=PP & F=PX => PI=1/2p
%M=PP|PX & C=PQ & F=QQ =>PI=1/q (q is the frequency of the Q allele)
%M=PP|PX & C=PQ & F=QX =>PI=1/2q 
%M=PQ C=PQ & F=PP|QQ|PQ => PI=1/(p+q)
%M=PQ C=PQ & F=PX|QX => PI=1/(2(p+q))
%Also in this case, we can use a trick to use one formula for all occasion.
%PI=(1+Amb(I))/((2-HF(I))*al);
%In fact, the PI depends:
% 1) by the frequency of transmitted allele. If M=PQ and C=PQ we don't know
% which allele is inherited from which and so we use both frequencies. This was
% just computed for the RMNE index and is the quantity al;
% 2) by the alleged father's locus: if it is heterozygous there is a factor 2.
%    This is computed by (2-HF(I))
% 3) When M=C=F=PQ PI=1/(p+q) and not PI=1/(2*(p+q)). So, this is corrected
%    using (1+Amb(I))
%The overall PI = prod(PI(I))
%If there is not a match, storically PI(I)=0. With this assumption, even if
%there is only 1 mismatch, the overall PI will be 0. The mismatch could be the
%result of a mutational event: in this view, the PI must be corrected for the
%probability to observe a mutation.
%I have adopted the model described by Brenner. 
%P(genotypes | true trio) is proportional to
%P(Q'R man transmits Q) = P(Q' is transmitted) x P(mutation) x P(mutation
%increases length) x P(s steps) = (1/2) ยต (1/2) (1/10)^s-1, where s =|Q'-Q|.
%Anyway, there are some particular cases in which it is possible to hypotize
%also a mutational event in the mother: in this cases P(C|M,F) and P(C|M,RM)
%must be modified both.
                
                PItmp=(1+Amb(I))/((2-HF(I))*al); %compute the possible PI
                if ~eqflag %if we are not in the case M=PQ C=PQ...
                    %.. the alleged father can transmit only the C(I,2) allele
                    if ismember(C(I,2),F(I,:)) %there is a match between child's and alleged father's loci
                        %The PI doesn't need correction and the match-flag is
                        %switched on
                        if KC==0.5
                            PI(I)=PItmp;
                        else
                            PI(I)=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
                        end
                        matF(I)=1;
                        %check if a swap of the alleged father's locus is needed
                        if C(I,2)==F(I,2)
                            CODISswap(0,0,1)
                        end
                    else  %there isn't a match between child's and alleged father's loci
                        if KC==0.5
                            if HC(I) %the child's locus is homozygous
                                c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1); %c is always computed
                                d=(1-HF(I))*(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1); %d is only computed when the father is heterozygous
                                PI(I)=PItmp*Fmut*(c+d);
                            else %the child's locus is heterozygous
                                %check if could be a possible match between child's allele 1 and the alleged father's alleles
                                pmatch=ismember(F(I,:),C(I,1));
                                if any(pmatch) %mutation in the mother? Consider it!
                                    pmatF(I)=1; %swith on the possible match flag
                                    %check is a swap is needed
                                    if ~HF(I) && find(pmatch)==2
                                        CODISswap(0,0,1)
                                    end
                                    c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1); %c is always computed
                                    if HM(I) %the mother is homozygous
                                        d=(1-HF(I))*(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1); %d is only computed when the father is heterozygous
                                        D=1/c+Mmut*(x(C(I,1)-1)/x(C(I,2)-1));
                                        N=Mmut+Fmut*(1+(1-HF(I))*(d/c));
                                        PI(I)=PItmp*N/D;
                                    else %the mother is heterozygous
                                        d=(1/2)*(1/10)^(abs(Z(M(I,1)-1)-Z(C(I,2)-1))-1); %in this case, d is always computed
                                        if HF(I) %the alleged father is homozygous
                                            D=1/c+(x(C(I,1)-1)/x(C(I,2)-1))*Mmut*(1+d/c);
                                            N=Fmut+Mmut*(1+d/c);
                                            PI(I)=PItmp*N/D;
                                        else %the alleged father is heterozygous
                                            e=(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1);
                                            N=Fmut*(c+e)+Mmut*(c+d);
                                            D=1+(x(C(I,1)-1)/x(C(I,2)-1))*Mmut*(c+d);
                                            PI(I)=PItmp*N/D;
                                        end
                                    end
                                else %there isn't any match between child and alleged father
                                    c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1); %c is always computed
                                    d=(1-HF(I))*(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1); %d is only computed when the father is heterozygous
                                    PI(I)=PItmp*Fmut*(c+d);
                                end
                            end
                        else
                            PI(I)=(1-2*KC);
                        end
                    end
                else %M=PQ C=PQ
                    if Amb(I) %M=PQ C=PQ F=PQ
                        if KC==0.5
                            PI(I)=PItmp;
                        else
                            PI(I)=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
                        end
                        matF(I)=1;
                    else %M=PQ C=PQ F=...
                        switch HF(I) %check the alleged father's locus
                            case 0 %The alleged father's locus is heterozygous
                                if any(ismember(C(I,:),F(I,:))) %there is a match between child's and alleged father's loci
                                    if KC==0.5
                                        PI(I)=PItmp;
                                    else
                                        PI(I)=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
                                    end
                                    matF(I)=1;
                                    %check if a swap is needed
                                    if C(I,1)==F(I,1)
                                        CODISswap(0,1,0)
                                    elseif C(I,1)==F(I,2)
                                        CODISswap(0,1,1)
                                    elseif C(I,2)==F(I,1)
                                        CODISswap(1,0,0)
                                    elseif C(I,2)==F(I,2)
                                        CODISswap(1,0,1)
                                    end
                                else %there isn't a match between child's and alleged father's loci
                                    if KC==0.5
                                        c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,1)-1))-1);
                                        d=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1);
                                        e=(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,1)-1))-1);
                                        f=(1/2)*(1/10)^(abs(Z(F(I,2)-1)-Z(C(I,2)-1))-1);
                                        PI(I)=PItmp*Fmut*(c+d+e+f);
                                        Amb(I)=1;
                                    else
                                        PI(I)=(1-2*KC);
                                    end
                                end
                            case 1 %The alleged father's locus is homozygous
                                if any(ismember(C(I,:),F(I,1))) %there is a match between child's and alleged father's loci
                                    if KC==0.5
                                        PI(I)=PItmp;
                                    else
                                        PI(I)=PItmp*2*KC+(1-2*KC); %eventually transform PI in AI (Avuncular index)
                                    end
                                    matF(I)=1;
                                    %check if a swap is needed
                                    if C(I,1)==F(I,1)
                                        CODISswap(0,1,0) %swap the child's locus
                                    else
                                        CODISswap(1,0,0) %swap the mother's locus
                                    end
                                else %there isn't a match between child's and alleged father's loci
                                    if KC==0.5
                                        c=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,1)-1))-1);
                                        d=(1/2)*(1/10)^(abs(Z(F(I,1)-1)-Z(C(I,2)-1))-1);
                                        PI(I)=PItmp*Fmut*(c+d);
                                        Amb(I)=1;
                                    else
                                        PI(I)=(1-2*FHp);
                                    end
                                end
                        end
                    end
                end
            end
        end
        %The Matching probability is the probability that 2 persons share the same
        %genotype. For each locus, the MP(I) is p^2 if the locus is homozygous or 2pq if
        %the locus is heterozygous. The overall MP = prod(MP(I)).
        %It is necessary to update the alleles frequecies because it is possible that
        %the alleged father bring an allele that was not previously observed in the
        %selected population: in this case all the indices (MP, RMNE, PI) will be 0.
        %Under the condition that mother and alleged father are unrelated, we can add
        %to the dataset all the 4 observed alleles
        z=round(x.*y);
        if matF(I)==1
            if Amb(I)==1
                z(C(I,1)-1)=z(C(I,1)-1)+1;
            else
                z(F(I,2)-1)=z(F(I,2)-1)+1;
            end
            y=y+1;
        else
            z(F(I,:)-1)=z(F(I,:)-1)+1+HF(I);
            y=y+2;
        end
        x=z./y;
        %There is a MatLab trick to use one formula for both cases:
        %MP(I)=(2-HF(I))*prod(x(F(I,:)-1));
        %if the locus homozygous:
        %   HF(I)=1; 2-HF(I)=1; F(I,1)==F(I,2) => prod(x(F(I,:)-1))=pp=p^2 => MP=p^2
        %if the locus heterozygous:
        %   HF(I)=0; 2-HF(I)=2; F(I,1)~=F(I,2) => prod(x(F(I,:)-1))=pq => MP=2pq
        MP(I)=(2-HF(I))*prod(x(F(I,:)-1));
    end
end
%OEP is the overall exclusion probability
OEP=1-prod(EP(EP~=0));
RMNE=1-OEP;

Contact us