% SEQUENOM2PEDFILE M-file for Sequenom2pedfile.fig
% This script is useful for generating files in linkage format (pedfile and
% mapfile) from the outputs generated from Sequenom MassArray runs
% Authorship: Ismael Huertas email: ihuertas-ibis@us.es
% Institution: Lab of Movement Disorders, Institute of Biomedicine of Sevilla (IBiS) , Spain.
function varargout = Sequenom2pedfile(varargin)
gui_Singleton = 1;
gui_State = struct('gui_Name', mfilename, ...
'gui_Singleton', gui_Singleton, ...
'gui_OpeningFcn', @Sequenom2pedfile_OpeningFcn, ...
'gui_OutputFcn', @Sequenom2pedfile_OutputFcn, ...
'gui_LayoutFcn', [] , ...
'gui_Callback', []);
if nargin && ischar(varargin{1})
gui_State.gui_Callback = str2func(varargin{1});
end
if nargout
[varargout{1:nargout}] = gui_mainfcn(gui_State, varargin{:});
else
gui_mainfcn(gui_State, varargin{:});
end
% End initialization code - DO NOT EDIT
% --- Executes just before Sequenom2pedfile is made visible.
function Sequenom2pedfile_OpeningFcn(hObject, eventdata, handles, varargin)
%%%%%%%%%%%%%%%%% GLOBAL VARIABLES %%%%%%%%%%%%%%%%%%%
handles.gtable={}; % Matrix for storing all the info
handles.setgenes={}; % List of genes
handles.listsnp=[]; % List of SNPs
handles.chr=[]; % Chromosome of each SNP
handles.position=[]; % Position of each SNP
handles.sample=[]; % Sample ID
handles.snp=[]; % SNP ID
handles.sex=[]; % sex of sample
handles.pheno=[]; % trait value of sample
handles.gene=[]; % Gene of each SNP
handles.dupmenu=1; % Aux variable for repeated samples option
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Choose default command line output for Sequenom2pedfile
handles.output = hObject;
% Update handles structure
guidata(hObject, handles);
% UIWAIT makes Sequenom2pedfile wait for user response (see UIRESUME)
% uiwait(handles.figure1);
% --- Outputs from this function are returned to the command line.
function varargout = Sequenom2pedfile_OutputFcn(hObject, eventdata, handles)
varargout{1} = handles.output;
% It allows the user to select the genes appearing in the output files
function list_of_genes_Callback(hObject, eventdata, handles)
listsnp=[]; % stores the SNPs belonging to these genes
setgenes=handles.setgenes;
s=get(handles.list_of_genes,'Value');
for m=1:length(s)
listsnp=[listsnp cell2mat(setgenes(2,s(m)))'];
end
handles.listsnp=listsnp;
guidata(hObject, handles);
function list_of_genes_CreateFcn(hObject, eventdata, handles)
if ispc && isequal(get(hObject,'BackgroundColor'), get(0,'defaultUicontrolBackgroundColor'))
set(hObject,'BackgroundColor','white');
end
% It generates a textfile per gene with the genotyping info
function generate_info_Callback(hObject, eventdata, handles)
% Retrieve global variables
setgenes=handles.setgenes;
gene=handles.gene;
snp=handles.snp;
sample=handles.sample;
gtable=handles.gtable;
% Identify the SNPs of each gene and generate a separated textfile
for k=1:length(setgenes)
fid = fopen(sprintf('%s.txt',char(setgenes(1,k))), 'w');
index=cell2mat(setgenes(2,k));
for p=1:length(sample)
fprintf(fid, '%s ',char(sample(p)));
for q=index(1):index(end)
if(isempty(cell2mat(gtable(p+1,q+1))))
fprintf(fid, '%d ',0);
else
fprintf(fid, '%s ',char(gtable(p+1,q+1)));
end
end
fprintf(fid, '\r\n');
end
fclose(fid);
end
xlswrite('GENOTYPES.xls',gtable);
guidata(hObject, handles);
% It generates the pedfile
function generate_pedfile_Callback(hObject, eventdata, handles)
% Retrieve global variables
snp=handles.snp;
sex=handles.sex;
pheno=handles.pheno;
gtable=handles.gtable;
sample=handles.sample;
listsnp=handles.listsnp;
fid = fopen('pedfile.ped', 'w');
if(find(hist(str2num(char(pheno))))>2)
typetrait=1;
else
typetrait=0;
end
for p=1:length(sample)
fprintf(fid, '%d ',p);
fprintf(fid, '%s %d %d ',char(sample(p)),0,0);
if(~isnan(char(sex(p,1))))
fprintf(fid, '%c ',char(sex(p)));
else
fprintf(fid, '%d ',0);
end
if(~isnan(char(pheno(p))))
fprintf(fid, '%s ',char(pheno(p)));
else
switch(typetrait)
case 0
fprintf(fid, '%d ',0);
case 1
fprintf(fid, '%d ',-9);
end
end
for k=1:length(listsnp)
q=listsnp(k);
if(isempty(cell2mat(gtable(p+1,q+1))))
fprintf(fid, '%d %d ',0,0);
else
str=regexp(char(gtable(p+1,q+1)),'.','match');
fprintf(fid, '%c %c ',str{1},str{2});
end
end
fprintf(fid, '\r\n');
end
fclose(fid);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% It reorganizes the genotype table according to repeated samples across the
% plates. These repetitions allows the control of the concordance.
function load_duplicates_Callback(hObject, eventdata, handles)
% Retrieve global variables
snp=handles.snp;
sex=handles.sex;
age=handles.age;
pheno=handles.pheno;
sample=handles.sample;
gtable=handles.gtable;
% Read textfile containing repeated samples
[txtc pathc]=uigetfile('*.txt','Select textfile with duplicated samples');
fid=fopen(char(strcat(pathc,txtc)));
set(handles.duplicates_pathname,'String', char(strcat(pathc,txtc)));
dup = textscan(fid,'%s%s');
% Load columns to variables
subid = dup{1}(2:end);
samid = dup{2}(2:end);
samex=[];d=1;i = 1;
num_c=zeros(1,length(snp));den_c=zeros(1,length(snp));
% This loop compares the genotypes of the repeated samples for each SNP,
% calculates the concordance rate, and leaves only one entry per sample with
% the most frequent genotype at each SNP
while(i<length(subid))
sdup={};
numgn=[];
ind = strmatch(subid(i),subid);
for k=1:length(ind)
samidx=strmatch(samid(ind(k),:),sample,'exact');
sdup(k,:)=gtable(samidx+1,:);
if(k<length(ind))
samex=[samex samidx+1];
end
end
lista{1,d}=sdup;
strgen(1,1)=sdup(k,1);
for k=2:size(sdup,2)
gs=sdup(:,k);
for q=1:length(gs)
numgn(q)=sum(cell2mat(regexpi(gs, gs(q))));
end
indx= find(numgn==max(numgn));
if (max(numgn)~=0)
num_c(1,k-1)=num_c(1,k-1)+max(numgn)-1;
den_c(1,k-1)=den_c(1,k-1)+length(find(numgn))-1;
end
if(length(indx)==max(numgn))
strgen(1,k)=gs(indx(1));
else
strgen(1,k)={''};
end
end
gtable(samidx+1,:)=strgen;
i=ind(end)+1;
end
% Update gtable matrix dropping out the repetitions
auxvec = 1:size(gtable,1);
auxvec(1,[samex])=0;
ac_indx=find(auxvec);
handles.gtable=gtable(ac_indx,:);
ac_indx=ac_indx-1;
handles.age=age(ac_indx(2:end),1);
handles.sex=sex(ac_indx(2:end),1);
handles.pheno=pheno(ac_indx(2:end),1);
handles.sample=sample(ac_indx(2:end),1);
% Generates Descriptives textfile of the studied sample
fid = fopen('descriptives.txt', 'w');
for j=1:length(handles.sample)
fprintf(fid, '%s %s %s %s \r\n',char(handles.sample(j)),char(handles.sex(j)),char(handles.age(j)),char(handles.pheno(j)));
end
fclose(fid);
% Generates SNP concordance rate textfile
conc = 100*(num_c./den_c)';
fid = fopen('concordance.txt','wt');
fprintf(fid,'%s %s\n','SNP','Concordance');
for i=1:length(snp)
fprintf(fid,'%s\t',char(snp(i)));
fprintf(fid,'%2.2f%%\n',conc(i));
end
fclose(fid);
guidata(hObject, handles);
function list_GenotypeArea_CreateFcn(hObject, eventdata, handles)
if ispc && isequal(get(hObject,'BackgroundColor'), get(0,'defaultUicontrolBackgroundColor'))
set(hObject,'BackgroundColor','white');
end
% It loads the genotype info
function load_genotype_data_Callback(hObject, eventdata, handles)
% Load excel files from Sequenom runs
[xlsq path]=uigetfile('*.xls','Select GenotypeArea.xls files output from Sequenom','MultiSelect','on');
set(handles.list_GenotypeArea,'String',xlsq);
if(iscell(xlsq))
co=length(xlsq);
else
co=1;
end
% Retrieve needed global variables
sample=handles.sample;
snp=handles.snp;
gtable=handles.gtable;
% gtable will be a table containing all the genotyping info
% Its first row and column are the SNP and sample tags respectively
gtable = cell(length(sample)+1,length(snp)+1);
gtable(1,2:end) = snp;
gtable(2:end,1) = sample;
% Loop reading each GenotypeArea.xls file
for is=1:co
% Distinguish multiple choice and single choice
if(iscell(xlsq))
sq=importdata(char(strcat(path,xlsq{is})));
else
sq=importdata(char(strcat(path,xlsq)));
end
% Load in variables the matrix from Genotypes' sheet
samplesq=sq.textdata.Genotypes(2:end,1);
positionsq=sq.textdata.Genotypes(2:end,2);
namesnp=sq.textdata.Genotypes(1,3:end);
genotype=sq.textdata.Genotypes(2:end,3:end);
% Run the loaded matrix and fill in the gtable matrix
for k=1:size(samplesq,1)
samidx = strmatch(samplesq(k),sample,'exact');
if(~isempty(samidx))
for h=1:size(namesnp,2)
snpidx = strmatch(namesnp(h),snp,'exact');
if(~isempty(snpidx))
if(~isempty(cell2mat(genotype(k,h)))& isempty(cell2mat(gtable(samidx+1,snpidx+1))))
if(length(genotype{k,h})==1)
gtable(samidx+1,snpidx+1)=strcat(genotype(k,h),genotype(k,h));
else
gtable(samidx+1,snpidx+1)=genotype(k,h);
end
end
end
end
end
end
end
%Update Global variables
handles.gtable=gtable;
guidata(hObject, handles);
% It reads the samplefile. This file contains information about the samples
% that have been genotyped.
function load_samples_data_Callback(hObject, eventdata, handles)
[txts paths]=uigetfile('*.txt','Select samplefile');
fids=fopen(char(strcat(paths,txts)));
set(handles.samplefile_pathname,'String', char(strcat(paths,txts)));
sam = textscan(fids,'%s%s%s%s');
sample = sam{1}(2:end);
sex = sam{2}(2:end);
age = sam{3}(2:end);
pheno = sam{4}(2:end);
%Update Global variables
handles.sample=sample;
handles.sex=sex;
handles.pheno=pheno;
handles.age=age;
guidata(hObject, handles);
% It reads the mapfile. This file contains information about the SNPs that
% are going to be read and reported in the pedfile.
function load_mapfile_Callback(hObject, eventdata, handles)
% Load mapile
[txtm pathm]=uigetfile('*.txt','Select mapfile');
fidm=fopen(char(strcat(pathm,txtm)));
set(handles.mapfile_pathname,'String', char(strcat(pathm,txtm)));
% Read mapfile, it assigns each column to a global variable
map = textscan(fidm,'%s%s%s%s');
chr = map{1}(2:end);
snp = map{2}(2:end);
gene = map{3}(2:end);
position = map{4}(2:end);
% Find out what genes are present in the files
setgenes=gene(1);
for k=2:size(gene,1)
if(isempty(strmatch(gene(k),setgenes,'exact')))
setgenes(end+1)=gene(k);
end
end
% Identify which SNPs belong to each gene
for k=1:length(setgenes)
matches = regexp(char(setgenes(1,k)),gene);
index=find(~cellfun(@isempty,matches));
setgenes{2,k}=index;
end
cc=setgenes(1,:);
set(handles.list_of_genes,'String',cc);
%Update global variables
handles.chr=chr;
handles.snp=snp;
handles.gene=gene;
handles.position=position;
handles.setgenes=setgenes;
guidata(hObject, handles);
% It generates the mapfile
function gen_mapfile_Callback(hObject, eventdata, handles)
% Retrieve global variables
chr = handles.chr;
position = handles.position;
snp = handles.snp;
listsnp = handles.listsnp;
fid = fopen('mapfile.map','w')
for k=1:length(listsnp)
q=listsnp(k);
fprintf(fid, '%s %s %d %s\r\n',char(chr(q)),char(snp(q)),0, char(position(q)) );
end
fclose(fid);