DNAmusic3 - The life music by Takahashi and Miller

by

 

18 Sep 2009 (Updated )

Converts an mRNA and its encoded protein sequence into music

DNAmusic3(varargin)
function DNAmusic3(varargin)
%DNAMUSIC3 - The life music
%Audification (or sonification) is the technique of using the sense of hearing
%to analyse data. The advantage of audification over visualisation in data
%analysis is that sound has the property that when different notes are played
%together they can still be individually heard: in vision colours blend to form
%new colours. DNA and proteins map naturally onto musical sequences. Several
%algorithms were proposed to translate DNA and proteins into music. This
%function uses the algorithm proposed by Rie Takahashi and Jeffrey Miller
%(http://genomebiology.com/2007/8/5/405)
%
% Syntax: 	DNAmusic3(mRNAid,wn)
%
%     Inputs:
%           mRNAid - this is the id of the messenger RNA that you want to
%                    translate, as deposited on the NCBI database
%                    (http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene).
%                    This is a string.
%                    (default = 'NM_005218').
%           wn - this is the duration (in seconds) of a whole note; it must
%                be a positive, finite, numeric scalar (default = 2).
%
%     Outputs:
%           None. Progress messages are printed on the command window and
%           the resulting music is played with SOUNDSC at 8192 Hz.
%
%     Errors:
%           An error is raised when more than two inputs are given, when an
%           input has the wrong type, when the GenBank record cannot be
%           read from disk nor downloaded, when the record has no CDS
%           annotation or an empty translation, or when a codon of the
%           coding sequence is not listed in DNAcode.mat.
%
%      Example:
%
%      Calling on Matlab DNAmusic3
%
% Query the NCBI database to retrieve the NM_005218 mRNA
% Data retrieved
% Sonification process for Homo sapiens defensin, beta 1 (DEFB1), mRNA.
% Protein sonification
% 	Convert 68 amino acids into a notes array
% 	Done
% 	Convert notes array into frequencies array
% 		Convert 1st root
% 		Done
% 		Convert 3rd major
% 		Done
% 		Convert 5th perfect
% 		Done
% Mix and play
% to stop press ctrl-c
%
% Of course the longer the mRNA, the longer (very much longer) will be the
% time to convert it into music and the higher the memory needed...
%
%           Created by Giuseppe Cardillo
%           giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:
% Cardillo G. (2009). DNAmusic3 - The life music by Takahashi and Miller
% http://www.mathworks.com/matlabcentral/fileexchange/...

fs=8192; %sampling rate (Hz); fnote synthesizes its notes at this same rate

%Input Error handling
args=cell(varargin);
nu=numel(args);
if nu>2
    error('DNAmusic3 accepts max 2 input arguments')
end
defaults={'NM_005218';2};
defaults(1:nu)=args;
[mRNAid,semibreve]=deal(defaults{:});
if ~ischar(mRNAid)
    error('DNAmusic3 requires a string as mRNAid')
end
%All the conditions are grouped explicitly: && binds tighter than ||, so an
%unparenthesized mix of the two does not express "any of these is violated".
if ~(isnumeric(semibreve) && isscalar(semibreve) && isfinite(semibreve) && semibreve>0)
    error('DNAmusic3 requires a positive, scalar, numeric and finite WN value.')
end

%Retrieve informations
clc
try
    %try to see if a genbank file was saved on disk
    S=genbankread([mRNAid '.GBK']);
catch %else retrieve informations from the NCBI database
    disp(['Query the NCBI database to retrieve the ' mRNAid ' mRNA'])
    try
        S=getgenbank(mRNAid);
    catch ME
        %Nothing can be done without the record: stop here with the reason
        %instead of failing later on an undefined variable.
        error('DNAmusic3:retrievalFailed', ...
            'Unable to retrieve %s from the NCBI database: %s',mRNAid,ME.message)
    end
    disp('Data retrieved')
end

%Keep only the useful informations
if ~isfield(S,'CDS') || isempty(S.CDS)
    error('DNAmusic3:noCDS','The record %s has no CDS annotation.',mRNAid)
end
cdsinfo=S.CDS(1); %use the first CDS entry if the record lists several
mRNAsequence=upper(S.Sequence); %mRNA sequence
mRNAcoding=cdsinfo.indices; %the coding portion
protein=cdsinfo.translation; %the protein sequence
disp(['Sonification process for ' S.Definition])
clear S cdsinfo

%codons frequencies are used to set duration of the notes
CDS=double(mRNAsequence(mRNAcoding(1):mRNAcoding(2))); %convert in ASCII code
%create a Nx3 matrix: each row is a codon
codons=reshape(CDS,3,length(CDS)/3)';
%Load the Standard DNA code matrix
stored=load('DNAcode.mat','code');
code=stored.code;
%find the codon in the matrix...
[tf,codonidx]=ismember(codons,code(:,1:3),'rows');
if ~all(tf)
    error('DNAmusic3:unknownCodon', ...
        'The coding sequence contains %d codon(s) not listed in DNAcode.mat.',sum(~tf))
end
%...and pick the codon frequency
cf=code(codonidx,5)';
cf(end)=[]; %delete the stop codon
%Rare codons give short notes, frequent codons give long notes. Every codon
%falls in one of the four classes, so that no note is left without a duration.
durations=semibreve./[8 4 2 1];
samples=zeros(1,4);
for c=1:4
    samples(c)=length(0:1/fs:durations(c)); %same time base used by fnote
end
class=1+(cf>=11)+(cf>=21)+(cf>=30);
d=durations(class); %duration (s) of each note
L=samples(class); %number of samples of each note
clear mRNAsequence mRNAcoding codons CDS code stored cf codonidx tf class

%Now convert the protein into notes using logical indexing
disp('Protein sonification')
lp=length(protein);
if lp==0
    error('DNAmusic3:emptyProtein','The record %s has an empty CDS translation.',mRNAid)
end
fprintf('\tConvert %i amino acids into a notes array\n',lp)
%Amino acid -> triad (root, third, fifth)
chords={ ...
    'W','C3', 'E3', 'G3';  ...
    'M','D3', 'F#3','A3';  ...
    'P','E3', 'G#3','B3';  ...
    'H','F3', 'A3', 'C4';  ...
    'Y','G3', 'B3', 'D4';  ...
    'F','B3', 'D4', 'G4';  ...
    'L','A3', 'C#4','E4';  ...
    'I','C#4','E4', 'A4';  ...
    'V','B3', 'D#4','F#4'; ...
    'A','D#4','F#4','B4';  ...
    'C','C4', 'E4', 'G4';  ...
    'G','D4', 'F#4','A4';  ...
    'T','E4', 'G#4','B4';  ...
    'S','G#4','B4', 'E5';  ...
    'Q','F4', 'A4', 'C5';  ...
    'N','A4', 'C5', 'F5';  ...
    'E','G4', 'B4', 'D5';  ...
    'D','B4', 'D5', 'G5';  ...
    'R','A4', 'C#5','E5';  ...
    'K','C#5','E5', 'A5'};
%Residues that are not in the table (e.g. X or U) are played as rests ('r')
Proteinmusic=repmat({'r'},lp,3);
for c=1:size(chords,1)
    hit=(protein==chords{c,1});
    for v=1:3
        Proteinmusic(hit,v)=chords(c,v+1);
    end
end
clear protein chords hit
fprintf('\tDone\n')
fprintf('\tConvert notes array into frequencies array\n')
idxstop=cumsum(L);
idxstart=[1 idxstop(1:end-1)+1];
%Convert into tunes. The three voices are summed on the fly into a single
%array: voice v is synthesized from column v of Proteinmusic.
voices={'1st root','3rd major','5th perfect'};
music=zeros(1,idxstop(end)); %array preallocation
for v=1:3
    fprintf('\t\tConvert %s\n',voices{v})
    for k=1:lp
        span=idxstart(k):idxstop(k);
        music(span)=music(span)+fnote(Proteinmusic{k,v},d(k));
    end
    fprintf('\t\tDone\n')
end
clear d Proteinmusic L idxstart idxstop
disp('Mix and play')
disp('to stop press ctrl-c')
disp(' ')
soundsc(music(:),fs);
end

function y=fnote(str,dur)
%FNOTE Synthesize a single note (or a rest) as a sine wave sampled at 8192 Hz.
%
%     Inputs:
%           str - note name: a letter A..G, an optional '#' (sharp) and the
%                 octave digit as last character (e.g. 'C4', 'F#3'); or 'r'
%                 for a rest.
%           dur - duration of the note in seconds.
%
%     Output:
%           y - row vector with the samples at the times 0:1/8192:dur. A short
%               linear fade-in and fade-out is applied to avoid clicks.
%
%The octave digit follows the numbering of the piano keys table at
%http://en.wikipedia.org/wiki/Piano_key_frequencies : an octave starts at C,
%A4 is key 49 (440 Hz) and C4 is key 40.
fs=8192; %sampling rate (Hz)
t=0:1/fs:dur;
if strcmp(str,'r') %this is a rest
    y=zeros(size(t));
else
    letters='CDEFGAB';
    semitones=[0 2 4 5 7 9 11]; %distance (in semitones) of each letter from C
    iv=find(letters==str(1),1); %find the note
    oct=str2double(str(end)); %find the octave
    if isempty(iv) || isnan(oct)
        error('fnote:invalidNote','''%s'' is not a valid note name.',str)
    end
    alt=any(str=='#'); %find alteration (diesis #)
    %compute the key: C of octave n is key 12*n-8 (C1=4, C4=40), so that the
    %octave changes at C and not at A
    key=12*oct+semitones(iv)+alt-8;
    f=440*2^((key-49)/12); %compute the frequency
    % Create a pitch vector
    y=sin((2*pi*f).*t);
end
% fading the vector to avoid clicks
fadetime=.01; %sec
%the fade can not be longer than half of the note, otherwise very short notes
%would be indexed out of range
nfade=min(round(fs*fadetime),floor(numel(y)/2));
if nfade>1
    ramp=linspace(0,1,nfade);
    y(1:nfade)=y(1:nfade).*ramp;
    y(end-nfade+1:end)=y(end-nfade+1:end).*fliplr(ramp);
end
end

Contact us