Code covered by the BSD License  

Highlights from
DNAmusic1 - the life music by Nobuo Munakata

DNAmusic1 - the life music by Nobuo Munakata

by

 

15 Sep 2009 (Updated )

convert a mRNA and coded protein sequences into music

DNAmusic1(varargin)
function DNAmusic1(varargin)
%DNAMUSIC1 - The life music
%Audification (or sonification) is the technique of using the sense of hearing
%to analyse data. The advantage of audification over visualisation in data
%analysis is that sound has the property that when different notes are played
%together they can still be individually heard: in vision colours blend to form
%new colours. DNA and proteins map naturally onto musical sequences. Several
%algorithms were proposed to translate DNA and proteins into music. This
%function uses the algorithm proposed by Nobuo Munakata
%(http://www.toshima.ne.jp/~edogiku/)
%
% Syntax: 	DNAmusic1(mRNAid,wn)
%
%     Inputs:
%           mRNAid - this is the id of the messenger that you want to translate
%                    deposited on the NCBI database
%                    (http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene).
%                    This is a string.
%                    (default = 'NM_005218').
%           wn - this is the duration (in seconds) of a whole note; it must be
%                a positive, finite, real scalar (default = 2).
%
%      Example:
%
%      Calling on Matlab DNAmusic1
%
% Query the NCBI database to retrieve the NM_005218 mRNA
% Data retrieved
% Sonification process for Homo sapiens defensin, beta 1 (DEFB1), mRNA.
% mRNA sonification
% ....Convert 484 nucleotides into a notes array
% ....Done
% ....Convert notes array into frequencies array
% ....Done
% Protein sonification
% ....Convert 68 amino acids into a notes array
% ....Done
% ....Convert notes array into frequencies array
% Done
% Mix and play
% to stop press ctrl-c
%
% Prelude (mRNA 5' UTR)
% Main theme (CDS and protein)
% Finale (mRNA 3' UTR)
%
% Of course the longer the mRNA, the longer the time needed to convert it into
% music and the higher the memory needed...
%
%           Created by Giuseppe Cardillo
%           giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:
% Cardillo G. (2009). DNAmusic1 - The life music by Nobuo Munakata
% http://www.mathworks.com/matlabcentral/fileexchange/25319

%fnote reads the time base of a single note from this global variable
global t

%Input error handling (done before any side effect such as clc)
nu=numel(varargin);
if nu>2
    error('DNAmusic1:tooManyInputs','DNAmusic1 accepts max 2 input arguments')
end
defaults={'NM_005218',2};
defaults(1:nu)=varargin;
[mRNAid,semibreve]=deal(defaults{:});
if isa(mRNAid,'string') && isscalar(mRNAid)
    mRNAid=char(mRNAid); %accept a string scalar as well as a char vector
end
if ~ischar(mRNAid) || isempty(mRNAid) || size(mRNAid,1)~=1
    error('DNAmusic1:invalidId','DNAmusic1 requires a string as mRNAid')
end
%isnumeric/isscalar are tested first so that the following tests always
%operate on a single number
if ~(isnumeric(semibreve) && isscalar(semibreve) && isreal(semibreve) ...
        && isfinite(semibreve) && semibreve>0)
    error('DNAmusic1:invalidWN',...
        'DNAmusic1 requires a scalar, numeric, finite and positive WN value.')
end

%Retrieve information
clc
try
    %try to see if a genbank file was saved on disk
    S=genbankread([mRNAid '.GBK']);
catch %else retrieve information from the NCBI database
    disp(['Query the NCBI database to retrieve the ' mRNAid ' mRNA'])
    try
        S=getgenbank(mRNAid);
    catch ME
        %without the record nothing below can work: stop with a clear message
        error('DNAmusic1:retrievalFailed',...
            'Unable to retrieve %s from the NCBI database: %s',mRNAid,ME.message)
    end
    disp('Data retrieved')
end
S=S(1); %a GenBank file may hold several records: use the first one

%Keep only the useful information
if ~isfield(S,'CDS') || isempty(S.CDS)
    error('DNAmusic1:noCDS','The record %s has no annotated CDS.',mRNAid)
end
cds=S.CDS(1); %use the first annotated coding sequence
mRNAsequence=upper(S.Sequence); %mRNA sequence
mRNAcoding=cds.indices; %the coding portion
protein=upper(cds.translation); %the protein sequence
nnt=length(mRNAsequence);
if numel(mRNAcoding)~=2 || mRNAcoding(1)<1 || mRNAcoding(2)>nnt ...
        || mRNAcoding(2)<mRNAcoding(1) || mod(mRNAcoding(2)-mRNAcoding(1)+1,3)~=0
    error('DNAmusic1:unsupportedCDS',...
        'The CDS of %s must be a single forward range made of whole codons.',mRNAid)
end
disp(['Sonification process for ' S.Definition])
clear S cds

Fs=8192; %sampling frequency (Hz)
dur=semibreve/8; %each nucleotide/amino acid lasts 1/8 of a whole note
t=0:1/Fs:dur;
%Start the mRNA sonification process:
%1) Convert all nucleotides into notes according to:
%http://www.toshima.ne.jp/~edogiku/TextTable/WhatisGM.html#TonAssTable
%using logical indexing. Symbols that are not in the table (e.g. N) are
%played as rests.
disp('mRNA sonification')
fprintf('....Convert %i nucleotides into a notes array\n',nnt)
mRNAmusic=repmat({'r'},nnt,1);
mRNAmusic(mRNAsequence=='A')={'A3'};
mRNAmusic(mRNAsequence=='C')={'E3'};
mRNAmusic(mRNAsequence=='G')={'D3'};
mRNAmusic(mRNAsequence=='T' | mRNAsequence=='U')={'G3'};

%2) now split the music in three parts:
%the prelude (5' UTR)
mRNAmusicprelude=mRNAmusic(1:mRNAcoding(1)-1);
%the main theme (CDS): three nucleotides followed by a rest; the amino acid
%note is played during that rest
mRNAmusicmaintheme=reshape(mRNAmusic(mRNAcoding(1):mRNAcoding(2)),3,[]);
mRNAmusicmaintheme(4,:)={'r'};
mRNAmusicmaintheme=mRNAmusicmaintheme(:);
%the finale (3' UTR)
mRNAmusicfinale=mRNAmusic(mRNAcoding(2)+1:end);
clear mRNAmusic mRNAcoding mRNAsequence
disp('....Done')
disp('....Convert notes array into frequencies array')

L=length(t); %length of each note
lhprelude=notes2wave(mRNAmusicprelude,L);
lhmaintheme=notes2wave(mRNAmusicmaintheme,L);
lhfinale=notes2wave(mRNAmusicfinale,L);
disp('....Done')
clear mRNAmusicprelude mRNAmusicmaintheme mRNAmusicfinale

%Now convert the protein into notes using logical indexing
disp('Protein sonification')
fprintf('....Convert %i amino acids into a notes array\n',length(protein))
%The array must be 1 row longer because the last codon is a stop codon: so
%there is no amino acid (a rest is played). Every cell starts as a rest, so
%residues that are not in the table (e.g. X) are played as rests too.
Proteinmusic=repmat({'r'},length(protein)+1,4);
aa2note={'I','B4';'V','A4';'L','F#4';'M','E4';'F','D4';'W','B3';'Y','A3';...
    'C','G3';'A','E3';'P','D3';'G','C3';'T','A2';'S','G2';'Q','F2';...
    'N','D2';'E','C2';'D','A#1';'H','G1';'K','F1';'R','D#1'};
for k=1:size(aa2note,1)
    Proteinmusic(protein==aa2note{k,1},4)=aa2note(k,2);
end
%row-wise reading: rest rest rest note for each codon
Proteinmusic=Proteinmusic';
Proteinmusic=Proteinmusic(:);
disp('....Done')
disp('....Convert notes array into frequencies array')
rhmaintheme=notes2wave(Proteinmusic,L);
clear protein dur Proteinmusic L k
disp('Done')

disp('Mix and play')
disp('to stop press ctrl-c')
disp(' ')
disp('Prelude (mRNA 5'' UTR)')
playsegment(lhprelude,Fs);
disp('Main theme (CDS and protein)')
%here mix the mRNA and the protein music
u=min(length(rhmaintheme),length(lhmaintheme));
playsegment(lhmaintheme(1:u)+rhmaintheme(1:u),Fs);
disp('Finale (mRNA 3'' UTR)')
playsegment(lhfinale,Fs);
end

function w=notes2wave(notes,L)
%NOTES2WAVE Concatenate the waveforms of a cell array of note names.
%Each distinct note is synthesised only once with fnote and then reused.
%L is the number of samples of a single note.
if isempty(notes)
    w=zeros(0,1);
    return
end
[distinct,dummy,idx]=unique(notes); %#ok<ASGLU>
bank=zeros(L,numel(distinct)); %one column for each distinct note
for k=1:numel(distinct)
    tone=fnote(distinct{k});
    bank(:,k)=tone(:);
end
w=reshape(bank(:,idx),[],1);
end

function playsegment(y,Fs)
%PLAYSEGMENT Play a signal scaled to full range and wait until it ends.
%Blocking playback keeps the sections in sequence; back-to-back soundsc
%calls are not expected to wait for each other on current MATLAB.
if isempty(y)
    return %nothing to play (e.g. a messenger without UTR)
end
peak=max(abs(y));
if peak>0
    y=y/peak; %audioplayer clips the samples outside [-1,1]
end
player=audioplayer(y,Fs);
playblocking(player);
end

function y=fnote(str)
%FNOTE Synthesise a single note (or a rest) on the global time base t.
%
%     Input:
%           str - note name: a letter from A to G, an optional alteration
%                 ('#' sharp or 'b' flat) and a one digit octave number in
%                 scientific pitch notation (e.g. 'A4', 'F#4'); 'r' is a rest.
%     Output:
%           y - vector with the same size as the global t: a sine wave at the
%               frequency of the note (zeros for a rest) with a short linear
%               fade-in and fade-out to avoid clicks.
%
%The frequency comes from the piano key number, where A4 (440 Hz) is key 49:
%see http://en.wikipedia.org/wiki/Piano_key_frequencies
global t
Fs=8192; %sampling frequency (Hz) used by the caller to build t
if isempty(t)
    error('fnote:noTimeBase','The global time base t must be set before calling fnote.')
end
if strcmp(str,'r') %this is a rest
    y=zeros(size(t)); %fading silence would change nothing
    return
end
if ~ischar(str) || numel(str)<2 || numel(str)>3
    error('fnote:invalidNote','Invalid note name.')
end
letters='ABCDEFG';
offset=[1 3 4 6 8 9 11]; %key number of each letter in the octave starting at A0
iv=offset(letters==str(1)); %find the note
oct=str2double(str(end)); %find the octave
if isempty(iv) || isnan(oct)
    error('fnote:invalidNote','Invalid note name: %s',str)
end
alt=0; %find alteration
if numel(str)==3
    switch str(2)
        case '#'
            alt=1;
        case 'b'
            alt=-1;
        otherwise
            error('fnote:invalidNote','Invalid note name: %s',str)
    end
end
key=iv+alt+12*oct; %compute the key
if iv>=4
    %the octave number changes at C and not at A: C4...G4 lie below A4, so
    %the notes from C to G are one octave lower than the A-based count
    key=key-12;
end
f=440*2^((key-49)/12); %compute the frequency
% Create a pitch vector
y=sin((2*pi*f).*t);
% fading the vector to avoid clicks; the ramps never exceed half of the note
% so that very short notes are handled too
fadetime=.01; %sec
nfade=min(floor(Fs*fadetime)+1,floor(numel(y)/2));
if nfade>0
    ramp=linspace(0,1,nfade);
    if size(y,1)>1
        ramp=ramp.'; %follow the orientation of t
    end
    y(1:nfade)=y(1:nfade).*ramp;
    y(end-nfade+1:end)=y(end-nfade+1:end).*ramp(end:-1:1);
end
end

Contact us