Code covered by the BSD License  

Highlights from
DNAmusic2 - The life music by King and Angus

DNAmusic2 - The life music by King and Angus

by

 

16 Sep 2009 (Updated )

Converts an mRNA and its encoded protein sequence into music

DNAmusic2(varargin)
function DNAmusic2(varargin)
%DNAMUSIC2 - The life music
%Audification (or sonification) is the technique of using the sense of hearing
%to analyse data. The advantage of audification over visualisation in data
%analysis is that sound has the property that when different notes are played
%together they can still be individually heard: in vision colours blend to form
%new colours. DNA and proteins map naturally onto musical sequences. Several
%algorithms were proposed to translate DNA and proteins into music. This
%function uses the algorithm proposed by Ross D. King and Colin G. Angus
%(PM - Protein Music - Cabios applications notes 1996; 12(3):251-252)
%
% Syntax: 	DNAmusic2(mRNAid,wn)
%
%     Inputs:
%           mRNAid - this is the id of the messenger that you want to translate
%                    deposited on the NCBI database
%                    (http://www.ncbi.nlm.nih.gov/sites/entrez?db=gene).
%                    This is a string.
%                    (default = 'NM_005218').
%           wn - this is the duration (in seconds) of a whole note; it must be
%                a positive, finite, numeric scalar (default = 1).
%
%     Errors:
%           An error is raised when more than 2 inputs are given, when an
%           input has the wrong type, when the record can be read neither
%           from <mRNAid>.GBK nor from the NCBI database, or when the record
%           has no CDS annotation.
%
%      Example:
%
%      Calling on Matlab DNAmusic2
%
% Query the NCBI database to retrieve the NM_005218 mRNA
% Data retrieved
% Sonification process for Homo sapiens defensin, beta 1 (DEFB1), mRNA.
% mRNA sonification
% ....Convert 484 nucleotides into a notes array
% ....Done
% ....Convert notes array into frequencies array
% ....Done
% Protein sonification
% ....Convert 68 amino acids into a notes array
% ....Done
% ....Convert notes array into frequencies array
% Done
% Mix and play
% to stop press ctrl-c
%
% Prelude (mRNA 5' UTR)
% Main theme (CDS and protein)
% Finale (mRNA 3' UTR)
%
% Of course the longer the mRNA, the longer (very much longer) the time needed
% to convert it into music and the higher the memory needed...
%
%           Created by Giuseppe Cardillo
%           giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:
% Cardillo G. (2009). DNAmusic2 - The life music by King and Angus
% http://www.mathworks.com/matlabcentral/fileexchange/25338

%Input Error handling
args=cell(varargin);
nu=numel(args);
if nu>2
    error('DNAmusic2:tooManyInputs','DNAmusic2 accepts max 2 input arguments')
end
default.values = {'NM_005218';1};
default.values(1:nu) = args;
[mRNAid, semibreve] = deal(default.values{:});
if ~ischar(mRNAid)
    error('DNAmusic2:invalidId','DNAmusic2 requires a string as mRNAid')
end
%Test the type first: the short-circuit && guarantees that isfinite and the
%comparison are only evaluated on a numeric scalar.
if ~(isnumeric(semibreve) && isscalar(semibreve) && isfinite(semibreve) && semibreve>0)
    error('DNAmusic2:invalidWN','DNAmusic2 requires a scalar, numeric, finite and positive WN value.')
end
clear args default nu

%Retrieve informations
clc
try
    %try to see if a genbank file was saved on disk
    S=genbankread([mRNAid '.GBK']);
catch %#ok<CTCH> %else retrieve informations from the NCBI database
    disp(['Query the NCBI database to retrieve the ' mRNAid ' mRNA'])
    try
        S=getgenbank(mRNAid);
    catch ME
        %Without the record nothing below can run: stop here with a clear
        %message instead of failing later on an undefined variable.
        error('DNAmusic2:retrievalFailed', ...
            'Unable to retrieve %s from disk or from the NCBI database: %s', ...
            mRNAid,ME.message)
    end
    disp('Data retrieved')
end

%fnote reads the time base of a single note from this global variable
global t

%Keep only the useful informations
if ~isfield(S,'CDS') || isempty(S.CDS)
    error('DNAmusic2:noCDS','The record %s has no CDS annotation.',mRNAid)
end
cds=S.CDS(1); %use the first annotated CDS
mRNAsequence=upper(S.Sequence); %mRNA sequence
mRNAcoding=cds.indices; %the coding portion
protein=cds.translation; %the protein sequence
disp(['Sonification process for ' S.Definition])
clear S cds

dur=semibreve/4; %each nucleotide lasts a quarter of a whole note
t=0:1/8192:dur;
L=length(t);%length of each note
%Start the mRNA sonification process:
%1) Convert all nucleotides into notes according to:
%PM - Protein Music - Cabios applications notes 1996; 12(3):251-252
%using logical indexing
disp('mRNA sonification')
fprintf('....Convert %i nucleotides into a notes array\n',length(mRNAsequence))
%Every position starts as a rest, so that symbols other than A, C, G, T/U
%(e.g. N) are rendered as silence instead of leaving an empty cell.
mRNAmusic=repmat({'r'},length(mRNAsequence),1);
mRNAmusic(mRNAsequence=='A')={'A4'};
mRNAmusic(mRNAsequence=='C')={'C3'};
mRNAmusic(mRNAsequence=='G')={'G3'};
mRNAmusic(mRNAsequence=='T' | mRNAsequence=='U')={'E3'};

%2) now split the music in three parts:
%the prelude (5' UTR)
mRNAmusicprelude=mRNAmusic(1:mRNAcoding(1)-1);
%the main theme (CDS)
mRNAmusicmaintheme=mRNAmusic(mRNAcoding(1):mRNAcoding(2));
%the finale (3' UTR)
mRNAmusicfinale=mRNAmusic(mRNAcoding(2)+1:end);
clear mRNAmusic mRNAcoding mRNAsequence
disp('....Done')
disp('....Convert notes array into frequencies array')

% Convert into tunes
lhprelude=notes2wave(mRNAmusicprelude,L);
lhmaintheme=notes2wave(mRNAmusicmaintheme,L);
lhfinale=notes2wave(mRNAmusicfinale,L);
disp('....Done')
clear mRNAmusicprelude mRNAmusicmaintheme mRNAmusicfinale L


%Now convert the protein into notes
dur=semibreve/8; %six notes per amino acid, each an eighth of a whole note
t=0:1/8192:dur;
L=length(t);%length of each note
disp('Protein sonification')
fprintf('....Convert %i amino acids into a notes array\n',length(protein))
%Amino acid groups and the six-note motif played for each of them
motifs={
    'P',    {'r'  'r'  'r'  'r'  'r'  'r' }
    'IVL',  {'C1' 'C1' 'r'  'r'  'G1' 'G1'}
    'WY',   {'D1' 'D1' 'C1' 'C1' 'A2' 'A2'}
    'MCAG', {'C1' 'C1' 'r'  'r'  'r'  'r' }
    'SQN',  {'A2' 'A2' 'r'  'r'  'r'  'r' }
    'TE',   {'A2' 'A2' 'r'  'r'  'C1' 'C1'}
    'D',    {'A2' 'A2' 'r'  'r'  'F1' 'F1'}
    'H',    {'E1' 'E1' 'A2' 'C1' 'D1' 'F1'}
    'K',    {'A2' 'A2' 'C1' 'C1' 'F1' 'E1'}
    'R',    {'A2' 'A2' 'E1' 'E1' 'F1' 'F1'}
    'F',    {'D1' 'D1' 'r'  'r'  'C1' 'C1'}
    };
%The array is 1 row longer than the protein because the last codon is a stop
%codon: there is no amino acid, so a rest is played. Filling everything with
%rests also covers residues that are not in the table above.
Proteinmusic=repmat({'r'},length(protein)+1,6);
for m=1:size(motifs,1)
    idx=find(ismember(protein,motifs{m,1}));
    if ~isempty(idx)
        Proteinmusic(idx,:)=repmat(motifs{m,2},numel(idx),1);
    end
end
%Transpose before linearizing so that the six notes of each amino acid stay
%consecutive (MATLAB linearizes column by column).
Proteinmusic=Proteinmusic';
Proteinmusic=Proteinmusic(:);
disp('....Done')
disp('....Convert notes array into frequencies array')
%Convert into tunes
rhmaintheme=notes2wave(Proteinmusic,L);
clear protein dur Proteinmusic L motifs idx m t
disp('Done')

disp('Mix and play')
disp('to stop press ctrl-c')
disp(' ')
disp('Prelude (mRNA 5'' UTR)')
if ~isempty(lhprelude) %a messenger may have no 5' UTR
    soundsc(lhprelude,8192);
end
disp('Main theme (CDS and protein)')
%here mix the mRNA and the protein music; the two tracks may differ by a few
%samples, so both are truncated to the shorter one
u = min(length(rhmaintheme),length(lhmaintheme));
if u>0
    soundsc(lhmaintheme(1:u)+rhmaintheme(1:u),8192);
end
disp('Finale (mRNA 3'' UTR)')
if ~isempty(lhfinale) %a messenger may have no 3' UTR
    soundsc(lhfinale,8192);
end
end

function wave=notes2wave(notes,L)
%NOTES2WAVE Concatenate the waveforms of a cell array of note names.
% notes - cell array of note names understood by fnote
% L     - number of samples of each note (length of the global time base t)
% wave  - column vector of L*numel(notes) samples
n=numel(notes);
wave=zeros(L*n,1); %array preallocation
for k=1:n
    wave((k-1)*L+1:k*L)=fnote(notes{k});
end
end

function y=fnote(str)
%FNOTE Synthesize a single note (or a rest) on the global time base t.
% str - note name: a letter A-G, an optional alteration character (a name
%       of exactly 3 characters is raised by one semitone) and a final
%       octave digit, e.g. 'A4' or 'C#3'; 'r' or an empty value is a rest.
% y   - vector of the same size as t, faded in and out to avoid clicks.
%
% The key number is letter offset + alteration + 12*octave, with A as the
% first letter of each octave: 'A4' is key 49 (440 Hz), 'C3' is key 40.
%see http://en.wikipedia.org/wiki/Piano_key_frequencies
global t
if isempty(str) || strcmp(str,'r') %this is a rest
    y=zeros(size(t));
else
    a=['A';' ';'B';'C';' ';'D';' ';'E';'F';' ';'G']; %keyboard
    iv=find(a==str(1)); %find the note
    if numel(iv)~=1
        error('fnote:unknownNote','Unknown note ''%s''.',str)
    end
    oct=str2double(str(end))*12; %find the octave
    alt=length(str)==3; %find alteration (diesis #)
    key=iv+alt+oct; %compute the key
    f=440*(2^(1/12))^(key-49); %compute the frequency
    % Create a pitch vector
    y=sin((2*pi*f).*t);
end
% fading the vector to avoid clicks
fadetime=.01; %sec
fadein=0:1/(8192*fadetime):1;
fadeout=1:(-1/(8192*fadetime)):0;
n=length(fadein);
if length(y)<n
    %the note is shorter than the nominal fade: ramp over the whole note
    %instead of indexing past its end
    n=length(y);
    fadein=linspace(0,1,n);
    fadeout=linspace(1,0,n);
end
if n>0
    y(1:n)=y(1:n).*fadein;
    y(end-n+1:end)=y(end-n+1:end).*fadeout;
end
end

Contact us