Code covered by the BSD License  

Highlights from
Formant Estimation

image thumbnail

Formant Estimation

by

 

30 Jan 2014 (Updated )

Illustrates an algorithm for estimating the locations of the formants of voiced speech intervals

Callbacks_formant_estimation_GUI25(f,C,start_path)
function Callbacks_formant_estimation_GUI25(f,C,start_path)
%SET FILE DELIMITER
% filesep returns the path separator of the platform MATLAB is running on,
% so the delimiter is always defined -- including on platforms whose
% computer() string is not 'MACI64', 'GLNX86', 'GLNXA64', 'PCWIN' or
% 'PCWIN64' (the only ones a hard-coded list would cover).
% NOTE: the variable name "char" hides the built-in char function inside
% this function; the name is kept for any code in this file that refers to
% the delimiter by it.
char=filesep;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Unpack the first 26 entries of row 1 of C into named variables, in column
% order, using one comma-separated-list assignment.
[x,y,a,b,u,v,m,n,...                               columns 1-8
    lengthbutton,widthbutton,...                   columns 9-10
    enterType,enterString,enterLabel,...           columns 11-13
    noPanels,noGraphicPanels,noButtons,...         columns 14-16
    labelDist,...                                  column 17: distance that the label is below the button
    noTitles,buttonTextSize,labelTextSize,...      columns 18-20
    textboxFont,textboxString,textboxWeight,textboxAngle,...   columns 21-24
    labelHeight,fileName]=C{1,1:26};
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%PANELS
% Each panel takes its normalized position from a group of four consecutive
% entries of x and y: left = x(1), bottom = y(1), width = x(2)-x(1),
% height = y(3)-y(2) within the group.
for j=0:noPanels-1
    base=4*j;
    panelPos=[x(base+1), y(base+1), x(base+2)-x(base+1), y(base+3)-y(base+2)];
    uipanel('Parent',f,...
        'Units','Normalized',...
        'Position',panelPos);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%GRAPHIC PANELS
% Create up to three axes (graphicPanel1..graphicPanel3). Each one takes its
% normalized position from four consecutive entries of a and b. Indices
% above 3 create nothing.
for i=0:noGraphicPanels-1
    if (i+1) > 3
        continue;
    end
    q=4*i;
    axesPos=[a(1+q), b(1+q), a(2+q)-a(1+q), b(3+q)-b(2+q)];
    hAxes=axes('parent',f,...
        'Units','Normalized',...
        'Position',axesPos,...
        'GridLineStyle','--');
    % keep the handle under the name the nested functions refer to
    switch (i+1)
        case 1
            graphicPanel1=hAxes;
        case 2
            graphicPanel2=hAxes;
        case 3
            graphicPanel3=hAxes;
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%TITLE BOXES
% Only the first title slot is turned into a control (titleBox1); any
% further slots counted by noTitles are skipped.
for k=0:noTitles-1
    if (k+1) ~= 1
        continue;
    end
    titlePos=[u(1+4*k), v(1+4*k), u(2+4*k)-u(1+4*k), v(3+4*k)-v(2+4*k)];
    titleBox1=uicontrol('parent',f,...
        'Units','Normalized',...
        'Position',titlePos,...
        'Style','text',...
        'FontSize',textboxFont{k+1},...
        'String',textboxString(k+1),...
        'FontWeight',textboxWeight{k+1},...
        'FontAngle',textboxAngle{k+1});
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%BUTTONS
% Callback for each button slot, in slot order. Slots beyond this table get
% an optional label but no control.
slotCallbacks={@button1Callback,@button2Callback,@button3Callback,...
    @button4Callback,@button5Callback,@button6Callback,...
    @button7Callback,@button8Callback,@button9Callback,...
    @button10Callback};
for i=0:(noButtons-1)
    slot=i+1;

    % push buttons and text controls use the default background colour;
    % every other control style is white
    if any(strcmp(enterType{slot},{'pushbutton','text'}))
        enterColor='default';
    else
        enterColor='w';
    end

    % creating a label for some buttons: skipped when the label entry is
    % '' or '...'
    labelText=enterLabel{1,slot};
    if ~strcmp(labelText,'') && ~strcmp(labelText,'...')
        labelPos=[m(1+2*i), n(1+2*i)-labelDist-labelHeight(slot), ...
            m(2+2*i)-m(1+2*i), labelHeight(slot)];
        uicontrol('Parent',f,...
            'Units','Normalized',...
            'Position',labelPos,...
            'Style','text',...
            'String',enterLabel{slot},...
            'FontSize', labelTextSize(slot),...
            'HorizontalAlignment','center');
    end

    if slot > numel(slotCallbacks)
        continue;
    end

    % the control itself; all slots share the same construction
    buttonPos=[m(1+2*i), n(1+2*i), m(2+2*i)-m(1+2*i), n(2+2*i)-n(1+2*i)];
    hButton=uicontrol('Parent',f,...
        'Units','Normalized',...
        'Position',buttonPos,...
        'Style',enterType{slot},...
        'String',enterString{slot},...
        'FontSize', buttonTextSize(slot),...
        'BackgroundColor',enterColor,...
        'HorizontalAlignment','center',...
        'Callback',slotCallbacks{slot});

    % keep the handle under the name the nested callbacks refer to
    switch slot
        case 1
            button1=hButton;
        case 2
            button2=hButton;
        case 3
            button3=hButton;
        case 4
            button4=hButton;
        case 5
            button5=hButton;
        case 6
            button6=hButton;
        case 7
            button7=hButton;
        case 8
            button8=hButton;
        case 9
            button9=hButton;
        case 10
            button10=hButton;
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%USER CODE FOR THE VARIABLES AND CALLBACKS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize Variables
% Placeholder values for the state shared with the nested callbacks; the
% callbacks overwrite most of them once a file is loaded or a field edited.

% -- audio data
    curr_file=1;                % samples played by button10Callback
    xin=[];                     % samples passed to the formant analysis
    fs=8000;                    % sampling rate
    fsd=10000;
    nsamp=1;

% -- file bookkeeping
    directory_name='abcd';
    wav_file_names='abce';
    fin_path='filename';
    fname='output';

% -- analysis parameters
    Lm=40;      L=400;          % frame length: msec / samples
    Rm=10;      R=100;          % frame shift: msec / samples
    p=16;                       % lpc system order
    minrad=0.9;                 % minimum pole radius
    dthr=0.05;                  % formant distance threshold

% -- miscellaneous
    y=[];
    nrun=-1;

% Name the GUI
    set(f,'Name','formant_estimation');

% CALLBACKS
% Callback for button1 -- Get Speech Files Directory
 function button1Callback(h,eventdata)
 % Lets the user choose a directory, lists its .wav files in the button2
 % menu and loads the first file of the list.
 %
 % Nothing is changed when the dialog is cancelled or when the chosen
 % directory holds no .wav files, so the previously loaded file stays usable.

     selected_dir=uigetdir(start_path,'dialog_title');

     % uigetdir returns 0 (not a path) when the user cancels the dialog
     if isequal(selected_dir,0)
         return;
     end

     % fullfile inserts the platform's separator
     struct_filenames=dir(fullfile(selected_dir,'*.wav'));
     struct_filenames=struct_filenames(~[struct_filenames.isdir]);
     if isempty(struct_filenames)
         waitfor(errordlg('The selected directory contains no .wav files'));
         return;
     end

     directory_name=selected_dir;
     wav_file_names={struct_filenames.name};

     % Value is reset together with String: a selection index left over from
     % a longer, earlier list would otherwise be out of range for the menu
     set(button2,'String',wav_file_names,'Value',1);

% once the popupmenu/drop down menu is populated, its callback is not
% invoked for the initial selection, so the first entry is loaded here
     indexOfDrpDwnMenu=1;
     [curr_file,fs]=loadSelection(directory_name,wav_file_names,indexOfDrpDwnMenu);
 end

% Callback for button2 -- Choose speech file for play and plot
 function button2Callback(h,eventdata)
 % Loads the file currently selected in the button2 menu.

     % wav_file_names only becomes a cell array of file names after a
     % directory has been chosen; before that there is nothing to load
     if ~iscell(wav_file_names) || isempty(wav_file_names)
         return;
     end

     indexOfDrpDwnMenu=get(button2,'Value');
     if ~isscalar(indexOfDrpDwnMenu) || indexOfDrpDwnMenu < 1 || ...
             indexOfDrpDwnMenu > numel(wav_file_names)
         return;
     end

     [curr_file,fs]=loadSelection(directory_name,wav_file_names,indexOfDrpDwnMenu);
 end

%*************************************************************************
% function -- load selection from designated directory and file
%
function [curr_file,fs]=loadSelection(directory_name,wav_file_names,...
    indexOfDrpDwnMenu)
%
% Inputs:
%   directory_name: directory that holds the speech files
%   wav_file_names: cell array of file names in that directory
%   indexOfDrpDwnMenu: index into wav_file_names of the file to read
%
% Outputs:
%   curr_file: samples exactly as returned by the audio reader
%   fs: sampling rate of the file
%
% Also updates the variables shared with the enclosing function:
%   fname (selected file name), fin_path (its complete path),
%   xin (analysis signal, samples scaled by 32768), nsamp (sample count, as text)

% read in filename (fname) from cell array
    fname=wav_file_names{indexOfDrpDwnMenu};

% fin_path is the complete path of the .wav file that is selected
    fin_path=fullfile(directory_name,fname);

% read in speech/audio signal into curr_file; sampling rate is fs
% wavread is no longer available in current MATLAB releases, so audioread
% is used whenever it exists
    if exist('audioread','file')
        [curr_file,fs]=audioread(fin_path);
    else
        [curr_file,fs]=wavread(fin_path);
    end

% the frame-by-frame analysis indexes its input as a single column, so only
% the first channel of a multi-channel file is passed on to it
    if isempty(curr_file)
        xin=[];
    else
        xin=curr_file(:,1)*32768;
    end

% number of samples, kept as text
    nsamp=num2str(length(curr_file));
end

% Callback for button10 -- Play Speech File
    function button10Callback(h,eventdata)
        % hand the current samples to soundsc at the current sampling rate
        playback_rate=fs;
        soundsc(curr_file,playback_rate);
    end

% Callback for button3 -- Lm: analysis frame length (msec)
 function button3Callback(h,eventdata)
 % Reads the frame length typed into button3 and stores it in Lm when it is
 % a real number between 1 and 100. Any other entry (including text that is
 % not a number) is reported, the field is reset to the last accepted value
 % and Lm is left unchanged.

     % str2double never evaluates the text; it yields NaN for non-numbers
     entered=str2double(get(button3,'string'));
     accepted=isscalar(entered) && isreal(entered) && ...
         entered >= 1 && entered <= 100;
     if ~accepted
         waitfor(errordlg('The frame length must be between 1 and 100'));
         set(button3,'string',num2str(Lm));
         return;
     end
     Lm=entered;
 end

% Callback for button4 -- Rm: analysis frame shift (msec)
 function button4Callback(h,eventdata)
 % Reads the frame shift typed into button4 and stores it in Rm when it is
 % a real number between 1 and 100. Any other entry (including text that is
 % not a number) is reported, the field is reset to the last accepted value
 % and Rm is left unchanged.

     % str2double never evaluates the text; it yields NaN for non-numbers
     entered=str2double(get(button4,'string'));
     accepted=isscalar(entered) && isreal(entered) && ...
         entered >= 1 && entered <= 100;
     if ~accepted
         waitfor(errordlg('The frame shift must be between 1 and 100'));
         set(button4,'string',num2str(Rm));
         return;
     end
     Rm=entered;
 end

% Callback for button5 -- p: lpc system order
 function button5Callback(h,eventdata)
 % Reads the LPC order typed into button5 and stores it in p when it is an
 % integer between 4 and 32. Any other entry (including text that is not a
 % number) is reported, the field is reset to the last accepted value and p
 % is left unchanged.

     % str2double never evaluates the text; it yields NaN for non-numbers
     entered=str2double(get(button5,'string'));
     % p is used as a polynomial order and as a row count, so it must be whole
     accepted=isscalar(entered) && isreal(entered) && ...
         entered >= 4 && entered <= 32 && entered == round(entered);
     if ~accepted
         waitfor(errordlg('The LPC system order must be an integer between 4 and 32'));
         set(button5,'string',num2str(p));
         return;
     end
     p=entered;
 end

% Callback for button6 -- minrad: minimum pole radius
 function button6Callback(h,eventdata)
 % Reads the minimum pole radius typed into button6 and stores it in minrad
 % when it is a real number between 0.5 and 0.95. Any other entry (including
 % text that is not a number) is reported, the field is reset to the last
 % accepted value and minrad is left unchanged.

     % str2double never evaluates the text; it yields NaN for non-numbers
     entered=str2double(get(button6,'string'));
     accepted=isscalar(entered) && isreal(entered) && ...
         entered >= 0.5 && entered <= 0.95;
     if ~accepted
         waitfor(errordlg('The minimum pole radius must be between 0.5 and 0.95'));
         set(button6,'string',num2str(minrad));
         return;
     end
     minrad=entered;
 end

% Callback for button7 -- dthr: formant distance threshold
 function button7Callback(h,eventdata)
 % Reads the formant distance threshold typed into button7 and stores it in
 % dthr when it is a real number between 0.04 and 0.06. Any other entry
 % (including text that is not a number) is reported, the field is reset to
 % the last accepted value and dthr is left unchanged.

     % str2double never evaluates the text; it yields NaN for non-numbers
     entered=str2double(get(button7,'string'));
     accepted=isscalar(entered) && isreal(entered) && ...
         entered >= 0.04 && entered <= 0.06;
     if ~accepted
         waitfor(errordlg('The formant distance threshold must be between 0.04 and 0.06'));
         set(button7,'string',num2str(dthr));
         return;
     end
     dthr=entered;
 end

% Callback for button8 -- Run formant estimation
 function button8Callback(h,eventdata)
 % Re-reads the editable parameter fields and starts the formant estimation
 % on the loaded signal. The run is skipped when no signal has been loaded
 % or when any parameter is outside its permitted range.

% xin stays empty until a speech file has been read
    if isempty(xin)
        waitfor(errordlg('Select a speech file before running formant estimation'));
        return;
    end

% check editable buttons for changes
    button3Callback(h,eventdata);
    button4Callback(h,eventdata);
    button5Callback(h,eventdata);
    button6Callback(h,eventdata);
    button7Callback(h,eventdata);

% the callbacks above report an unacceptable entry themselves; here the run
% is simply not started with a value outside the ranges they enforce
    settings={Lm,Rm,p,minrad,dthr};
    lo_lim=[1 1 4 0.5 0.04];
    hi_lim=[100 100 32 0.95 0.06];
    for s=1:numel(settings)
        val=settings{s};
        if ~(isnumeric(val) && isscalar(val) && ...
                val >= lo_lim(s) && val <= hi_lim(s))
            return;
        end
    end

% setup parameters for formant estimation
    setup_formant_estimation(xin,fs,Lm,Rm,p,minrad,dthr,fname);
 end

%*****************************************************************
function setup_formant_estimation(x,fs,Lm,Rm,p,minrad,dthr,fname)
%
% run the frame-by-frame LPC root analysis on a speech signal, write the
% formant candidates and runs to out_<fname>_formants.txt, plot the results
% on the three graphics panels and save them in out_formants.mat
%
% Inputs:
%   x: speech array
%   fs: speech sampling rate
%   Lm: analysis frame duration in msec
%   Rm: analysis frame shift in msec
%   p: lpc system order
%   minrad: minimum pole radius to be considered for formant
%   dthr: formant distance threshold
%   fname: speech filename

% set nmax to length of x
    nmax=length(x);

% Lm: frame duration in msec; must convert to samples
    L=round(Lm*fs/1000);

% Rm: frame shift in msec; must convert to samples
    R=round(Rm*fs/1000);

% nothing can be analyzed without one complete frame, and a frame shift of
% zero samples would keep the frame loop below from ever advancing
    if (L < 1 || R < 1 || nmax < L)
        waitfor(errordlg('The speech signal must contain at least one analysis frame'));
        return;
    end

% clear graphics Panel 2
    reset(graphicPanel2);
    axes(graphicPanel2);
    cla;

% find all roots whose magnitude is greater than threshold; F gets one
% column of root angles (in Hz) per frame
% (the frame start index and the third autolpc output use names that do not
% overwrite the layout vectors n and a of the enclosing function)
    nstart=1;
    F=[];

% process speech on a frame-by-frame basis until no more viable frames
    while (nstart+L-1 <= nmax)
        seg=nstart:nstart+L-1;

% perform LPC analysis on each frame of speech using autocorrelation method
% (an all-zero frame is replaced by random noise first)
        if (max(abs(x(seg))) == 0)
            x(seg)=randn(1,L);
        end
        xlpc=x(seg).*hamming(L);
        [A,G,lpc_a,r]=autolpc(xlpc,p);

% find roots of LPC polynomial, eliminate all roots where imag(root)<=0, or
% where abs(root) <= minrad, or whose angle lies above 4500 Hz
        Ar=roots(A);
        Ar(imag(Ar)<=0)=0;
        Ar(abs(Ar)<=minrad)=NaN;
        angr=atan2(imag(Ar),real(Ar))*fs/(2*pi);
        angr(angr > 4500)=NaN;
        F=[F angr];
        nstart=nstart+R;
    end
    nfrm=size(F,2);

% sort putative formants in ascending order (NaN entries sort last)
% keep track of the number of putative formants at each frame
    FS=sort(F);
    f1=p-sum(isnan(FS),1);

% open output file for printing results
    outfile=['out_',fname,'_formants.txt'];
    fidw=fopen(outfile,'w');
    if (fidw < 0)
        waitfor(errordlg(['Unable to open output file ',outfile]));
        return;
    end
% close exactly this file whenever the function is left -- on either branch
% below or on an error -- and leave files opened elsewhere alone
    closeOutput=onCleanup(@() fclose(fidw));

% write header for output file
    fprintf(fidw,'file:%s, L:%d, R:%d, p:%d, minrad:%6.2f, dthr:%6.2f \n',...
        fname,L,R,p,minrad,dthr);

% write out formant estimates from lpc root analysis (at most 5 per frame)
    for frame=1:nfrm
        nfound=f1(frame);
        if (nfound == 0)
            fprintf(fidw,'frame: %d, no formants \n',frame);
            continue;
        end
        if ~(nfound >= 1 && nfound <= 5)
            fprintf(fidw,'frame: %d, more than 5 formants \n',frame);
            nfound=5;
        end
        % one ' %6.2f' field per value that is printed
        fmt=['frame: %d, formants:',repmat(' %6.2f',1,nfound),' \n'];
        fprintf(fidw,fmt,frame,FS(1:nfound,frame));
    end

% determine run lengths and find longest run
    [nrun,startsav,endsav]=run_lengths(f1,nfrm,FS,dthr,fidw);

% process further if nrun >= 1; else send warning message and terminate run
if (nrun > 0)
    for run=1:nrun
        fb=startsav(run);fe=endsav(run);
        fprintf(fidw,'run_length: run:%d, start:%d, end:%d \n',run,fb,fe);
    end

% save formants for each of the formant intervals in array Fm
    nfmt=4;
    Fm=FS(1:nfmt,1:nfrm);

% modify each run based on surrounding formant estimates
% must have 3 or 4 formants in each frame to track backwards or forwards
    [Fmc,startsav,endsav]=extend_runs(nrun,startsav,endsav,nfrm,f1,Fm,...
        nfmt,FS,dthr,fidw);
    for run=1:nrun
        fb=startsav(run);fe=endsav(run);
        fprintf(fidw,'continuity: run:%d, start:%d, end:%d \n',run,fb,fe);
    end

% frame axis for the plots (frames are numbered from 0)
    T=(0:nfrm-1);

% plot raw roots of lpc polynomial, after root processing, on graphics
% Panel 2
    plot(T,Fm','*k');
    xpp=['Frame Number; fs=',num2str(fs),' samples/second'];
    xlabel(xpp),ylabel('Frequency (Hz)');
    grid on;
    axis([0 nfrm+1 0 5000]);
    stitle=sprintf(' file:%s, L:%d, R:%d, p:%d, minrad:%6.2f, dthr:%6.2f ',...
        fname,L,R,p,minrad,dthr);

% display fname and signal processing parameters on titleBox1
    stitle1=strcat('Formant Estimation -- ',stitle);
    set(titleBox1,'string',stitle1);
    set(titleBox1,'FontSize',15);

% clear graphics Panel 3
    reset(graphicPanel3);
    axes(graphicPanel3);

% plot entire speech interval on graphics Panel 3
% the sample indices are made whole numbers (L may be odd) and are kept
% inside 1..nmax; the axis limits keep their nominal values
    cla;
    ss1=max(1,floor(L/2)+1-R);
    es1=min(nmax,floor(L/2)+1+nfrm*R);
    xpp=['Time in Samples; fs=',num2str(fs),' samples/second'];
    plot(ss1:es1,x(ss1:es1),'b');xlabel(xpp);
    ylabel('Value');grid on;
    axis([L/2+1-R, L/2+1+nfrm*R, min(x), max(x)]);

% clear graphics Panel 1
    reset(graphicPanel1);
    axes(graphicPanel1);
    cla;

% plot formant estimates on graphics Panel 1
    plot(T,Fmc','k','LineWidth',2),grid on;
    xpp=['Frame Number; fs=',num2str(fs),' samples/second'];
    xlabel(xpp);ylabel('Frequency in Hz');
    axis([0 nfrm+1 0 5000]);

% save formants and input parameters in mat file
    outfile=['out_formants','.mat'];
    save(outfile,'nfmt','nfrm','Fmc','fname','L','R','p','minrad','dthr');
else
    uiwait(msgbox('No viable formant track found -- Change p or minrad and rerun formant estimation','error warning'));
end
end

% Callback for button9 -- close GUI
 function button9Callback(h,eventdata)
     % close the GUI's own figure f; gcf is whichever figure happens to be
     % current, which need not be this one
     if ishandle(f)
         close(f);
     end
 end
end

Contact us