Code covered by the BSD License  

Highlights from
Bayesian VUS Classifier

image thumbnail

Bayesian VUS Classifier

by

 

20 Feb 2014 (Updated )

This exercise utilizes four programs to train a Bayesian classifier and classify frames of signals.

Callbacks_VUS_Analysis_GUI25(f,C,start_path)
function Callbacks_VUS_Analysis_GUI25(f,C,start_path)
%CALLBACKS_VUS_ANALYSIS_GUI25 Build the VUS analysis GUI inside figure f.
%
% Inputs:
%   f: handle of the figure that hosts every panel, axes and control
%   C: cell array of layout data (entries 1-8 and 11-25 are read below)
%   start_path: folder initially offered by the directory chooser (button 1)
%
% The handles (graphicPanel1..4, titleBox1, button1..10) and the analysis
% state initialised at the end of this section are variables of this parent
% function so that the nested callbacks defined further down can share them.
%
% The layout variables deliberately use descriptive names: in a function
% with nested functions, any name used in both the parent and a nested
% function refers to the same variable, so short names such as x, a, n or i
% would be overwritten by the analysis code.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
panelX=C{1,1};
panelY=C{1,2};
axesX=C{1,3};
axesY=C{1,4};
titleX=C{1,5};
titleY=C{1,6};
btnX=C{1,7};
btnY=C{1,8};
% C{1,9}, C{1,10} and C{1,26} are not used by this GUI
enterType=C{1,11};
enterString=C{1,12};
enterLabel=C{1,13};
noPanels=C{1,14};
noGraphicPanels=C{1,15};
noButtons=C{1,16};
labelDist=C{1,17};%distance that the label is below the button
noTitles=C{1,18};
buttonTextSize=C{1,19};
labelTextSize=C{1,20};
textboxFont=C{1,21};
textboxString=C{1,22};
textboxWeight=C{1,23};
textboxAngle=C{1,24};
labelHeight=C{1,25};
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%PANELS
% each panel consumes four consecutive entries of panelX/panelY
for pIdx=1:noPanels
    base4=4*(pIdx-1);
    uipanel('Parent',f,...
        'Units','Normalized',...
        'Position',[panelX(1+base4) panelY(1+base4) ...
        panelX(2+base4)-panelX(1+base4) panelY(3+base4)-panelY(2+base4)]);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%GRAPHIC PANELS
% only four named axes handles exist, so at most four axes are created
for gIdx=1:min(noGraphicPanels,4)
    base4=4*(gIdx-1);
    hNewAxes=axes('parent',f,...
        'Units','Normalized',...
        'Position',[axesX(1+base4) axesY(1+base4) ...
        axesX(2+base4)-axesX(1+base4) axesY(3+base4)-axesY(2+base4)],...
        'GridLineStyle','--');
    % the nested plotting code refers to the axes by these fixed names
    switch gIdx
        case 1
            graphicPanel1=hNewAxes;
        case 2
            graphicPanel2=hNewAxes;
        case 3
            graphicPanel3=hNewAxes;
        case 4
            graphicPanel4=hNewAxes;
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%TITLE BOXES
% only the first title box is ever created, whatever noTitles says
if noTitles >= 1
    titleBox1 = uicontrol('parent',f,...
        'Units','Normalized',...
        'Position',[titleX(1) titleY(1) titleX(2)-titleX(1) titleY(3)-titleY(2)],...
        'Style','text',...
        'FontSize',textboxFont{1},...
        'String',textboxString(1),...
        'FontWeight',textboxWeight{1},...
        'FontAngle',textboxAngle{1});
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%BUTTONS
% callback of button k is the nested function buttonkCallback
btnCallbacks={@button1Callback,@button2Callback,@button3Callback,...
    @button4Callback,@button5Callback,@button6Callback,...
    @button7Callback,@button8Callback,@button9Callback,...
    @button10Callback};
for bIdx=1:noButtons
    base2=2*(bIdx-1);
    btnLeft=btnX(1+base2);
    btnBottom=btnY(1+base2);
    btnWidth=btnX(2+base2)-btnLeft;
    btnHeight=btnY(2+base2)-btnBottom;

    % push buttons and static text keep the default colour, the rest is white
    if any(strcmp(enterType{bIdx},{'pushbutton','text'}))
        btnColor='default';
    else
        btnColor='w';
    end

    % a label is drawn below the control unless its text is '' or '...'
    if ~any(strcmp(enterLabel{1,bIdx},{'','...'}))
        uicontrol('Parent',f,...
            'Units','Normalized',...
            'Position',[btnLeft btnBottom-labelDist-labelHeight(bIdx) ...
            btnWidth labelHeight(bIdx)],...
            'Style','text',...
            'String',enterLabel{bIdx},...
            'FontSize', labelTextSize(bIdx),...
            'HorizontalAlignment','center');
    end

    % controls beyond the tenth have no callback/handle and are not created
    if bIdx > numel(btnCallbacks)
        continue;
    end
    hNewButton=uicontrol('Parent',f,...
        'Units','Normalized',...
        'Position',[btnLeft btnBottom btnWidth btnHeight],...
        'Style',enterType{bIdx},...
        'String',enterString{bIdx},...
        'FontSize', buttonTextSize(bIdx),...
        'BackgroundColor',btnColor,...
        'HorizontalAlignment','center',...
        'Callback',btnCallbacks{bIdx});
    % the nested callbacks refer to the controls by these fixed names
    switch bIdx
        case 1
            button1=hNewButton;
        case 2
            button2=hNewButton;
        case 3
            button3=hNewButton;
        case 4
            button4=hNewButton;
        case 5
            button5=hNewButton;
        case 6
            button6=hNewButton;
        case 7
            button7=hNewButton;
        case 8
            button8=hNewButton;
        case 9
            button9=hNewButton;
        case 10
            button10=hNewButton;
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%USER CODE FOR THE VARIABLES AND CALLBACKS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize Variables
% (state shared with the nested callbacks; they overwrite these defaults)
    curr_file=1;
    fs=8000;
    fsd=20000;
    directory_name='abcd';
    wav_file_names='abce';
    fin_path='filename';
    fname='output';
    nsamp=1;
    Lm=40;
    L=800;
    Rm=10;
    R=200;
    usehp=1;
    xin=[];
    p=12;
    nsec=4;
    y=[];
    means=[];
    stdevs=[];
    % vuss=[];
    % confidence=[];

% Name the GUI
    set(f,'Name','VUS_Analysis');

% CALLBACKS
% Callback for button1 -- speech file directory choice
 function button1Callback(h,eventdata)
 % Let the user choose a folder, list its .wav files in the popup menu
 % (button2) and load the first file.
 %   h, eventdata: standard callback arguments (unused)
 % The shared directory_name / wav_file_names are only updated when a
 % folder containing at least one .wav file was actually chosen.
     chosenDir=uigetdir(start_path,'dialog_title');

     % uigetdir returns 0 when the dialog is cancelled
     if isequal(chosenDir,0)
         return;
     end

     % fullfile builds the pattern with the platform's file separator
     listing=dir(fullfile(chosenDir,'*.wav'));
     if isempty(listing)
         waitfor(errordlg('No .wav files were found in the selected directory'))
         return;
     end

     directory_name=chosenDir;
     wav_file_names={listing.name};
     set(button2,'String',wav_file_names);
     set(button2,'Value',1);

     % a freshly filled popup menu does not fire its callback, so the first
     % entry is loaded explicitly
     [curr_file,fs]=loadSelection(directory_name,wav_file_names,1);
 end

% Callback for button2 -- Choose speech file for play and plot
 function button2Callback(h,eventdata)
 % Load the speech file currently selected in the popup menu (button2).
 %   h, eventdata: standard callback arguments (unused)
     % wav_file_names is a placeholder char until a directory has been
     % chosen; there is nothing to load in that case
     if ~iscell(wav_file_names) || isempty(wav_file_names)
         return;
     end
     selectedEntry=get(button2,'Value');
     [curr_file,fs]=loadSelection(directory_name,wav_file_names,selectedEntry);
 end

%*************************************************************************
% function -- load selection from designated directory and file
%
function [curr_file,fs]=loadSelection(directory_name,wav_file_names,...
    indexOfDrpDwnMenu)
% Read one .wav file from the chosen directory.
%
% Inputs:
%   directory_name: folder that holds the file
%   wav_file_names: cell array of file names in that folder
%   indexOfDrpDwnMenu: index of the wanted entry in wav_file_names
% Outputs:
%   curr_file: samples as returned by the audio reader
%   fs: sampling rate of the file in Hz
% Side effects on variables shared with the parent function:
%   fin_path (full path of the file), xin (samples scaled by 32768),
%   nsamp (number of samples, as a string) and fname (file name)

% fin_path is the complete path of the .wav file that is selected
    selectedName=wav_file_names{indexOfDrpDwnMenu};
    fin_path=fullfile(directory_name,selectedName);

% read in speech/audio signal into curr_file; sampling rate is fs
% (wavread is only used as a fallback where audioread is not available)
    if exist('audioread','file')
        [curr_file,fs]=audioread(fin_path);
    else
        [curr_file,fs]=wavread(fin_path);
    end
    xin=curr_file*32768;

% remember number of samples and file name of the loaded signal
    nsamp=num2str(length(curr_file));
    fname=selectedName;
end

% Callback for button3 -- fsd: sampling rate for processing
 function button3Callback(h,eventdata)
 % Set fsd, the sampling rate used for processing, from the popup menu.
 %   h, eventdata: standard callback arguments (unused)
     % rate in Hz for each menu position; the menu text itself is not parsed
     rateTable = [10000 6000 8000 10000 16000 20000];
     fsd = rateTable(get(button3,'Value'));
 end

% Callback for button4 -- usehp: use highpass filter switch; 1:use hp, 2:
% skip hp
 function button4Callback(h,eventdata)
 % Store the highpass-filter choice of button4 in usehp
 % (1: use the highpass filter, 2: skip it).
 %   h, eventdata: standard callback arguments (unused)
     menuChoice=get(button4,'val');
     usehp=menuChoice;
 end

% Callback for button5 -- Lm: analysis frame length in msec
 function button5Callback(h,eventdata)
 % Read Lm, the analysis frame length in msec, from button5.
 %   h, eventdata: standard callback arguments (unused)
 % Lm is only updated when the entry is a number between 1 and 100;
 % otherwise the previous value is written back to the control and an
 % error dialog is shown.
     % str2double yields NaN for empty or non-numeric text and, unlike
     % str2num, never evaluates the text as an expression
     enteredValue=str2double(get(button5,'string'));
     isValid=isscalar(enteredValue) && isfinite(enteredValue) && ...
         enteredValue >= 1 && enteredValue <= 100;
     if ~isValid
         set(button5,'string',num2str(Lm));
         waitfor(errordlg('Lm must be a positive number between 1 and 100'))
         return;
     end
     Lm=enteredValue;
 end

% Callback for button6 -- Rm: analysis frame shift in msec
 function button6Callback(h,eventdata)
 % Read Rm, the analysis frame shift in msec, from button6.
 %   h, eventdata: standard callback arguments (unused)
 % Rm is only updated when the entry is a number between 1 and 100;
 % otherwise the previous value is written back to the control and an
 % error dialog is shown.
     % str2double yields NaN for empty or non-numeric text and, unlike
     % str2num, never evaluates the text as an expression
     enteredValue=str2double(get(button6,'string'));
     isValid=isscalar(enteredValue) && isfinite(enteredValue) && ...
         enteredValue >= 1 && enteredValue <= 100;
     if ~isValid
         set(button6,'string',num2str(Rm));
         waitfor(errordlg('Rm must be a positive number between 1 and 100'))
         return;
     end
     Rm=enteredValue;
 end

% Callback for button7 -- p: lpc system order
 function button7Callback(h,eventdata)
 % Read p, the lpc system order, from button7.
 %   h, eventdata: standard callback arguments (unused)
 % p is only updated when the entry is a number between 4 and 32; it is
 % then rounded to an integer and echoed back to the control. Otherwise the
 % previous value is written back and an error dialog is shown.
     % str2double yields NaN for empty or non-numeric text and, unlike
     % str2num, never evaluates the text as an expression
     enteredValue=str2double(get(button7,'string'));
     isValid=isscalar(enteredValue) && isfinite(enteredValue) && ...
         enteredValue >= 4 && enteredValue <= 32;
     if ~isValid
         set(button7,'string',num2str(p));
         waitfor(errordlg('p must be a positive integer between 4 and 32'))
         return;
     end
     p=round(enteredValue);
     set(button7,'string',num2str(p));
 end

% Callback for button8 -- record file
 function button8Callback(h,eventdata)
 % Record nsec seconds of speech at rate fsd, store it as the current
 % input signal and play it back.
 %   h, eventdata: standard callback arguments (forwarded to button3Callback)
 % Updates the shared variables nsec, y, xin and fs.

 % refresh the processing rate from the popup menu
    button3Callback(h,eventdata);
    nsec=3;

 % Begin recording after hitting OK on msg box
    uiwait(msgbox('Ready to Record -- Hit OK to Begin','Record','modal'));

 % record one channel of 16-bit audio for nsec seconds
    recobj=audiorecorder(fsd,16,1);
    recordblocking(recobj,nsec);
    y=getaudiodata(recobj);

 % scale the recording so that its peak magnitude becomes 32767; an
 % all-zero recording is kept as is to avoid dividing by zero
    peakValue=max(abs(y));
    if peakValue > 0
        xin=y/peakValue*32767;
    else
        xin=y;
    end

 % highpass filter to eliminate dc offset and 60 Hz hum and play out results
    y=highpass_filter_signal_GUI(y,fsd);
    soundsc(y,fsd);

 % the recording is already at the processing rate
    fs=fsd;
 end

% Callback for button9 -- run VUS analysis
 function button9Callback(h,eventdata)
 % Run the VUS analysis on the current input signal xin.
 %   h, eventdata: standard callback arguments (forwarded to the parameter
 %   callbacks)
 % Updates the shared variables y, means and stdevs.

 % check editable buttons for changes
    button3Callback(h,eventdata);
    button4Callback(h,eventdata);
    button5Callback(h,eventdata);
    button6Callback(h,eventdata);
    button7Callback(h,eventdata);

 % nothing to analyse until a file has been loaded or speech recorded
    if isempty(xin)
        waitfor(errordlg('Load or record a speech signal before running the analysis'))
        return;
    end

 % do not run with a parameter that its own callback has just rejected
 % (that callback has already informed the user)
    paramsValid=isscalar(Lm) && Lm >= 1 && Lm <= 100 && ...
        isscalar(Rm) && Rm >= 1 && Rm <= 100 && ...
        isscalar(p) && p >= 4 && p <= 32;
    if ~paramsValid
        return;
    end

 % if the speech file sampling rate does not match the desired rate, need to
 % convert sampling rate
    y=xin;
    if (fs ~= fsd)
        y=srconv(y,fs,fsd);
    end
    soundsc(y,fsd);

 % read in means and standard deviations for each of the five parameters and
 % for each class (voiced/unvoiced/silence)
    s_data=load('VUS_means_stdevs');
    means=s_data.means;
    stdevs=s_data.stdevs;

 % classify every frame; the results are displayed by the callee, so the
 % returned contours are not needed here
    speech_VUS(y,fsd,Lm,Rm,p,usehp,fname,means,stdevs);
 end

%************************************************************************
function [vuss,confidence]=speech_VUS(y,fsd,Lm,Rm,p,usehp,fname,means,stdevs)
%
% Optionally highpass filter a speech signal (to eliminate DC offset and
% hum), do short-time analysis of the following five parameters:
%   log energy,
%   zero crossing rate (per 10 msec interval),
%   normalized autocorrelation coefficient at unit sample delay,
%   first predictor coefficient of a p-pole LPC analysis, and
%   normalized prediction error in dB
% and classify each frame as belonging to the class of voiced speech (V),
% unvoiced speech (U), or background signal (non-speech) (S)
% based on a minimum probability of error decision scheme using simple
% Gaussian fits to the histogram of each of the 5 parameters for each
% of the 3 decision classes.
%
% The Gaussian model density uses a stored set of means and standard
% deviations for each of the 5 features for voiced speech (V),
% unvoiced speech (U), and background signal (S) frames, trained based
% on manual classification of a training set of frames.  The caller loads
% them from VUS_means_stdevs.mat and passes them in.
%
% A measure of confidence score is also provided with each frame decision
% as to voiced speech (V), unvoiced speech (U), and background signal (S)
% classification.  The confidence score is normalized to the
% range 0 <= confidence <= 1.

% Inputs:
%   y: speech array to be classified as VUS for each frame (a column
%   vector is required when the highpass filter is used)
%   fsd: speech sampling rate
%   Lm: analysis frame length in msec
%   Rm: analysis frame shift in msec
%   p: lpc system order
%   usehp: option to use highpass filter (usehp=1 for using highpass
%   filter; usehp=2 to skip highpass filter)
%   fname: speech filename
%   means: means of each parameter for each class
%   stdevs: standard deviations of each parameter for each class
% Outputs:
%   vuss: VUS decision for each frame
%   confidence: confidence score for each frame
% Side effect: L and R (frame length/shift in samples) are variables of
% the parent function and are updated here.

% replace speech filename character '_' with ' '
    fname=strrep(fname,'_',' ');

% convert Lm, Rm to samples
    L=round(Lm*fsd/1000);
    R=round(Rm*fsd/1000);

    if (usehp == 1)
% design highpass filter with length 400*fsd/10000+1 samples,
% cutoff frequencies fl=100 Hz, fh=200 Hz, fs as specified; fleng is built
% from its half so that the delay fleng/2 is always a whole number of
% samples
        halfLen=round(200*fsd/10000);
        fleng=2*halfLen;
        bf=hpf(fleng,100,200,fsd,0);

% highpass filter input and compensate for delay of fleng/2 samples
        ye=[y; zeros(halfLen,1)];
        yf=filter(bf,1,ye);
        y=yf(1+halfLen:length(y)+halfLen);

% normalize to unit peak; an all-zero signal is left untouched
        ym=max(abs(y));
        if ym > 0
            y=y/ym;
        end
    end

% do short-time parameter analysis of the five features over the nfrm
% frames of the utterance
    [energy,zerocrossings,c1,alpha1,Vp,nfrm]=analysis_VUS(y,L,R,p,fsd);
    Vp=-10*log10(single(Vp));

% stack the five features, one row per feature and one column per frame
    featureMatrix=[energy; zerocrossings; c1; alpha1; Vp];

% do voiced speech (V) - unvoiced speech (U) - nonspeech (S)
% classification of each of the frames of the
% utterance; provide a confidence measure for the estimate for each frame
    [vuss,confidence]=VUS_GUI(featureMatrix,nfrm,means,stdevs,fname,L,R,p);
end

%********************************************************************
function [vuss,confidence]=VUS_GUI(x,nfrm,means,stdevs,filename,L,R,p)
%
% Voiced speech (V), Unvoiced speech (U), Background Signal (S)
% analysis of speech using Bayesian
% decision rule with single Gaussian densities, followed by plots of the
% waveform, the decision contour and two feature contours.
%
% Inputs:
%   x: matrix with one row per feature and one column per frame:
%       row 1: energy=log energy measurement (1:nfrm)
%       row 2: zerocrossings=zc measurement (1:nfrm)
%       row 3: c1: first speech correlation (1:nfrm)
%       row 4: alpha1: first lpc coefficient (1:nfrm)
%       row 5: Vp: lpc normalized log error (1:nfrm)
%   nfrm: number of frames in utterance
%   means: matrix of means of 5 parameters in format
%       mean=[logesm, zcsm, c1sm, alpha1sm, Vpsm; logeum, zcum, c1um,
%       alpha1um, Vpum; logevm, zcvm, c1vm, alpha1vm, Vpvm];
%   stdevs: matrix of standard deviations of 5 parameters in format:
%       stdev=[logess, zcss, c1ss, alpha1ss, Vpss; logeus, zcus, c1us,
%       alpha1us, Vpus; logevs, zcvs, c1vs, alpha1vs, Vpvs];
%   filename: file being processed
%   L: frame length in samples
%   R: frame shift in samples
%   p: lpc analysis order
% Outputs:
%   vuss: VUS contour (1:nfrm); 1=S, 2=U, 3=V, 0=below confidence threshold
%   confidence: confidence contour (1:nfrm)
%
% The waveform that is plotted is y, a variable of the parent function
% (set by the run/record callbacks); it is read here but not modified.

% variance-weighted squared distance of every frame to each class
% (row 1: background signal, row 2: unvoiced speech, row 3: voiced speech)
    numClasses=3;
    dist=zeros(numClasses,nfrm);
    for cls=1:numClasses
        dev=bsxfun(@minus,x,means(cls,:)');
        dist(cls,:)=sum(bsxfun(@rdivide,dev.^2,(stdevs(cls,:)').^2),1);
    end

% confidence of a class: product of the distances to the other two classes
% divided by the sum of all pairwise products
    den=dist(1,:).*dist(2,:)+dist(1,:).*dist(3,:)+dist(2,:).*dist(3,:);
    conf=[dist(2,:).*dist(3,:); dist(1,:).*dist(3,:); dist(1,:).*dist(2,:)];
    conf=bsxfun(@rdivide,conf,den);

% the decision is the class at minimum distance (first one on ties)
    [~,vuss]=min(dist,[],1);
    confidence=conf(sub2ind(size(conf),vuss,1:nfrm));

% vuss score is set to 0 when confidence score falls below conf_thr
    conf_thr=0.4;

% median filter VUS contour; threshold median filtered contour to eliminate
% low confidence regions
    vuss=medf(vuss,5,nfrm);
    vuss_init=vuss;
    vuss(confidence < conf_thr)=0;

% clear graphics Panel 4
    reset(graphicPanel4);
    axes(graphicPanel4);
    cla;

% plot speech waveform in graphics Panel 4; the sample range is kept
% integer valued (odd L) and inside the signal (large frame shifts)
    halfFrame=floor(L/2);
    ss1=max(1,halfFrame+1-R);
    es1=min(length(y),halfFrame+1+nfrm*R);
    peakValue=max(abs(y));
    if peakValue > 0
        wave=y/peakValue;
    else
        wave=y;
    end
    plot(ss1:es1,wave(ss1:es1),'k');
    xlabel('sample index');
    ylabel('value');
    grid on;
    lo=double(min(wave));
    hi=double(max(wave));
    % axis limits must be strictly increasing
    if es1 > ss1 && hi > lo
        axis([ss1 es1 lo hi]);
    end

% clear graphics Panel 3
    reset(graphicPanel3);
    axes(graphicPanel3);
    cla;

% plot VUS decision along with confidence score in graphics Panel 3
    frames=1:nfrm;
    plot(frames,vuss_init,'g--',frames,vuss,'r',frames,confidence*3,'b--',...
        'LineWidth',2);
    xlabel('frame number');
    ylabel('VUS');
    hold on;
    legend('1=silence, 2=unvoiced, 3=voiced',...
        'thresholded by confidence score','confidence score (scaled by 3)');
    grid on;
    axis([0 nfrm+1 .5 3.5]);
    stitle=sprintf('filename: %s, N,M,p: %d %d %d',filename,L,R,p);

% plot log energy contour on graphics Panel 2 and zero crossings rate
% contour on graphics Panel 1
    featAxes=[graphicPanel2 graphicPanel1];
    featLabels={'log energy (dB)','zero crossings'};
    for q=1:2
        reset(featAxes(q));
        axes(featAxes(q));
        cla;
        plot(frames,x(q,:),'r','LineWidth',2);
        xlabel('frame number');
        ylabel(featLabels{q});
        grid on;
        lo=double(min(x(q,:)));
        hi=double(max(x(q,:)));
        % a constant contour has no valid vertical range; keep auto limits
        if hi > lo
            axis([0 nfrm+1 lo hi]);
        end
        legend(featLabels{q});
    end

% display fname, signal processing parameters on titleBox1
    stitle2=' -- VUS Analysis (1:S, 2:U, 3:V)';
    stitle3=strcat(stitle,stitle2);
    set(titleBox1,'string',stitle3);
    set(titleBox1,'FontSize',20);
end

% Callback for button10 -- close GUI
 function button10Callback(h,eventdata)
 % Close the GUI.
 %   h, eventdata: standard callback arguments (unused)
     % close the figure this GUI was built in rather than whichever figure
     % happens to be current
     close(f);
 end
end

Contact us