Code covered by the BSD License  

Highlights from
Analysis Modification Synthesis

image thumbnail
from Analysis Modification Synthesis by Kamil Wojcicki
Short-time Fourier analysis-modification-synthesis (AMS) framework for speech processing.

test_ams.m
%__________________________________________________________________________________________________________________________
% Short-time Fourier analysis-modification-synthesis (AMS) test framework by Kamil Wojcicki, 2010 (test_ams.m)
%
% Script outline:
%   1. read the input wav file into speech.original
%   2. pass it through ams() to obtain speech.processed
%   3. compute the SNR (dB) of every treatment against the original and log the results to <scriptname>.txt
%   4. plot waveform and spectrogram of every treatment and export the figure to <scriptname>.eps / <scriptname>.png
%   5. write every treatment, peak-normalised, to <treatment>.wav
%
% Requires ams() and myspectrogram() on the path (not defined in this file).
clear all; close all; % clc;

    % in-line function for SNR computation (dB): 10*log10( signal energy / error energy )
    % NOTE: the ratio is taken INSIDE the logarithm; inputs are flattened so both must have the same number of samples
    SNR = @(x,y) 10*log10( sum(x(:).^2) / sum((x(:)-y(:)).^2) );

    file.original = 'sp10.wav'; % specify the input file

    % read audio samples from the input file
    % (wavread was removed from recent MATLAB releases; prefer audioread/audioinfo when they are available)
    if exist('audioread', 'file')
        [speech.original, fs] = audioread(file.original);
        wavinfo = audioinfo(file.original);
        nbits = wavinfo.BitsPerSample;
    else
        [speech.original, fs, nbits] = wavread(file.original);
    end
    time = (0:length(speech.original)-1)/fs; % create time vector

    Tw = 32; % analysis frame duration (ms) 
    Ts = Tw/8; % analysis frame shift (ms)
    [speech.processed] = ams(speech.original, fs, Tw, Ts); % process speech through AMS framework without modification
    % ADD OTHER PROCESSING HERE ... e.g., 
    % [speech.processed2] = ams(speech.original, fs, Tw, Tw/4, 'Allen & Rabiner'); 
    % [speech.processed3] = ams(speech.original, fs, Tw, Tw/4, 'Griffin & Lim'); 

    methods = fieldnames(speech); % treatment names
    M = length(methods); % number of treatments

    % start a fresh log file (portable replacement for the shell call 'rm -f')
    % a full path is used so that exist() checks this exact file rather than searching the MATLAB path
    logfile = fullfile(pwd, sprintf('%s.txt', mfilename));
    if exist(logfile, 'file')
        delete(logfile);
    end
    diary(logfile); diary on;
    for m = 1:M % loop through treatment types and compute SNR (dB)
        method = methods{m};
        snr.(method) = SNR(speech.original, speech.(method)); % 'original' vs itself yields Inf (zero error energy)
        fprintf('SNR [ %12s ]: %4.2e dB\n', method, snr.(method));
    end
    diary off;

    figure('Position', [20 20 1000 200*M], 'PaperPositionMode', 'auto', 'Visible', 'on');
    for m = 1:M % loop through treatment types and plot spectrograms
        method = methods{m};

        subplot(M,2,2*m-1); % time domain plots
        plot(time,speech.(method),'k-'); 
        xlim([min(time) max(time)]);
        title(sprintf('Waveform: %s  SNR=%0.2e dB', method, snr.(method)), 'interpreter', 'none');
        xlabel('Time (s)');
        ylabel('Amplitude');

        subplot(M,2,2*m); % spectrogram plots
        myspectrogram(speech.(method), fs);
        set(gca,'ytick',0:1000:16000,'yticklabel',0:16); % relabel the frequency axis ticks in kHz
        title(sprintf('Spectrogram: %s  SNR=%0.2e dB', method, snr.(method)), 'interpreter', 'none');
        xlabel('Time (s)');
        ylabel('Frequency (kHz)');
    end
    print('-depsc2', '-r250', sprintf('%s.eps', mfilename));
    print('-dpng', sprintf('%s.png', mfilename));

    for m = 1:M % loop through treatment types and write audio to wav files
        method = methods{m};

        % peak-normalise to just below full scale; an all-zero signal is left untouched to avoid 0/0 = NaN
        sig = speech.(method);
        peak = max(abs(sig(:)));
        if peak > 0
            sig = 0.999*sig./peak;
        end
        audio.(method) = sig;

        % wavwrite was removed from recent MATLAB releases; prefer audiowrite when it is available
        if exist('audiowrite', 'file')
            audiowrite(sprintf('%s.wav',method), audio.(method), fs, 'BitsPerSample', nbits);
        else
            wavwrite(audio.(method), fs, nbits, sprintf('%s.wav',method));
        end
    end

%__________________________________________________________________________________________________________________________
% EOF

Contact us