| [AI,W,f]=artind(V,R,Be,type)
|
function [AI,W,f]=artind(V,R,Be,type)
%
% [AI,W,f]=artind(V,R,Be,type)
%
% Estimates the articulation index, AI, based
% on the paper by French and Steinberg (1946).All inputs
% are 1x20 vectors with values corresponding to the average of the
% 20 frequency bands given below.
%
% V is "the speech level of the talker at two inches
% from the lips, as determined by a sound level measurement
% using 40-dB weighting."
%
% R is the orthotelephonic response of the communication system
% at the appropritate frequencies.
%
% Be represents noise intensity per frequency level of the total
% noise from all sources except that produced by the received speech.
%
% 'type' is a string defining which AI calculation to use. Set:
%
% 'french' from French Steinberg's Method (JASA, 1949), default
% 'kryter' from Kryter's Method (JASA, 1962)
% 'dot_nonsense_syl' from Mueller & Killion (1990) used to measure AI based on human pure
% tone thresholds (V). Enter empty values for othe parameters. V is
% the pure tone threshold for frequencies 250 300 400 500 600 800 1000 1250 1750 2000 2500 3500 4000 5500
%
% 'Ao' from Pavlovic (1991). Estimates persons AI based on pure tone thresholds for frequencies:
% 500 1000 2000 4000. V is the persons threholds and those respective frequencies. Enter empty
% brackets for the other parameters.
%
% A is the estimated articulation index. A is between 0 (0% intelligibility and
% 1 (100% intelligibity).
%
% W is the weighting function for each band. Value of 1 means band is contributing
% maximally to speech intelligibility. Value of 0 means band is not contributing
% at all.
%
% f is the frequency centers from which W was calculated.
%
% for the kryter method the center frequencies and their minmax ranges are (in Hz):
% 270 200 330
% 380 330 430
% 490 430 560
% 630 560 700
% 770 700 840
% 920 840 1000
% 1070 1000 1150
% 1230 1150 1310
% 1400 1310 1480
% 1570 1480 1660
% 1740 1660 1830
% 1920 1830 2020
% 2130 2020 2240
% 2370 2240 2500
% 2660 2500 2820
% 3000 2820 3200
% 3400 3200 3650
% 3950 3650 4250
% 4650 4250 5050
% 5600 5050 6100
%
%
%
%
% Written by Ikaro Silva 2005
if(isempty(type) | strcmp(type,'french'))
%%%%
% The 20 band centers and ranges (Hz):
% 310 250-375
% 440 375-505
% 575 505-645
% 720 645-795
% 875 795-955
% 1040 955-1130
% 1225 1130-1315
% 1415 1315-1515
% 1615 1515-1720
% 1825 1720-1930
% 2035 1930-2140
% 2250 2140-2355
% 2475 2355-2600
% 2750 2600-2900
% 3080 2900-3255
% 3470 3255-3680
% 3940 3680-4200
% 4530 4200-4860
% 5300 4860-5720
% 6350 5720-7000
%
%%%%Defining Parameters and Tables%%%%
W=zeros(1,20);
p=12;
%Value of Bs' according to table VI in dB vs 10^-16 watt/cm^2
Bsp=[36.5 36.6 35.7 33.4 30.7 28.3 26 24 22.1 20.4 18.9 17.5 ...
16.1 14.6 13 11.3 9.5 7.5 5.1 2.5];
%X= bo -K
X=-1*[1.5 8.0 11.6 14.1 15.7 16.7 17.5 18.3 19.4 21 23.3 ...
25.2 26.6 27.8 28.5 28.9 28.8 27.8 25.1 19.7];
%The number in dB that the noise produced in any band, by speech in any
%lower band, is below the long average intensity of the speech in the lower
%band from table VIII
Q=[49 62 70 75 78 81 83 85 85 85 86 86 87 87 87 87 87 87 87;...
0 44 56 64 70 74 78 80 82 83 84 85 85 86 86 87 87 87 87;...
0 0 42 52 61 67 71 75 78 79 81 83 84 85 85 86 86 87 87;...
0 0 0 40 50 57 64 68 71 75 78 79 81 82 83 84 85 86 87;...
0 0 0 0 38 49 56 62 66 70 72 75 78 79 81 83 85 85 86;...
0 0 0 0 0 36 46 54 59 64 67 70 72 75 77 80 82 84 85;...
0 0 0 0 0 0 35 45 51 56 61 65 69 72 75 77 79 81 84;...
0 0 0 0 0 0 0 35 43 50 55 59 64 67 71 74 77 80 83;...
0 0 0 0 0 0 0 0 35 42 47 52 57 63 67 71 74 78 81;...
0 0 0 0 0 0 0 0 0 34 41 47 52 57 63 67 71 76 79;...
0 0 0 0 0 0 0 0 0 0 33 40 46 52 57 63 68 72 77;...
0 0 0 0 0 0 0 0 0 0 0 33 40 47 54 60 65 71 75;...
0 0 0 0 0 0 0 0 0 0 0 0 34 41 50 55 61 67 72;...
0 0 0 0 0 0 0 0 0 0 0 0 0 34 42 49 56 64 70;...
0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 43 51 59 65;...
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 45 54 61;...
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 46 56;...
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 36 49;...
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 39];
%Values of (B(+)X)-X as a function of B-X. Table VII
BpXmX=[[-9:1:9]' [0.5 0.6 0.8 1 1.2 1.5 1.8 2.1 2.5 3.0 3.5 4.1 4.8 5.5 6.2 7.0 7.8 8.6 9.5]'];
%frequency center/range of each band. In case of plotting f vs W.
f=[310 440 575 720 875 1040 1225 1415 1615 1825 2035 2250 2475 2750 3080 3470 3940 4530 5300 6350];
%Values of m as a function of Z to the nearest dB (in case of high level
%noises from Table II:
mZ=[[54:91]' [ones(7,1);ones(5,1)*2;ones(5,1)*3;ones(4,1)*4;ones(3,1)*5;ones(3,1)*6;ones(3,1)*7;ones(3,1)*8;ones(3,1)*9;ones(2,1)*10]];
%%%%%Calculation of AI%%%%%%
Bs=Bsp + (V-90) + R; % level in dB vs 10^-16 watt/cm^2 of the long average intensity per cycle
% of the received speech (intensity being expressed
% as free field intensity
%Interband masking of speech, produced in band n by speech in band k
%So n is from 2-20 and k is from 1-19. Bnk only applies to band 2-20.
Bmask=triu(repmat(Bs(1:19),[19 1]));
Bnk=Bmask - Q;
Bnk(Bnk==0)=NaN;
%Combine masking in a power basis (eq. 21) to obtain total inter-speech
%masking for respective band. Append 0 for the first band
Bn=[0 nansum(10.^(Bnk./10))]; %add on a power basis
Bn=10*log10(Bn);
Bf= Bs-24; % Self masking noise
B=10.^(Be/10)+ 10.^(Bf/10) + 10.^(Bn/10); %add on a power basis
B=10*log10(B);
Z=B-X; %effective level (level above threshold of a critical band of noise)
z=round(Z);
% if(any(z>9) | any(z<-9))
% error('Effective level Z out of range.')
% end
%Calculate m using mZ table
m=zeros(1,20);
ind=find(Z>54);
for i=1:length(ind),
[val,trash]=find(mZ==round(Z));
m(ind(i))=mZ(val,2);
end
BpX=zeros(20,1);
for i=1:20,
[ind,trash]=find(BpXmX(:,1)==z(i));
BpX(i)=BpXmX(ind,2)+X(i); %(B(+)X)-X+X = B(+)X
end
W=(1/30).*[Bs+p-6-m-BpX'];
AI=0.05*sum(W);
elseif(strcmp(type,'kryter'))
%%%%%%%%%%%%%%%Kryter's Method%%%%%%%%%%%%%%%%%%%%
%Not corrected for loudspeaker presentation of speech in typical
%semireveberant room
f=[270 380 490 630 770 920 1070 1230 1400 1570 1740 1920 2130 2370 2660 3000 3400 3950 4650 5600]; %from Beranek
frange=[200 330 430 560 700 840 1000 1150 1310 1480 1660 1830 2020 2240 2500 2820 3200 3650 4250 5050]; %fequency range for each band
frange=[frange' [frange(2:end) 6100]'];
df=diff(frange,[],2);
th=-1*[6 10 13 16 16 16 17 18 19 20 22 23 26 28 29 30 29 27 24 21];%threshold of sounds with continous spectra
Bsp=[50 50 48 45 43 40 37 34 32 31 29 28 26 25 24 23 21 20 19 19]; %ideal speech spectrum
Msk=[[80:150]' floor([0:0.2:14]')]; %Correction for non-linear growth of masking
max_speech=[114 110 107 105 103 101 99 97 97 97 97 97 97 97 97 97 98 100 103 104];
%upward spread of masking
high_lev=[46:10:96]';
high_mask=zeros(6,2,5);
high_mask(:,:,1)=[[50 75 150 200 200 250]' [45 35 25 20 15 10]'];
high_mask(:,:,2)=[[100 150 250 400 500 500]' [40 30 23 18 13 8]'];
high_mask(:,:,3)=[[200 300 500 800 1000 1000]' [35 25 20 15 10 5]'];
high_mask(:,:,4)=[[200 500 1000 1500 1500 1500]' [40 25 15 10 5 3]'];
high_mask(:,:,5)=[[200 800 2000 3000 3000 3000]' [40 20 5 0 0 0]'];
high_ind=[0 800 1600 2400 3200 5200]; %frequency upper bound values
%Step 1
Bs=Bsp+R;
%RMS of input speech spectrum
Vrms=10*log10(10.^(V/10)*df);
Speech_Peaks= Bs + (Vrms-65);
%Step 2
Band_SL=Be - th;
noise_spec=Band_SL;
ind=find(Band_SL >80);
for i=1:length(ind),
[val,trash]=find(Msk==round(Band_SL(ind(i))));
noise_spec(ind(i))=noise_spec(ind(i))+ Msk(val,2);
end
%Step 3
noise_spec=noise_spec+th;
[val,ind]=max(noise_spec);
n=noise_spec;
n(ind)=-inf;
[st_pt]=find(n >= (val-3));
if(isempty(st_pt))
st_pt=20;
else
st_pt=st_pt(end);
end
low_m=zeros(1,st_pt);
Lev=noise_spec(st_pt);
if(Lev>46),
%Downward spread of masking
for i=st_pt:-1:1,
%Upward line to the left of st_pt with 10dB per octave slope
low_m(i)= (Lev-57) + (10/log(2))*log(f(st_pt)/f(i));
end
tLev=Lev;
if(tLev>95)
tLev=95;
end
%low_m(end)=[];
[lev_ind]=find(high_lev>tLev)-1;
[f_ind]=find(high_ind>f(st_pt))-1;
width=high_mask(lev_ind(1),1,f_ind(1));
slope=high_mask(lev_ind(1),2,f_ind(1));
%Upward spread of masking
high_m=zeros(1,20-st_pt);
j=1;
for i=st_pt:20,
if(f(i)< (st_pt+width)),
high_m(j)=Lev;
else
high_m(j)= Lev + (slope/log(2))*log(f(st_pt)/f(i));
end
j=j+1;
end
else
high_m=zeros(1,20-st_pt);
low_m=zeros(1,st_pt);
end
%Step 4
noise_spec=max(noise_spec,[low_m high_m]);
noise_spec=max(noise_spec,th);
Speech_Peaks=min(Speech_Peaks,max_speech);
W=Speech_Peaks - noise_spec;
W(W<0)=0;
W(W>30)=30;
%Step 5
AI=sum(W)/600;
elseif(strcmp(type,'Ao'))
f=[500 1000 2000 4000];
W=max(V,20);
W=min(W,50);
A=sum(50-W)/120;
elseif(strcmp(type,'dot_nonsense_syl'))
f=[250 300 400 500 600 800 1000 1250 1750 2000 2500 3500 4000 5500];
N=length(f);
dot=[1 0 0 0 0 0 0 0 0 0 0 0 0 1;... %15HL
0 0 0 0 0 0 0 0 0 0 0 0 0 0;...
0 1 0 0 0 0 0 0 0 0 0 0 0 0;...
0 0 1 0 0 0 0 0 0 0 0 0 0 0;...
0 0 0 0 1 1 1 1 1 1 1 1 1 1;...
1 0 0 1 0 0 1 1 1 1 1 1 1 0;...
0 0 0 0 0 0 1 1 1 1 1 1 0 1;...
0 1 1 0 1 1 0 0 1 1 1 1 0 0;...
0 0 0 0 1 1 1 1 1 1 1 1 1 1;...
0 0 0 1 0 0 0 0 0 0 0 0 0 0;...
1 0 1 0 0 0 1 1 1 1 1 1 1 1;...
0 0 0 0 1 1 0 0 0 0 0 0 0 0;...
0 1 0 1 0 0 1 1 1 1 1 1 1 1;...
0 0 0 0 1 1 0 1 0 0 0 0 0 0;...
0 0 1 0 0 0 1 1 1 1 1 1 1 0;...
0 0 0 1 0 0 0 0 1 1 1 1 1 0;...
0 0 0 0 1 1 1 1 1 1 1 1 0 0;...
0 0 0 0 0 0 0 0 1 1 1 1 0 0];%48 HL
rows=linspace(15,48,18)';
[ddots,dth]=meshgrid(rows,V);
D=abs(ddots-dth);
[trash,cl]=min(D,[],2);
W=zeros(1,N);
for i=1:N,
W(i)=sum(dot(cl(i):end,i));
end
A=sum(W(:))/100;
end
|
|