%%% myvoicemark.m
%%% voice - unvoice speech part decision
%%% 2004-03, jingjing: endpoint-detection routine, adapted to work at multiple sampling rates
function [T]=myvoicemark(x,Fs)
%MYVOICEMARK  Mark the voiced parts of a speech signal (endpoint detection).
%
%   T = MYVOICEMARK(x, Fs) combines a smoothed short-time zero-crossing
%   count with a smoothed, normalised short-time energy. A sample is
%   flagged as speech when its energy is above energy_threshold AND its
%   zero-crossing count is below zero_threshold. Runs of flagged samples
%   longer than 8*FrameLength are kept, and kept runs separated by no more
%   than 8*FrameLength are merged into one.
%
%   Inputs:
%     x  - speech samples. The first length(x) elements (linear indexing)
%          are used, exactly as the original sample-by-sample loops did.
%     Fs - sampling rate in Hz; all window sizes are derived from it.
%
%   Output:
%     T  - 2-by-K matrix, one column per detected segment:
%          T(1,k) is the first sample of the segment, T(2,k) is the index
%          at which the segment ends (the first non-speech sample, or the
%          last analysed index when speech runs to the end).
%          T is [] when nothing is detected or the input is too short to
%          analyse.

FrameLength      = round(Fs*0.0025);     % 2.5 ms; 40 samples when Fs = 16000
energy_threshold = 0.01;                 % relative to the peak smoothed energy
zero_threshold   = 3.5;                  % crossings per Len-sample window
Len              = round(20*Fs/16000);   % zero-crossing window; 20 samples at 16 kHz
minSpan          = 8*FrameLength;        % 20 ms: minimum segment length / merge gap

N = length(x);
x = double(reshape(x(1:N), 1, []));      % row vector, same samples the loops indexed

T  = [];
N2 = N - Len;                            % number of zero-crossing windows
N1 = N - 2*FrameLength;                  % number of energy windows
if N2 < 1 || N1 < 1
    % Too short for even one analysis window (previously an
    % undefined-variable error at the fft call).
    return;
end

%% zero-crossing count
% flips(k) is true when samples k and k+1 lie on different sides of the
% "x > 0" test. crslt(n) counts the flips inside samples n .. n+Len, which
% is what the original nested loop accumulated.
isPositive = x > 0;
flips      = isPositive(2:end) ~= isPositive(1:end-1);
runningSum = [0, cumsum(double(flips))];
crslt      = runningSum(Len+1:N) - runningSum(1:N2);

%% zero-crossing smoothing and decision
C2 = fftLowpass(crslt);
C3 = C2 < zero_threshold;                % 1 = below the crossing threshold, 0 = above

%% short-time energy (window of 2*FrameLength+1 samples)
% conv(...,'valid') returns exactly N1 sliding sums; values can differ from
% the explicit double loop only by floating-point rounding.
E = conv(x.^2, ones(1, 2*FrameLength+1), 'valid');

%% energy smoothing and normalisation
E3 = fftLowpass(E);
E3 = E3./max(E3);

%% energy decision
% The energy track is normally shorter than the zero-crossing track; the
% tail of E4 stays 0. min() keeps the two masks the same length even if
% that ordering were ever reversed.
E4 = false(1, N2);
nCommon = min(N1, N2);
E4(1:nCommon) = E3(1:nCommon) > energy_threshold;

v = E4 & C3;                             % both decisions must agree

%% turn the 0/1 mask into [start; stop] pairs
% pt holds every index n >= 2 where v(n) differs from v(n-1).
pt = find(v(2:end) ~= v(1:end-1)) + 1;
edges = pt;
if v(1)
    edges = [1, edges];                  % mask starts inside speech
end
if v(end)
    edges = [edges, N2];                 % mask ends inside speech: pseudo end point
end
segStart = edges(1:2:end);
segStop  = edges(2:2:end);
keep     = (segStop - segStart) > minSpan;   % drop runs of 20 ms or less
T        = [segStart(keep); segStop(keep)];

%% merge segments whose gap is not larger than 8*FrameLength
T = mergeCloseSegments(T, minSpan);

if isempty(T)
    T = [];                              % keep the original 0-by-0 empty result
end
% Each column of T holds the start and end point of one speech segment.

% ----- local helpers (no closing "end", matching the main function) -----

function y = fftLowpass(s)
%FFTLOWPASS  Smooth a row vector by zeroing the middle of its FFT spectrum.
%   Keeps the first lp bins and the last lp+1 bins, lp = floor(n/200),
%   exactly as the original code did. That selection is not perfectly
%   conjugate-symmetric, so the inverse transform has a small imaginary
%   part, which is discarded by real().
n  = length(s);
lp = floor(n/200);
S  = fft(s);
kept = zeros(1, n);
kept(1:lp)   = S(1:lp);
kept(n-lp:n) = S(n-lp:n);
y = real(ifft(kept));

function merged = mergeCloseSegments(T, maxGap)
%MERGECLOSESEGMENTS  Join consecutive segments separated by <= maxGap samples.
%   The running segment is extended while the next one starts within
%   maxGap of its current end, so chains of close segments collapse into a
%   single column and no output columns overlap.
merged = T;
if size(T, 2) < 2
    return;
end
merged = T(:, 1);
for k = 2:size(T, 2)
    if T(1, k) - merged(2, end) > maxGap
        merged = [merged, T(:, k)]; %#ok<AGROW>
    else
        merged(2, end) = T(2, k);
    end
end
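% ------------------------------------------------------------------------
% NOTE: the lines below are residue from the web code viewer this file was
% copied from (keyboard-shortcut help); they are not part of the program.
%   Copy code:            Ctrl + C
%   Search code:          Ctrl + F
%   Full-screen mode:     F11
%   Switch theme:         Ctrl + Shift + D
%   Show shortcuts:       ?
%   Increase font size:   Ctrl + =
%   Decrease font size:   Ctrl + -
% ------------------------------------------------------------------------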