?? preprocess.c
字號:
{ if (st->S[i] > 20.f*st->Smin[i]+1000.f) active_bands+=1; } active_bands /= (freq_end-freq_start+1); if (active_bands > .2f) { float loudness=0.f; float rate, rate2=.2f; st->nb_loudness_adapt++; rate=2.0f/(1+st->nb_loudness_adapt); if (rate < .05f) rate = .05f; if (rate < .1f && pow(loudness, LOUDNESS_EXP) > st->loudness) rate = .1f; if (rate < .2f && pow(loudness, LOUDNESS_EXP) > 3.f*st->loudness) rate = .2f; if (rate < .4f && pow(loudness, LOUDNESS_EXP) > 10.f*st->loudness) rate = .4f; for (i=2;i<N;i++) { loudness += scale*st->ps[i] * st->gain2[i] * st->gain2[i] * st->loudness_weight[i]; } loudness=sqrt(loudness); /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) && loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ st->loudness = (1-rate)*st->loudness + (rate)*pow(loudness, LOUDNESS_EXP); st->loudness2 = (1-rate2)*st->loudness2 + rate2*pow(st->loudness, 1.0f/LOUDNESS_EXP); loudness = pow(st->loudness, 1.0f/LOUDNESS_EXP); /*fprintf (stderr, "%f %f %f\n", loudness, st->loudness2, rate);*/ } agc_gain = st->agc_level/st->loudness2; /*fprintf (stderr, "%f %f %f %f\n", active_bands, st->loudness, st->loudness2, agc_gain);*/ if (agc_gain>200) agc_gain = 200; for (i=0;i<N;i++) st->gain2[i] *= agc_gain; }static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x){ int i; int N = st->ps_size; int N3 = 2*N - st->frame_size; int N4 = st->frame_size - N3; float *ps=st->ps; /* 'Build' input frame */ for (i=0;i<N3;i++) st->frame[i]=st->inbuf[i]; for (i=0;i<st->frame_size;i++) st->frame[N3+i]=x[i]; /* Update inbuf */ for (i=0;i<N3;i++) st->inbuf[i]=x[N4+i]; /* Windowing */ for (i=0;i<2*N;i++) st->frame[i] *= st->window[i]; /* Perform FFT */ spx_drft_forward(st->fft_lookup, st->frame); /* Power spectrum */ ps[0]=1; for (i=1;i<N;i++) ps[i]=1+st->frame[2*i-1]*st->frame[2*i-1] + st->frame[2*i]*st->frame[2*i];}static void update_noise_prob(SpeexPreprocessState *st){ int i; int N = st->ps_size; for (i=1;i<N-1;i++) st->S[i] = 100.f+ .8f*st->S[i] + .05f*st->ps[i-1]+.1f*st->ps[i]+.05f*st->ps[i+1]; if (st->nb_preprocess<1) { for (i=1;i<N-1;i++) st->Smin[i] = st->Stmp[i] = st->S[i]+100.f; } if (st->nb_preprocess%200==0) { for (i=1;i<N-1;i++) { st->Smin[i] = min(st->Stmp[i], st->S[i]); st->Stmp[i] = st->S[i]; } } else { for (i=1;i<N-1;i++) { st->Smin[i] = min(st->Smin[i], st->S[i]); st->Stmp[i] = min(st->Stmp[i], st->S[i]); } } for (i=1;i<N-1;i++) { st->update_prob[i] *= .2f; if (st->S[i] > 2.5*st->Smin[i]) st->update_prob[i] += .8f; /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/ /*fprintf (stderr, "%f ", st->update_prob[i]);*/ }}#define NOISE_OVERCOMPENS 1.4int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo){ int i; int is_speech=1; float mean_post=0; float mean_prior=0; int N = st->ps_size; int N3 = 2*N - st->frame_size; int N4 = st->frame_size - N3; float scale=.5f/N; float *ps=st->ps; float Zframe=0, Pframe; preprocess_analysis(st, x); update_noise_prob(st); st->nb_preprocess++; /* Noise estimation always updated for the 20 first times */ if (st->nb_adapt<10) { update_noise(st, ps, echo); } /* Deal with residual echo if provided */ if (echo) for (i=1;i<N;i++) st->echo_noise[i] = (.3f*st->echo_noise[i] + st->frame_size*st->frame_size*4.0*echo[i]); /* Compute a posteriori SNR */ for (i=1;i<N;i++) { float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i]; st->post[i] = ps[i]/tot_noise - 1.f; if (st->post[i]>100.f) st->post[i]=100.f; /*if (st->post[i]<0) st->post[i]=0;*/ mean_post+=st->post[i]; } mean_post /= N; if (mean_post<0.f) mean_post=0.f; /* Special case for first frame */ if (st->nb_adapt==1) for (i=1;i<N;i++) st->old_ps[i] = ps[i]; /* Compute a priori SNR */ { /* A priori update rate */ for (i=1;i<N;i++) { float gamma = .1+.9*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i])); float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i]; /* A priori SNR update */ st->prior[i] = gamma*max(0.0f,st->post[i]) + (1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]); if (st->prior[i]>100.f) st->prior[i]=100.f; mean_prior+=st->prior[i]; } } mean_prior /= N;#if 0 for (i=0;i<N;i++) { fprintf (stderr, "%f ", st->prior[i]); } fprintf (stderr, "\n");#endif /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/ if (st->nb_preprocess>=20) { int do_update = 0; float noise_ener=0, sig_ener=0; /* If SNR is low (both a priori and a posteriori), update the noise estimate*/ /*if (mean_prior<.23 && mean_post < .5)*/ if (mean_prior<.23f && mean_post < .5f) do_update = 1; for (i=1;i<N;i++) { noise_ener += st->noise[i]; sig_ener += ps[i]; } if (noise_ener > 3.f*sig_ener) do_update = 1; /*do_update = 0;*/ if (do_update) { st->consec_noise++; } else { st->consec_noise=0; } } if (st->vad_enabled) is_speech = speex_compute_vad(st, ps, mean_prior, mean_post); if (st->consec_noise>=3) { update_noise(st, st->old_ps, echo); } else { for (i=1;i<N-1;i++) { if (st->update_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/) { if (echo) st->noise[i] = .95f*st->noise[i] + .05f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*4.0*echo[i]); else st->noise[i] = .95f*st->noise[i] + .05f*st->ps[i]; } } } for (i=1;i<N;i++) { st->zeta[i] = .7f*st->zeta[i] + .3f*st->prior[i]; } { int freq_start = (int)(300.0f*2.f*N/st->sampling_rate); int freq_end = (int)(2000.0f*2.f*N/st->sampling_rate); for (i=freq_start;i<freq_end;i++) { Zframe += st->zeta[i]; } Zframe /= (freq_end-freq_start); } st->Zlast = Zframe; Pframe = qcurve(Zframe); /*fprintf (stderr, "%f\n", Pframe);*/ /* Compute gain according to the Ephraim-Malah algorithm */ for (i=1;i<N;i++) { float MM; float theta; float prior_ratio; float p, q; float zeta1; float P1; prior_ratio = st->prior[i]/(1.0001f+st->prior[i]); theta = (1.f+st->post[i])*prior_ratio; if (i==1 || i==N-1) zeta1 = st->zeta[i]; else zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1]; P1 = qcurve (zeta1); /* FIXME: add global prob (P2) */ q = 1-Pframe*P1; q = 1-P1; if (q>.95f) q=.95f; p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta)); /*p=1;*/ /* Optimal estimator for loudness domain */ MM = hypergeom_gain(theta); st->gain[i] = prior_ratio * MM; /*Put some (very arbitraty) limit on the gain*/ if (st->gain[i]>2.f) { st->gain[i]=2.f; } st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i]; if (st->denoise_enabled) { st->gain2[i] = p*p*st->gain[i]; /*st->gain2[i]=(p*sqrt(st->gain[i])+.05*(1-p))*(p*sqrt(st->gain[i])+.05*(1-p));*/ /*st->gain2[i] = pow(st->gain[i], p) * pow(.2f,1.f-p);*/ } else { st->gain2[i]=1.f; } } st->gain2[0]=st->gain[0]=0.f; st->gain2[N-1]=st->gain[N-1]=0.f; /* for (i=30;i<N-2;i++) { st->gain[i] = st->gain2[i]*st->gain2[i] + (1-st->gain2[i])*.333*(.6*st->gain2[i-1]+st->gain2[i]+.6*st->gain2[i+1]+.4*st->gain2[i-2]+.4*st->gain2[i+2]); } for (i=30;i<N-2;i++) st->gain2[i] = st->gain[i]; */ if (st->agc_enabled) speex_compute_agc(st, mean_prior);#if 0 if (!is_speech) { for (i=0;i<N;i++) st->gain2[i] = 0; }#if 0 else { for (i=0;i<N;i++) st->gain2[i] = 1; }#endif#endif /* Apply computed gain */ for (i=1;i<N;i++) { st->frame[2*i-1] *= st->gain2[i]; st->frame[2*i] *= st->gain2[i]; } /* Get rid of the DC and very low frequencies */ st->frame[0]=0; st->frame[1]=0; st->frame[2]=0; /* Nyquist frequency is mostly useless too */ st->frame[2*N-1]=0; /* Inverse FFT with 1/N scaling */ spx_drft_backward(st->fft_lookup, st->frame); for (i=0;i<2*N;i++) st->frame[i] *= scale; { float max_sample=0; for (i=0;i<2*N;i++) if (fabs(st->frame[i])>max_sample) max_sample = fabs(st->frame[i]); if (max_sample>28000.f) { float damp = 28000.f/max_sample; for (i=0;i<2*N;i++) st->frame[i] *= damp; } } for (i=0;i<2*N;i++) st->frame[i] *= st->window[i]; /* Perform overlap and add */ for (i=0;i<N3;i++) x[i] = st->outbuf[i] + st->frame[i]; for (i=0;i<N4;i++) x[N3+i] = st->frame[N3+i]; /* Update outbuf */ for (i=0;i<N3;i++) st->outbuf[i] = st->frame[st->frame_size+i]; /* Save old power spectrum */ for (i=1;i<N;i++) st->old_ps[i] = ps[i]; return is_speech;}void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo){ int i; int N = st->ps_size; int N3 = 2*N - st->frame_size; float *ps=st->ps; preprocess_analysis(st, x); update_noise_prob(st); st->nb_preprocess++; for (i=1;i<N-1;i++) { if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i]) { if (echo) st->noise[i] = .95f*st->noise[i] + .1f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*4.0*echo[i]); else st->noise[i] = .95f*st->noise[i] + .1f*st->ps[i]; } } for (i=0;i<N3;i++) st->outbuf[i] = x[st->frame_size-N3+i]*st->window[st->frame_size+i]; /* Save old power spectrum */ for (i=1;i<N;i++) st->old_ps[i] = ps[i]; for (i=1;i<N;i++) st->reverb_estimate[i] *= st->reverb_decay;}int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr){ int i; SpeexPreprocessState *st; st=(SpeexPreprocessState*)state; switch(request) { case SPEEX_PREPROCESS_SET_DENOISE: st->denoise_enabled = (*(int*)ptr); break; case SPEEX_PREPROCESS_GET_DENOISE: (*(int*)ptr) = st->denoise_enabled; break; case SPEEX_PREPROCESS_SET_AGC: st->agc_enabled = (*(int*)ptr); break; case SPEEX_PREPROCESS_GET_AGC: (*(int*)ptr) = st->agc_enabled; break; case SPEEX_PREPROCESS_SET_AGC_LEVEL: st->agc_level = (*(float*)ptr); if (st->agc_level<1) st->agc_level=1; if (st->agc_level>32768) st->agc_level=32768; break; case SPEEX_PREPROCESS_GET_AGC_LEVEL: (*(float*)ptr) = st->agc_level; break; case SPEEX_PREPROCESS_SET_VAD: st->vad_enabled = (*(int*)ptr); break; case SPEEX_PREPROCESS_GET_VAD: (*(int*)ptr) = st->vad_enabled; break; case SPEEX_PREPROCESS_SET_DEREVERB: st->dereverb_enabled = (*(int*)ptr); for (i=0;i<st->ps_size;i++) st->reverb_estimate[i]=0; break; case SPEEX_PREPROCESS_GET_DEREVERB: (*(int*)ptr) = st->dereverb_enabled; break; case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL: st->reverb_level = (*(float*)ptr); break; case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL: (*(float*)ptr) = st->reverb_level; break; case SPEEX_PREPROCESS_SET_DEREVERB_DECAY: st->reverb_decay = (*(float*)ptr); break; case SPEEX_PREPROCESS_GET_DEREVERB_DECAY: (*(float*)ptr) = st->reverb_decay; break; case SPEEX_PREPROCESS_SET_PROB_START: st->speech_prob_start = (*(int*)ptr) / 100.0; if ( st->speech_prob_start > 1 || st->speech_prob_start < 0 ) st->speech_prob_start = SPEEX_PROB_START_DEFAULT; break; case SPEEX_PREPROCESS_GET_PROB_START: (*(int*)ptr) = st->speech_prob_start * 100; break; case SPEEX_PREPROCESS_SET_PROB_CONTINUE: st->speech_prob_continue = (*(int*)ptr) / 100.0; if ( st->speech_prob_continue > 1 || st->speech_prob_continue < 0 ) st->speech_prob_continue = SPEEX_PROB_CONTINUE_DEFAULT; break; case SPEEX_PREPROCESS_GET_PROB_CONTINUE: (*(int*)ptr) = st->speech_prob_continue * 100; break; default: speex_warning_int("Unknown speex_preprocess_ctl request: ", request); return -1; } return 0;}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -