encoder.c
字號:
frameEn = 0.5f + 23.0831f * (sum-4.15888f);
if( ((frameEn-vad->Eloglowtrack) < SNR_THRESHOLD_UPD_LTE) || (tc < MIN_FRAME) ){
if( (frameEn<vad->Eloglowtrack) || ((tc) < MIN_FRAME))
vad->Eloglowtrack = vad->Eloglowtrack + (1 - lambdaLTE )*( frameEn - vad->Eloglowtrack );
else
vad->Eloglowtrack = vad->Eloglowtrack + (1.0f - (float)lambdaLTEhigherE )*( frameEn - vad->Eloglowtrack );
if (vad->Eloglowtrack < ENERGY_FLOOR)vad->Eloglowtrack = ENERGY_FLOOR;
}
if(tc>4){
if( ( (frameEn-vad->Eloglowtrack) > SNR_THRESHOLD_VAD) ){
vad->nbSpeechFrame++;
vad->flagVAD = 1;
}
else {
if( vad->nbSpeechFrame > MIN_SPEECH_FRAME_HANGOVER)
vad->hangOver = HANGOVER;
vad->nbSpeechFrame = 0;
if(vad->hangOver!=0){
vad->hangOver--;
vad->flagVAD = 1;
}
else
vad->flagVAD = 0;
}
}
return vad->flagVAD;
}
/*
 * Perform noise reduction on the current frame (two cascaded Wiener-filter
 * stages followed by offset compensation and waveform processing).
 *
 * pCodec        - encoder state; the pCodec->noise members (previous PSDs,
 *                 noise estimates, denoised spectra, energies, SNR track and
 *                 gain factor) are read and updated in place.
 * pBuffer       - working sample buffer. The second-stage input region starts
 *                 at pBuffer+320 and the first-stage input region at
 *                 pBuffer+640; stage 1 writes its filtered samples to
 *                 pState2+FRAMESHIFT*3 and stage 2 writes to
 *                 pBuffer+FRAMESHIFT*2.
 * tc            - frame counter. Stage 2 only runs when tc > 2 and output is
 *                 only produced when tc > 4.
 *                 NOTE(review): the first-stage forgetting factor divides by
 *                 tc, so tc is assumed to be >= 1 - confirm with the caller.
 * pOutFFT       - receives FFTSIZE values of the first-stage smoothed power
 *                 spectrum.
 * pOutputBuffer - receives FRAMESHIFT*4 values of processed output when
 *                 tc > 4; untouched otherwise.
 * tcur          - counter incremented each time an output frame is written.
 * pOutMel       - receives the NUM_CHANNELS+2 first-stage Mel filter-bank
 *                 values.
 * pOutDen       - receives FFTSPEC values of the first-stage Wiener filter
 *                 (pDen).
 * flagVAD       - when zero, the first-stage noise estimate is updated from
 *                 the current frame; when non-zero it is left unchanged.
 */
void DoNoiseReduction(AuroraEncoder *pCodec,float *pBuffer, int tc, float *pOutFFT, float *pOutputBuffer, int *tcur,
                      float *pOutMel, float *pOutDen,char flagVAD){
    int i;
    float lambdaNSE;
    float pFFT[FFTSIZE+2];
    float pPSD[FFTSPEC];
    float pNoise[FFTSPEC];
    float pDen[FFTSPEC];
    /* NOTE(review): FRAMESHIFT*4 elements are copied out of this buffer at
     * the end of the function - confirm that FRAMESHIFT*4 <= 200. */
    float pDataOut[200];
    /* Gain factorization.
     * SNRlow starts from the tracked value of the previous frame so that
     * (a) the forgetting-factor test compares against the real track and
     * (b) the track is carried over unchanged when the update is skipped.
     * (It used to start at 0.0f on every call.) */
    float SNRlow = pCodec->noise.SNRlowPrev;
    float agf=0.0f;
    float Enoise;
    float SNRaver;
    float lsnr;
    /* Mel filter-bank */
    float pMelBank[NUM_CHANNELS+2];
    float phWF[(NUM_CHANNELS+1)*2+1];
    float phWFtrunc[FL+1];
    float *pState2 = pBuffer+320;
    float *pState1 = pState2+320;

    /*!!!! FIRST STAGE !!!!*/
    /* Spectrum estimation: window, zero-pad, FFT, smoothed power spectrum */
    ippsMul_32f(pCodec->noise.winHannCoeff, pState1+60,pFFT, WINHANNSIZE);
    ippsZero_32f(pFFT+WINHANNSIZE, FFTSIZE-WINHANNSIZE);
    ippsFFTFwd_RToPerm_32f(pFFT,pFFT,pCodec->noise.ctxFFT,NULL);
    ippsSmoothedPowerSpectrumAurora_32f(pFFT,pFFT,FFTSIZE);
    ippsCopy_32f(pFFT,pOutFFT,FFTSIZE);

    /* Power spectral density mean: combine with the previous frame's PSD,
     * then remember the current one for the next call */
    ippsWeightedSum_32f(pFFT,pCodec->noise.pPSDprev_1,pPSD,FFTSPEC,1.0f,1.0f);
    ippsCopy_32f(pFFT,pCodec->noise.pPSDprev_1,FFTSPEC);
    ippsSqrt_32f_I(pPSD,FFTSPEC);
    ippsSqrt_32f_I(pFFT,FFTSPEC);

    /* Wiener filter design */
    if( tc+1 < NB_FRAME_THRESHOLD_NSE) {
        lambdaNSE = 1 - 1.0f/(tc);
    } else {
        lambdaNSE = LAMBDA_NSE;
    }
    if(!flagVAD){
        /* Update the noise estimate only when flagVAD is clear; floor at EPS */
        ippsWeightedSum_32f(pCodec->noise.pNoiseTnPrev_1,pPSD,pCodec->noise.pNoiseTn,FFTSPEC,lambdaNSE,(1-lambdaNSE));
        ippsThreshold_32f_I(pCodec->noise.pNoiseTn,FFTSPEC,EPS,ippCmpLess);
    }
    ippsCopy_32f(pCodec->noise.pNoiseTn,pNoise,FFTSPEC);
    ippsCopy_32f(pCodec->noise.pNoiseTn,pCodec->noise.pNoiseTnPrev_1,FFTSPEC);
    ippsWienerFilterDesign_Aurora_32f(pPSD,pNoise,pCodec->noise.pDenPrev_1,pDen,FFTSPEC);
    ippsMul_32f(pFFT, pDen, pCodec->noise.pDenPrev_1, FFTSPEC);

    /* Shift the history of the last three denoised-frame energies */
    pCodec->noise.Eden2 = pCodec->noise.Eden1;
    pCodec->noise.Eden1 = pCodec->noise.Eden;
    ippsSum_32f(pCodec->noise.pDenPrev_1,FFTSPEC,&pCodec->noise.Eden,ippAlgHintAccurate);

    /* Mel filter-bank */
    ippsEvalFBank_32f(pDen,pMelBank,pCodec->noise.pFBank);
    ippsCopy_32f(pDen,pOutDen,FFTSPEC);
    ippsCopy_32f(pMelBank,pOutMel,NUM_CHANNELS+2);

    /* Mel IDCT */
    ippsMatVecMul_32f_D2L((const float**)pCodec->noise.pIDCT,pMelBank,NUM_CHANNELS+2,phWF,10);

    /* Apply filter and forming second buffer */
    ippsTabsCalculation_Aurora_32f(phWF,phWFtrunc);
    ippsMul_32f_I(pCodec->noise.winHannFilter, phWFtrunc, FL);
    ippsResidualFilter_Aurora_32f(pState1+FRAMESHIFT-8,pState2+FRAMESHIFT*3,phWFtrunc);

    /*!!! Start 2nd state !!!*/
    if(tc>2){
        /* Frame index as seen by the second stage (>= 1 inside this block) */
        int t2 = tc-2;

        /* Spectrum estimation 2 */
        ippsMul_32f(pCodec->noise.winHannCoeff, pState2+60,pFFT, WINHANNSIZE);
        ippsZero_32f(pFFT+WINHANNSIZE, FFTSIZE-WINHANNSIZE);
        ippsFFTFwd_RToPerm_32f(pFFT,pFFT,pCodec->noise.ctxFFT,NULL);
        ippsSmoothedPowerSpectrumAurora_32f(pFFT,pFFT,FFTSIZE);

        /* Power spectral density mean 2 */
        ippsWeightedSum_32f(pFFT,pCodec->noise.pPSDprev_2,pPSD,FFTSPEC,1.0f,1.0f);
        ippsCopy_32f(pFFT,pCodec->noise.pPSDprev_2,FFTSPEC);
        ippsSqrt_32f_I(pFFT,FFTSPEC);

        /* Wiener filter design 2: plain running average for the first
         * frames, dedicated noise-spectrum update afterwards */
        if( t2 < 11) {
            lambdaNSE = 1 - 1.0f/t2;
            ippsWeightedSum_32f(pCodec->noise.pNoisePrev_2,pPSD,pNoise,FFTSPEC,lambdaNSE,(1-lambdaNSE));
        }else{
            ippsNoiseSpectrumUpdate_Aurora_32f(pPSD,pCodec->noise.pNoisePrev_2,pNoise,FFTSPEC);
        }
        ippsCopy_32f(pNoise,pCodec->noise.pNoisePrev_2,FFTSPEC);
        ippsSqrt_32f_I(pPSD,FFTSPEC);
        ippsSqrt_32f_I(pNoise,FFTSPEC);
        ippsWienerFilterDesign_Aurora_32f(pPSD,pNoise,pCodec->noise.pDenPrev_2,pDen,FFTSPEC);
        ippsMul_32f(pFFT, pDen, pCodec->noise.pDenPrev_2, FFTSPEC);

        /* Mel filter-bank 2 */
        ippsEvalFBank_32f(pDen,pMelBank,pCodec->noise.pFBank);

        /* Gain factorization: averaged SNR from the last three denoised
         * energies against the current noise energy */
        ippsSum_32f(pNoise,FFTSPEC,&Enoise,ippAlgHintAccurate);
        SNRaver = (pCodec->noise.Eden2 * pCodec->noise.Eden1 * pCodec->noise.Eden)/(Enoise * Enoise * Enoise);
        if (SNRaver > 0.00001) {
            SNRaver = (float)((20 * log10 (SNRaver)) / 3.0);
        } else {
            SNRaver = -33.3333f;
        }

        /* Low-SNR tracking; when this update is skipped SNRlow keeps the
         * previous track value it was initialised with */
        if( ((SNRaver-pCodec->noise.SNRlowPrev)<10.0f) || t2 < 10){
            if(t2<10) {
                lsnr = 1.0f - 1.0f/t2;
            } else if(SNRaver < SNRlow) {
                lsnr = 0.95f;
            } else {
                lsnr = 0.99f;
            }
            SNRlow = lsnr * pCodec->noise.SNRlowPrev + (1-lsnr) * SNRaver;
            pCodec->noise.SNRlowPrev = SNRlow;
        }

        /* Step the gain factor up (clamped to 0.8) near the low-SNR track,
         * down (clamped to 0.1) otherwise */
        if(SNRaver < (SNRlow+3.5f)){
            agf = pCodec->noise.agfPrev+0.15f;
            if(agf > 0.8) {
                agf = 0.8f;
            }
        }else{
            agf = pCodec->noise.agfPrev-0.3f;
            if(agf < 0.1f) {
                agf = 0.1f;
            }
        }
        pCodec->noise.agfPrev = agf;
        for (i=0; i <=NUM_CHANNELS+1; i++){
            pMelBank[i] = 1.0f + agf * (pMelBank[i]-1.0f);
        }

        /* Mel IDCT 2 */
        ippsMatVecMul_32f_D2L((const float**)pCodec->noise.pIDCT,pMelBank,NUM_CHANNELS+2,phWF,10);

        /* Apply filter 2 */
        ippsTabsCalculation_Aurora_32f(phWF,phWFtrunc);
        ippsMul_32f_I(pCodec->noise.winHannFilter, phWFtrunc, FL);
        ippsResidualFilter_Aurora_32f(pState2+FRAMESHIFT-8,pBuffer+FRAMESHIFT*2,phWFtrunc);

        /* Offset compensation */
        ippsCompensateOffset_32f_I(pBuffer+FRAMESHIFT*2,FRAMESHIFT,
            &(pCodec->noise.pSrc0),pCodec->noise.Dst0,0.9990234375f);
        pCodec->noise.Dst0 = pBuffer[3*FRAMESHIFT-1];

        /* Waveform Processing */
        if(tc > 4){
            ippsWaveProcessing_Aurora_32f(pBuffer,pDataOut);
            /* output result */
            ippsCopy_32f(pDataOut,pOutputBuffer,FRAMESHIFT*4);
            (*tcur)++;
        }
    }
}
/*
 * Put an already-initialised encoder back into its starting state.
 *
 * Only scalar members are reassigned and existing buffers are cleared or
 * refilled; nothing is allocated or released here, so every buffer touched
 * below (including pCodec->pFrame) must already be valid.
 */
void ResetAuroraEncoder(AuroraEncoder *pCodec){
    int k;

    /* Top-level encoder scalars and frame bookkeeping */
    pCodec->Dst0 = 0.0;
    pCodec->preFloat = 0.0;
    pCodec->pSrc0 = 0.0;
    pCodec->curPosition = 0;
    pCodec->iCountFrame = 0;
    pCodec->mframeCounter = 1;
    pCodec->pFrame->pos = 6;
    pCodec->NumberFrame = 0;
    pCodec->VadFrame = 0;
    ippsZero_32f(pCodec->bias, NUM_CEP_COEFF-1);
    ippsZero_8u(pCodec->pIndexVQBuffer, 2*NUM_CODEBOOK);

    /* Noise reduction block: spectra history is zeroed ... */
    ippsZero_32f(pCodec->noise.pPSDprev_1, FFTSPEC);
    ippsZero_32f(pCodec->noise.pPSDprev_2, FFTSPEC);
    ippsZero_32f(pCodec->noise.pDenPrev_1, FFTSPEC);
    ippsZero_32f(pCodec->noise.pDenPrev_2, FFTSPEC);
    /* ... while the noise estimates are seeded with EPS-based values */
    ippsSet_32f((float)EPS, pCodec->noise.pNoiseTnPrev_1, FFTSPEC);
    ippsSet_32f((float)EPS*EPS, pCodec->noise.pNoisePrev_2, FFTSPEC);
    ippsSet_32f((float)EPS, pCodec->noise.pNoiseTn, FFTSPEC);
    ippsZero_32f(pCodec->noise.pBStateBuf, 960);
    ippsZero_32f(pCodec->noise.pBStateBig, 640+FILTERLENGTH);

    /* Denoised-energy history, SNR track and gain factor */
    pCodec->noise.Eden2 = 0.0f;
    pCodec->noise.Eden1 = 0.0f;
    pCodec->noise.Eden = 0.0f;
    pCodec->noise.SNRlowPrev = 0.0f;
    pCodec->noise.agfPrev = 0.8f;

    /* Offset-compensation memory */
    pCodec->noise.Dst0 = 0.0f;
    pCodec->noise.pSrc0 = 0.0f;

    /* Buffer positions */
    pCodec->noise.Cur_TPrpocess = 0;
    pCodec->noise.tail = 0;
    pCodec->noise.start = 0;
    pCodec->noise.end = 0;

    /* Voice activity detector state */
    pCodec->noise.vad.nbSpeechFrame = 0;
    pCodec->noise.vad.Eloglowtrack = 0;
    pCodec->noise.vad.flagVAD = 0;
    pCodec->noise.vad.hangOver = 0;

    ippsZero_32f(pCodec->noise.s16.dataHP, 560);

    /* State used for 16 kHz processing */
    pCodec->noise.s16.vad16.nbSpeechFrame = 0;
    pCodec->noise.s16.vad16.Eloglowtrack = 0;
    pCodec->noise.s16.vad16.flagVAD = 0;
    pCodec->noise.s16.vad16.hangOver = 0;
    for (k = 0; k < 3; k++) {
        pCodec->noise.s16.vad16.pNoise16[k] = 0.0f;
    }

    /* s11 state: rewind both time and read position to the history length
     * and clear its sample buffer */
    pCodec->s11.time = (Ipp64f)pCodec->s11.history;
    pCodec->s11.lastread = pCodec->s11.history;
    ippsZero_16s((short*)pCodec->s11.p11, pCodec->s11.len);
}
int InitAuroraEncoder(AuroraEncoder **pCodec, AuroraRate SamplingFrequency,AuroraDataType EncoderInput,AuroraDataType EncoderOutput){
IppStatus status;
int FrameShift=0;
int FrameLength=0;
float startingFeq=0,Frequency;
int i,j,k,n;
float *qCoeffLog, *qCoeff,*qCoeffVad;
float pLiftCoeff[NUM_CEP_COEFF-1] = {1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,};
float pFCentres[NUM_CHANNELS+2+2];
float norm;
float pDFK[NUM_CHANNELS+2+2];
float pCoeff[65];
float *pTmp;
int pCenters[NUM_CHANNELS+2+2];
/* float pLiftCoeff[NUM_CEP_COEFF-1] = {0.740585f,1.18330f,1.60780f,2.00544f,2.36814f,2.68850f,
2.96001f,3.17714f,3.33547f,3.43178f,3.46410f,3.43178f};
*/
pCodec[0] = (AuroraEncoder*)malloc(sizeof(AuroraEncoder));
if (pCodec[0]==NULL) return -1;
status = ippStsNoErr;
pCodec[0]->pFBank = NULL;
pCodec[0]->ctxFFT = NULL;
pCodec[0]->EncoderInput = EncoderInput;
pCodec[0]->pDCTLifter = NULL;
pCodec[0]->workBuffer = NULL;
pCodec[0]->ppCdbkState = NULL;
pCodec[0]->pFeatBuffer = NULL;
pCodec[0]->pFeatCyclicBuf = NULL;
pCodec[0]->HammingWindow = NULL;
pCodec[0]->pFeatBuffer = NULL;
pCodec[0]->pFeatCyclicBuf = NULL;
ippsVADGetBufSize_Aurora_32f(&n);
pCodec[0]->pVadMem = NULL;
pCodec[0]->VadFrame = 0;
pCodec[0]->iCountFrame = 0;
pCodec[0]->mframeCounter = 1;
pCodec[0]->NumberFrame = 0;
pCodec[0]->pFrame = NULL;
pCodec[0]->s11.ppState = NULL;
pCodec[0]->s11.p11 = NULL;
pCodec[0]->noise.ctxFFT = NULL;
pCodec[0]->noise.pFBank = NULL;
pCodec[0]->noise.pIDCT = NULL;
pCodec[0]->noise.s16.pFBank16 = NULL;
pCodec[0]->pFrame = (MFrame*)ippsMalloc_8u(sizeof(MFrame));
if(pCodec[0]->pFrame==NULL){
ReleaseAuroraEncoder(pCodec[0]);
return -1;
}
pCodec[0]->pFeatBuffer = ippsMalloc_32f(NUM_CEP_COEFF+1);
if(pCodec[0]->pFeatBuffer==NULL){
ReleaseAuroraEncoder(pCodec[0]);
return -1;
}
pCodec[0]->pFeatCyclicBuf = ippsMalloc_32f((NUM_CEP_COEFF+1)*7);
if(pCodec[0]->pFeatCyclicBuf==NULL){
ReleaseAuroraEncoder(pCodec[0]);
return -1;
}
pCodec[0]->pVadMem = ippsMalloc_8u(n);
if(pCodec[0]->pVadMem==NULL){
ReleaseAuroraEncoder(pCodec[0]);
return -1;
}
if(ippsVADInit_Aurora_32f(pCodec[0]->pVadMem)!=ippStsNoErr){
ReleaseAuroraEncoder(pCodec[0]);
return -1;
}
pCodec[0]->s11.history = 33;
pCodec[0]->s11.len = (pCodec[0]->s11.history+3)*2;
快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -