?? pitchdetect.cpp
字號:
/*
//
// INTEL CORPORATION PROPRIETARY INFORMATION
// This software is supplied under the terms of a license agreement or
// nondisclosure agreement with Intel Corporation and may not be copied
// or disclosed except in accordance with the terms of that agreement.
// Copyright(c) 1999-2006 Intel Corporation. All Rights Reserved.
//
// Intel(R) Integrated Performance Primitives Speech Processing Sample for Windows*
//
// By downloading and installing this sample, you hereby agree that the
// accompanying Materials are being provided to you under the terms and
// conditions of the End User License Agreement for the Intel(R) Integrated
// Performance Primitives product previously accepted by you. Please refer
// to the file ippEULA.rtf located in the root directory of your Intel(R) IPP
// product installation for more information.
//
*/
#include <stdio.h>
#include <math.h>
#include <ipps.h>
#include <ippsr.h>
#include "pitchdetect.h"
/*
 * Smooths a signal with three cascaded moving-average passes (4-tap, 4-tap,
 * then 5-tap) and returns the result in a newly allocated buffer.
 *
 * Each pass runs in place from the start of the buffer: sample i is replaced
 * by the average of itself and the following samples, which have not yet been
 * overwritten by that pass. The trailing samples that do not have a full
 * window after them (3 for the 4-tap passes, 4 for the 5-tap pass) are left
 * as produced by the previous stage.
 *
 * pInputBuffer - samples to filter; not modified.
 * nInLength    - number of samples in pInputBuffer.
 *
 * Returns a buffer of nInLength samples obtained from ippsMalloc_16s (the
 * caller releases it with ippsFree), or NULL when pInputBuffer is NULL,
 * nInLength is not positive, or the allocation fails.
 */
short *DoFilter(const short *pInputBuffer, int nInLength){
    int i;
    short *pTmp;

    if (pInputBuffer == NULL || nInLength <= 0)
        return NULL;

    pTmp = ippsMalloc_16s(nInLength);
    if (pTmp == NULL)
        return NULL;    /* do not dereference a failed allocation */

    ippsCopy_16s(pInputBuffer,pTmp,nInLength);

    /* First 4-tap average; the sum is computed in int, so it cannot wrap. */
    for(i=0; i < nInLength-4+1;i++){
        pTmp[i]=(pTmp[i]+pTmp[i+1]+pTmp[i+2]+pTmp[i+3])>>2;
    }
    /* Second 4-tap average over the output of the first pass. */
    for(i=0; i < nInLength-4+1;i++){
        pTmp[i]=(pTmp[i]+pTmp[i+1]+pTmp[i+2]+pTmp[i+3])>>2;
    }
    /* Final 5-tap average. */
    for(i=0; i < nInLength-5+1;i++){
        pTmp[i]=(pTmp[i]+pTmp[i+1]+pTmp[i+2]+pTmp[i+3]+pTmp[i+4])/5;
    }
    return pTmp;
}
/*
 * Clears the bytes of pSilent that correspond to regions of the signal
 * classified as silence.
 *
 * The signal is examined in consecutive blocks of SILDETLEN samples. For each
 * block the energy (dot product of the block with itself) and the sign-change
 * rate are computed. While no voice is active, a block that passes the start
 * test increments a counter; once STARTBLOCK consecutive blocks pass, voice
 * is considered active. A block that fails the start test causes the pending
 * region to be zeroed in pSilent. While voice is active, STOPBLOCK
 * blocks meeting the stop test (counted since the last block that failed it)
 * end the voiced segment and reset the running energy sum.
 *
 * pInBuffer - VecSize input samples; only read.
 * pSilent   - VecSize flag bytes. This function only ever writes zeros into
 *             it, so the caller is expected to pre-fill it (PitchDetector in
 *             this file fills it with 1 first).
 * VecSize   - number of samples / flag bytes.
 *
 * Nothing is done when a pointer is NULL or VecSize is smaller than one
 * block, because the first block is always analysed in full.
 */
void DetectSilent(short *pInBuffer,unsigned char *pSilent,int VecSize){
    const float thresholdFactor = 0.01f;
    const float energyThreshold = 10000000.0f;
    const int crossnum = 45;
    float DynamicEnergyThreshold = 0.0f;   /* energy accumulated since the last reset */
    float threshold;
    float energy = 0.0f;
    int crossrate = 0;
    int startBCnt = 0, stopBCnt = 0;
    bool voiceFlag = false;

    /* The first block reads SILDETLEN samples unconditionally. */
    if (pInBuffer == NULL || pSilent == NULL || VecSize < SILDETLEN)
        return;

    /* Block 0: only the energy/sign-change start test applies, and a failing
       block clears just the first half block of flags. */
    ippsDotProd_16s32f(pInBuffer,pInBuffer,SILDETLEN,&energy);
    ippsSignChangeRate_16s(pInBuffer,SILDETLEN,&crossrate);
    DynamicEnergyThreshold += energy;
    if ( energy*crossrate > energyThreshold*crossnum )
        ++startBCnt;
    else
        ippsZero_8u(pSilent,SILDETLEN>>1);

    /* Remaining blocks; i is the sample offset of the current block. */
    for(int i=SILDETLEN; i<VecSize-SILDETLEN; i+=SILDETLEN){
        const short *pBlock = pInBuffer + i;

        ippsDotProd_16s32f(pBlock,pBlock,SILDETLEN,&energy);
        DynamicEnergyThreshold += energy;
        ippsSignChangeRate_16s(pBlock,SILDETLEN,&crossrate);
        threshold = IPP_MAX(DynamicEnergyThreshold*(float)thresholdFactor, (float)crossnum);

        if (!voiceFlag){
            if ( energy*crossrate > energyThreshold*crossnum || crossrate >= crossnum){
                if (++startBCnt >= STARTBLOCK) {
                    voiceFlag = true;
                    stopBCnt = 0;
                }
            }
            else{
                /* The zeroed window is shifted back by half a block. With at
                   most one pending block it covers one block length ending
                   half a block into the current block; otherwise it covers
                   the startBCnt pending blocks. */
                int zeroOff;
                int zeroLen;
                if (startBCnt<=1){
                    zeroOff = i-(SILDETLEN>>1);
                    zeroLen = SILDETLEN;
                }
                else{
                    zeroOff = i-SILDETLEN*startBCnt-(SILDETLEN>>1);
                    zeroLen = SILDETLEN*startBCnt;
                }
                /* When the pending run started at block 0 the window would
                   begin half a block before pSilent (only reachable if
                   STARTBLOCK is greater than 2); clamp it to the buffer. */
                if (zeroOff < 0){
                    zeroLen += zeroOff;
                    zeroOff = 0;
                }
                if (zeroLen > 0)
                    ippsZero_8u(pSilent+zeroOff,zeroLen);
                startBCnt = 0;
            }
        }
        else if ( energy*crossrate < threshold*crossnum*2 || crossrate >= crossnum*2 || energy < threshold*2){
            if (++stopBCnt >= STOPBLOCK){
                startBCnt=0;
                voiceFlag=false;
                DynamicEnergyThreshold=0.0f;
            }
        }
        else stopBCnt = 0;
    }
}
/*
 * Estimates the pitch lag at every non-silent analysis position of a signal.
 *
 * The buffer is scanned in steps of SAMPLE_SHIFT samples. Positions whose
 * pSilent byte is zero are skipped. For the others, correlation coefficients
 * are computed for lags between MIN_PITCH and MAX_PITCH, candidate lags are
 * picked from the coefficient curve, re-checked, refined around the chosen
 * lag, and the result is appended to pPitch.
 *
 * pInBuffer - input samples (PitchDetector passes the filtered signal).
 * pSilent   - per-sample flags; a zero byte marks the position as silent.
 * nInLength - number of samples in pInBuffer / bytes in pSilent.
 * pPitch    - output: Freq[j] receives the lag plus its fractional part
 *             (nPitch0 + beta), Frame[j] receives i / SAMPLE_SHIFT.
 *
 * Returns the number of entries written to pPitch, or 0 when an argument is
 * NULL or a work buffer cannot be allocated.
 *
 * NOTE(review): the capacity of pPitch->Freq / pPitch->Frame is not visible
 * here and is not checked - confirm the caller sizes them for the worst case.
 */
int DetectPitch(short *pInBuffer,unsigned char *pSilent,int nInLength,_PitchStruct *pPitch){
    int nMax = (int)MAX_PITCH;
    int nMin = (int)MIN_PITCH;
    int size = (nMax-nMin)*2;
    float *pCoeff;
    int *pCandidate;
    unsigned char *pFlag;
    int nCountCandidate;
    float max = 0.0f, beta = 0.0f;
    int i=0,j=0,k=0;
    int nPitch0=0;
    float threshold;
    bool unvoiced = true;

    if (pInBuffer == NULL || pSilent == NULL || pPitch == NULL)
        return 0;

    pCoeff = ippsMalloc_32f(size);
    pCandidate = ippsMalloc_32s(size);
    pFlag = ippsMalloc_8u(size);
    if (pCoeff == NULL || pCandidate == NULL || pFlag == NULL) {
        /* Release whatever was obtained and report "no pitch found". */
        if (pCoeff != NULL) ippsFree(pCoeff);
        if (pCandidate != NULL) ippsFree(pCandidate);
        if (pFlag != NULL) ippsFree(pFlag);
        return 0;
    }

    for(i=0,j=0; i<nInLength-2*nMax; i+=SAMPLE_SHIFT){
        /* Skip positions flagged as silent. */
        if(!pSilent[i])continue;
        /* Prepare threshold: THIGH after an unvoiced position, otherwise the
           larger of TMIN and TMAX_RATIO times the peak coefficient left over
           from the previous voiced position. */
        if (unvoiced)
            threshold = THIGH;
        else
            threshold = (TMIN > TMAX_RATIO * max) ? TMIN : TMAX_RATIO * max;
        /* Find correlation coefficients for the lag range, decimated by STEP. */
        ippsCrossCorrCoeffDecim_16s32f( (&pInBuffer[i]), (&pInBuffer[i])+nMax,
            nMax,nMin,pCoeff,STEP);
        /* Find local maxima: track the running peak since the last candidate
           and emit a candidate lag when the next coefficient goes negative. */
        max = pCoeff[0];
        bool lm=false;
        float Maximum=0.0f;
        nPitch0=0;
        nCountCandidate=0;
        for(k=1; k<(nMax-nMin)/SAMPLE_SHIFT-1; k++){
            if( pCoeff[k] > max )
                max=pCoeff[k];
            if( pCoeff[k] > pCoeff[k-1] )
                lm=true;
            if( pCoeff[k] > Maximum && lm ){
                Maximum = pCoeff[k];
                if (Maximum >= threshold){
                    nPitch0 = nMin+k*SAMPLE_SHIFT;
                }
            }
            if ( nPitch0 != 0 && pCoeff[k+1]<0.0) {
                pFlag[nCountCandidate]=false;
                pCandidate[nCountCandidate++] = nPitch0;
                nPitch0 = 0;
                Maximum = 0.0;
            }
        }
        if(nCountCandidate==0){
            unvoiced=true;
            continue;
        }
        else
            unvoiced = false;
        /* Re-check every candidate with a full-resolution coefficient and
           remember the first (head) and last (tail) one above the threshold.
           NOTE(review): the second operand starts at i + nMax + pCandidate[k]
           and spans pCandidate[k] samples, while the loop bound only
           guarantees i + 2*nMax samples - confirm this cannot read past the
           end of pInBuffer. */
        float coefficient;
        nPitch0 = pCandidate[0];
        int head=0,tail=0;
        bool headflag=true;
        for(k=0;k<nCountCandidate;k++){
            ippsCrossCorrCoeff_16s32f((&pInBuffer[i])+nMax, (&pInBuffer[i])+nMax+pCandidate[k],
                pCandidate[k],&coefficient);
            if(coefficient>threshold){
                nPitch0 = pCandidate[k];
                pFlag[k]=true;
                if(headflag){
                    head = k;
                    headflag=false;
                }
                tail = k;
            }
        }
        if(tail==0)tail = nCountCandidate-1;
        /* Starting from the head candidate, switch to the first later flagged
           candidate whose coefficient scaled by TDH exceeds the head's. */
        float DPx;
        nPitch0 = pCandidate[head];
        ippsDotProd_16s32f((&pInBuffer[i])+nMax - nPitch0,(&pInBuffer[i])+nMax - nPitch0,
            nPitch0, &DPx );
        for(k=head;k<tail;k++){
            if(pFlag[k]){
                ippsCrossCorrCoeffPartial_16s32f((&pInBuffer[i])+nMax - pCandidate[tail],
                    (&pInBuffer[i])+nMax + pCandidate[k],pCandidate[tail], (float)(DPx),&coefficient);
                if (k == head)
                    Maximum = coefficient;
                else if (coefficient * TDH > Maximum) {
                    nPitch0 = pCandidate[k];
                    Maximum = coefficient;
                    break;
                }
            }
        }
        max = pCoeff[(nPitch0 - nMin)/SAMPLE_SHIFT];
        /* Define a small region around the peak and recompute it with
           decimation 1, then take the lag with the largest coefficient. */
        int nPitch1,nPitch2;
        nPitch1 = nPitch0 - 2 * STEP;
        if(nPitch1-nMin<0)nPitch1 = nMin;
        nPitch2 = nPitch0 + 2 * STEP;
        ippsCrossCorrCoeffDecim_16s32f((&pInBuffer[i]),(&pInBuffer[i])+nMax,
            nPitch2,nPitch1,&(pCoeff[nPitch1-nMin]),1);
        int nMaxIdx;
        ippsMaxIndx_32f(pCoeff+nPitch1 - nMin, nPitch2-nPitch1, &max, &nMaxIdx);
        nPitch0 = nPitch1 + 1 + nMaxIdx;
        /* Find the fractional part of the pitch (beta) and the matching
           coefficient (max). */
        ippsCrossCorrCoeffInterpolation_16s32f((&pInBuffer[i])+nMax - nPitch0, (&pInBuffer[i]) + nMax, nPitch0,
            &beta, &max);
        /* A fraction outside [0, 1] is folded into the integer lag. */
        if(beta<0.0){
            beta=0.0f;
            nPitch0--;
        }
        if(beta>1.0){
            beta=0.0f;
            nPitch0++;
        }
        /* Store the integer lag plus its fractional part and the frame index. */
        pPitch->Freq[j] = (float)(nPitch0 + beta);
        pPitch->Frame[j++] = i/SAMPLE_SHIFT;
    }
    ippsFree(pCoeff);
    ippsFree(pCandidate);
    ippsFree(pFlag);
    return j;
}
/*
 * Runs the complete pitch-detection pipeline on a block of samples:
 * smoothing (DoFilter), silence marking (DetectSilent), per-position pitch
 * estimation (DetectPitch), and a final clean-up of the result list.
 *
 * pInputBuffer - input samples; not modified by this function.
 * nInLength    - number of samples in pInputBuffer.
 * pPitch       - receives the detected pitch entries (Frame / Freq), compacted
 *                in place by the clean-up step.
 *
 * Returns the number of entries left in pPitch after clean-up, or 0 when an
 * argument is invalid or a work buffer cannot be allocated.
 *
 * Clean-up rule: entry i is kept only if the following entry belongs to the
 * very next frame. When a run of consecutive frames ends and fewer than 100
 * of its entries were kept, those entries are dropped again. A run that is
 * still open when the list ends is kept whatever its length, and the last
 * detected entry is never copied.
 */
int PitchDetector(short *pInputBuffer, int nInLength, _PitchStruct *pPitch){
    const int minRunLength = 100;   /* shorter runs of consecutive frames are discarded */
    int i=0, k=0;
    short *pInBuffer;
    unsigned char *pSilent;
    int CountPitch;
    bool flag=false;
    int col=0;

    if (pInputBuffer == NULL || pPitch == NULL || nInLength <= 0)
        return 0;

    pInBuffer = DoFilter(pInputBuffer, nInLength);
    if (pInBuffer == NULL)
        return 0;

    pSilent = ippsMalloc_8u(nInLength);
    if (pSilent == NULL) {
        /* Do not leak the filtered copy when the flag buffer is unavailable. */
        ippsFree(pInBuffer);
        return 0;
    }

    /* Every sample starts out as "not silent"; DetectSilent clears flags. */
    ippsSet_8u(1,pSilent,nInLength);
    DetectSilent(pInBuffer,pSilent,nInLength);
    CountPitch = DetectPitch(pInBuffer,pSilent,nInLength,pPitch);
    ippsFree(pSilent);
    ippsFree(pInBuffer);

    /* Compact the list in place; k is the write position, col the number of
       entries kept from the current run. */
    for(i=0,k=0; i< CountPitch-1;i++){
        if(pPitch->Frame[i]-pPitch->Frame[i+1]==-1){
            flag=true;
            col++;
            pPitch->Frame[k]=pPitch->Frame[i];
            pPitch->Freq[k++]=pPitch->Freq[i];
        }else{
            if(flag && col<minRunLength){
                k=k-col;    /* rewind over the too-short run */
            }
            col=0;
            flag=false;
        }
    }
    return k;
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號(hào)
Ctrl + =
減小字號(hào)
Ctrl + -