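/* File: segtofile(sen).c  (title line left by the web code viewer; the "font size" control label that followed it is viewer chrome, not source) */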
p->m_tagType=nType;
}
/*
 * 50. Place-name recognition.
 *
 * Walks p->m_nBestTag from index 1 until the first negative tag and looks
 * for candidate spans that start on tag 1 or tag 2:
 *   - start on tag 1: a run of 1s, then a run of 2s, then a run of 3s;
 *   - start on tag 2: a run of 2s, then a run of 3s.
 * For every span found, the pair (m_nWordPosition[start], m_nWordPosition[end])
 * is appended to p->m_nUnknownWords and the score
 * ComputePossibility(...) + log(penalty) is appended to p->m_dWordsPossibility.
 *
 * The penalty starts at 1.0 and is only ever increased, so increments made
 * for one candidate carry over to every later candidate in the same call.
 * NOTE(review): confirm that this carry-over is intended.
 * NOTE(review): the meaning of tag values 1/2/3 is defined outside this
 * function; dictCore is accepted but not used here.
 *
 * Always returns TRUE.
 */
int PlaceRecognize(pSpan p,pDictionary dictCore,pDictionary placeDict)
{
    int first = 1, last = 1, cur = 1, mark;
    double penalty = 1.0;

    while (p->m_nBestTag[cur] > -1) {
        if (p->m_nBestTag[cur] == 1 || p->m_nBestTag[cur] == 2) {
            first = cur;
            last = first + 1;

            if (p->m_nBestTag[cur] == 1) {
                /* Third and later tag-1 entries of the run each add 1.0. */
                for (; p->m_nBestTag[last] == 1; last++) {
                    if (last > first + 1)
                        penalty += 1.0;
                }
            } else {
                /* A span that starts directly on tag 2 is penalised once. */
                penalty += 1.0;
            }

            /* Run of tag 2 costs nothing. */
            for (; p->m_nBestTag[last] == 2; last++)
                ;

            /* Second and later tag-3 entries each add 1.0. */
            mark = last;
            for (; p->m_nBestTag[last] == 3; last++) {
                if (last > mark)
                    penalty += 1.0;
            }
        }

        if (last > first) {
            p->m_nUnknownWords[p->m_nUnknownIndex][0] = p->m_nWordPosition[first];
            p->m_nUnknownWords[p->m_nUnknownIndex][1] = p->m_nWordPosition[last];
            p->m_dWordsPossibility[p->m_nUnknownIndex++] = ComputePossibility(p, first, last - first + 1, placeDict) + log(penalty);
            first = last;
        }

        /* Jump past the span just handled, or advance by one tag. */
        cur = (cur < last) ? last : cur + 1;
    }
    return TRUE;
}
/*
 * 51. POS tagging driver.
 *
 * Resets the span, then repeatedly:
 *   1. loads the next stretch of pWordItems into the span with GetFrom(),
 *      which returns the index to continue from (a negative value or an
 *      item whose sWord is empty ends the loop);
 *   2. runs GetBestPOS();
 *   3. depending on p->m_tagType either stores the best tags back into
 *      pWordItems (TT_NORMAL), or runs PersonRecognize / PlaceRecognize;
 *   4. calls Reset(p, TRUE) before the next stretch.
 *
 * In TT_NORMAL mode, an item whose dValue is positive and whose word exists
 * in dictCore gets dValue replaced by GetFrequency() for the chosen tag.
 *
 * Always returns TRUE.
 */
int POSTagging(pSpan p,PWORD_RESULT pWordItems,pDictionary dictCore,pDictionary dictUnknown)
{
    int next = 0;

    Reset(p, FALSE);
    while (next > -1 && pWordItems[next].sWord[0] != 0) {
        int base = next;
        int k;

        next = GetFrom(p, pWordItems, base, dictCore, dictUnknown);
        GetBestPOS(p);

        if (p->m_tagType == TT_NORMAL) {
            /* Span index k maps to word item k + base - 1. */
            for (k = 1; p->m_nBestTag[k] != -1 && k < p->m_nCurLength; k++) {
                PWORD_RESULT item = &pWordItems[k + base - 1];

                item->nHandle = p->m_nBestTag[k];
                if (item->dValue > 0 && IsExist(dictCore, item->sWord, -1)) {
                    item->dValue = GetFrequency(dictCore, item->sWord, p->m_nBestTag[k]);
                }
            }
        } else if (p->m_tagType == TT_PERSON) {
            PersonRecognize(p, dictUnknown);
        } else if (p->m_tagType == TT_PLACE || p->m_tagType == TT_TRANS_PERSON) {
            PlaceRecognize(p, dictCore, dictUnknown);
        }

        Reset(p, TRUE);
    }
    return TRUE;
}
/*
 * 52. Initialise a tagging span.
 *
 * Sets the tag type to TT_NORMAL, writes the begin tag (0) and the tag
 * terminator (-1) into m_nTags[0], allocates and initialises the context
 * state, and resets the length, position and unknown-word counters.
 *
 * The previous version chose the begin tag (100 or 0) by reading
 * p->m_tagType before this initialiser had ever assigned it, so the result
 * depended on whatever the memory happened to contain. The tag type is now
 * assigned first; because it is always TT_NORMAL here, the begin tag is 0.
 *
 * If the context allocation fails, p->m_context is left NULL and
 * IContextState is not called on it.
 */
void ISpan(pSpan p)
{
    /* Assign the tag type before anything depends on it. */
    p->m_tagType=TT_NORMAL;
    p->m_nTags[0][0]=0;      /* begin tag for TT_NORMAL */
    p->m_nTags[0][1]=-1;     /* terminator of the tag list */

    p->m_context=(pContextState)malloc(sizeof(struct ContextState));
    if(p->m_context!=NULL)
        IContextState(p->m_context);

    p->m_dFrequency[0][0]=0;
    p->m_nCurLength=1;
    p->m_nUnknownIndex=0;
    p->m_nStartPos=0;
    p->m_nWordPosition[1]=0;
    p->m_sWords[0][0]=0;
}
/*
 * 53. Atom segmentation: split a sentence into atoms.
 *
 * Fills p->m_sAtom / p->m_nAtomLength / p->m_nAtomPOS and sets
 * p->m_nAtomCount. Rules, as implemented below:
 *   - a leading SENTENCE_BEGIN marker and any SENTENCE_END marker become
 *     atoms of their own (CT_SENTENCE_BEGIN / CT_SENTENCE_END);
 *   - a byte with the high bit set is taken together with the following
 *     byte as one double-byte character;
 *   - characters typed CT_CHINESE, CT_INDEX, CT_DELIMITER or CT_OTHER by
 *     charType() are single-character atoms;
 *   - any other type is merged with the following characters for as long as
 *     charType() of the next character equals the current type;
 *   - a '.' directly followed by a digit is typed CT_NUM.
 *
 * Changes from the previous version:
 *   - the lead-byte test no longer relies on plain char being signed
 *     (sChar[0]<0 is never true where char is unsigned);
 *   - a lead byte that is the last byte of the sentence no longer advances
 *     the index past the terminating NUL;
 *   - the sentence and marker lengths are computed once instead of on every
 *     loop iteration (the sentence is not modified here).
 *
 * NOTE(review): atoms are appended with strcat/strcpy without a size check;
 * the capacity of p->m_sAtom[j] is declared outside this function.
 *
 * Always returns TRUE.
 */
int AtomSegment(pSegGraph p,char *sSentence)
{
    unsigned int i=0,j=0;            /* i: byte offset in sentence, j: atom index */
    unsigned int nCurType,nNextType;
    const size_t nSentenceLen=strlen(sSentence);
    const size_t nBeginLen=strlen(SENTENCE_BEGIN);
    const size_t nEndLen=strlen(SENTENCE_END);
    char sChar[3];

    sChar[2]=0;
    p->m_sAtom[j][0]=0;
    p->m_nAtomLength[j]=0;
    if(strncmp(sSentence,SENTENCE_BEGIN,nBeginLen)==0)
    {
        strcpy(p->m_sAtom[j],SENTENCE_BEGIN);
        p->m_nAtomLength[j]=nBeginLen;
        p->m_nAtomPOS[j]=CT_SENTENCE_BEGIN;
        i+=p->m_nAtomLength[j];
        j+=1;
        p->m_sAtom[j][0]=0;
        p->m_nAtomLength[j]=0;
    }

    while(i<nSentenceLen)
    {
        if(strncmp(sSentence+i,SENTENCE_END,nEndLen)==0)
        {
            strcpy(p->m_sAtom[j],SENTENCE_END);
            p->m_nAtomLength[j]=nEndLen;
            p->m_nAtomPOS[j]=CT_SENTENCE_END;
            i+=p->m_nAtomLength[j];
            j+=1;
            p->m_sAtom[j][0]=0;
            p->m_nAtomLength[j]=0;
            continue;
        }

        /* Fetch one character: one byte, or two when the high bit is set. */
        sChar[0]=sSentence[i];
        sChar[1]=0;
        i+=1;
        if((unsigned char)sChar[0]>=0x80&&i<nSentenceLen)
        {
            sChar[1]=sSentence[i];
            i+=1;
        }
        strcat(p->m_sAtom[j],sChar);

        nCurType=charType((unsigned char *)sChar);
        /* A '.' followed by a digit is part of a number. */
        if(sChar[0]=='.'&&(charType((unsigned char *)sSentence+i)==CT_NUM||(sSentence[i]>='0'&&sSentence[i]<='9')))
            nCurType=CT_NUM;
        p->m_nAtomPOS[j]=nCurType;

        if(nCurType==CT_CHINESE||nCurType==CT_INDEX||nCurType==CT_DELIMITER||nCurType==CT_OTHER)
        {
            /* These types always form an atom of a single character. */
            p->m_nAtomLength[j]=strlen(p->m_sAtom[j]);
            j+=1;
            p->m_sAtom[j][0]=0;
        }
        else
        {
            /* Close the atom at end of input or when the type changes. */
            nNextType=255;
            if(i<nSentenceLen)
                nNextType=charType((unsigned char *)(sSentence+i));
            if(nNextType!=nCurType||i==nSentenceLen)
            {
                p->m_nAtomLength[j]=strlen(p->m_sAtom[j]);
                j+=1;
                p->m_sAtom[j][0]=0;
            }
        }
    }
    p->m_nAtomCount=j;
    return TRUE;
}
/*
 * 54. Build the word net (segmentation graph) for a sentence.
 *
 * Steps:
 *   1. clear p->m_segGraph (when non-NULL) and split the sentence into
 *      atoms with AtomSegment();
 *   2. add one edge i -> i+1 for every atom;
 *   3. for every start atom i, keep appending the following atoms to a
 *      candidate word while GetMaxMatch() succeeds, and add an edge i -> j
 *      each time the candidate is exactly the matched dictionary word.
 *
 * bOriginalFreq selects what is stored on an edge: when zero, a log-scaled
 * value and no word text; otherwise the raw frequency and the word text.
 *
 * Change from the previous version: the candidate word is extended with
 * strcat into the fixed-size local buffer sWord; extension now stops when
 * the next atom would not fit, instead of writing past the buffer.
 *
 * NOTE(review): the strcpy calls into sWord are still unchecked; they are
 * safe only if an atom is never longer than WORD_MAXLENGTH-1 bytes, which
 * depends on declarations outside this function.
 * NOTE(review): nMatchHandle/nMatchFreq hold 20 entries; whether GetHandle
 * can report more is not visible here.
 *
 * Always returns TRUE.
 */
int GenerateWordNet(pSegGraph p,char *sSentence,pDictionary dictCore,int bOriginalFreq)
{
    unsigned int j,i=0;
    char sWord[WORD_MAXLENGTH]="",sWordMatch[WORD_MAXLENGTH];
    int nHandleTemp,k,nPOS;
    int nMatchFreq[20],nMatchHandle[20],nTotalFreq,nMatchCount;
    double dValue=0;

    p->m_nAtomCount=0;
    if(p->m_segGraph){
        SetEmpty(p->m_segGraph);
    }
    AtomSegment(p,sSentence);

    /* Pass 1: one edge per atom. */
    for(i=0;i<p->m_nAtomCount;i++)
    {
        if(p->m_nAtomPOS[i]==CT_CHINESE)
        {
            if(!bOriginalFreq)
                SetElement(p->m_segGraph,i,i+1,log(MAX_FREQUENCE),0,0);
            else
                SetElement(p->m_segGraph,i,i+1,0,0,p->m_sAtom[i]);
        }
        else
        {
            strcpy(sWord,p->m_sAtom[i]);
            dValue=MAX_FREQUENCE;
            /*
             * Choose the POS handle and, for numbers and letter strings, a
             * placeholder word. Numerically 27904 is 'm'*256 and 30464 is
             * 'w'*256; NOTE(review): the handle encoding itself is defined
             * elsewhere.
             */
            switch(p->m_nAtomPOS[i])
            {
            case CT_INDEX:
            case CT_NUM:
                nPOS=-27904;
                strcpy(sWord,"未##數");
                dValue=0;
                break;
            case CT_DELIMITER:
                nPOS=30464;
                break;
            case CT_LETTER:
                nPOS=-'n'*256-'x';
                dValue=0;
                strcpy(sWord,"未##串");
                break;
            case CT_SINGLE:
                /* Only sign and digit characters: treat as a number. */
                if(GetCharCount("+-1234567890",p->m_sAtom[i])==(int)strlen(p->m_sAtom[i]))
                {
                    nPOS=-27904;
                    strcpy(sWord,"未##數");
                }
                else
                {
                    nPOS=-'n'*256-'x';
                    strcpy(sWord,"未##串");
                }
                dValue=0;
                break;
            default:
                nPOS=p->m_nAtomPOS[i];
                break;
            }
            if(!bOriginalFreq)
                SetElement(p->m_segGraph,i,i+1,0,nPOS,0);
            else
                SetElement(p->m_segGraph,i,i+1,dValue,nPOS,sWord);
        }
    }

    /* Pass 2: dictionary words that start at atom i. */
    i=0;
    while(i<p->m_nAtomCount)
    {
        strcpy(sWord,p->m_sAtom[i]);
        j=i+1;
        /*
         * Special case for the atom pair "月" + "份": the end index is moved
         * one atom further while sWord still holds only the first atom.
         * NOTE(review): presumably this keeps the pair together - confirm.
         */
        if(strcmp(sWord,"月")==0&&strcmp(p->m_sAtom[i+1],"份")==0)
            j+=1;
        while(j<=p->m_nAtomCount&&GetMaxMatch(dictCore,sWord,sWordMatch,&nHandleTemp))
        {
            if(strcmp(sWordMatch,sWord)==0)
            {
                nTotalFreq=0;
                GetHandle(dictCore,sWord,&nMatchCount,nMatchHandle,nMatchFreq);
                for(k=0;k<nMatchCount;k++)
                {
                    nTotalFreq+=nMatchFreq[k];
                }
                /*
                 * A 4-byte word that follows an all-numeric atom, starts
                 * with "年" or "月" (first 2 bytes) and whose remainder is
                 * found in the listed characters is not added; matching
                 * from this start atom stops.
                 * NOTE(review): the byte counts assume a 2-byte character
                 * encoding.
                 */
                if(strlen(sWord)==4&&i>=1&&(IsAllNum((unsigned char *)p->m_sAtom[i-1])
                    ||IsAllChineseNum(p->m_sAtom[i-1]))&&(strncmp(sWord,"年",2)==0
                    ||strncmp(sWord,"月",2)==0))
                {
                    if(CC_Find("末內中底前間初",sWord+2))
                        break;
                }
                if(nMatchCount==1)
                {
                    /* Single handle: store it on the edge. */
                    if(!bOriginalFreq)
                        SetElement(p->m_segGraph,i,j,-log(nTotalFreq+1)+log(MAX_FREQUENCE),nMatchHandle[0],0);
                    else
                        SetElement(p->m_segGraph,i,j,nTotalFreq,nMatchHandle[0],sWord);
                }
                else
                {
                    /* Several handles: store 0 as the handle. */
                    if(!bOriginalFreq)
                        SetElement(p->m_segGraph,i,j,-log(nTotalFreq+1)+log(MAX_FREQUENCE),0,0);
                    else
                        SetElement(p->m_segGraph,i,j,nTotalFreq,0,sWord);
                }
            }
            /* Stop before the next atom would overflow the local buffer. */
            if(strlen(sWord)+strlen(p->m_sAtom[j])>=sizeof(sWord))
                break;
            strcat(sWord,p->m_sAtom[j++]);
        }
        i+=1;
    }
    return TRUE;
}
/*
 * 55. Release the segmentation graph.
 *
 * Frees the object p->m_segGraph points to and leaves the member NULL.
 * NOTE(review): only the array object itself is freed here; whether its
 * stored elements must be cleared first (e.g. by the caller) is not visible
 * in this function - confirm.
 */
void USegGraph(pSegGraph p)
{
    void *graph = p->m_segGraph;

    p->m_segGraph = NULL;
    free(graph);
}
/*
 * 56. Initialise the segmentation graph.
 *
 * Allocates the dynamic array behind p->m_segGraph, initialises it with
 * IDynamicArray(..., FALSE) and switches it to row-first mode.
 *
 * If the allocation fails, p->m_segGraph is left NULL and the two
 * initialisation calls are skipped; previously the NULL pointer was passed
 * straight on to them.
 */
void ISegGraph(pSegGraph p)
{
    p->m_segGraph=(pDynamicArray)malloc(sizeof(struct DynamicArray));
    if(p->m_segGraph==NULL)
        return;
    IDynamicArray(p->m_segGraph,FALSE);
    SetRowFirst(p->m_segGraph,TRUE);
}
/*
 * 57. Shortest-path computation over the cost graph.
 *
 * For every node nCurNode in 1 .. m_nVertex-1:
 *   1. fetch the edge chain for column nCurNode from p->m_apCost and walk it
 *      while the entries still belong to that column;
 *   2. for each incoming edge push (predecessor, index, accumulated weight)
 *      into a work queue - one entry per usable weight slot of the
 *      predecessor, or a single entry when the predecessor is node 0;
 *   3. reset the node's m_nValueKind weight slots to INFINITE_VALUE, then pop
 *      the work queue and fill the slots, recording each popped
 *      (predecessor, index) in p->m_pParent for the slot it belongs to.
 *      A popped weight larger than the current slot's weight opens the next
 *      slot.
 *
 * Changes from the previous version (both were memory leaks):
 *   - pEdgeList was given a malloc'ed block that was immediately overwritten
 *     by GetElementValue() and never freed; it now starts as NULL;
 *   - the work queue was malloc'ed once per node and never released; it is
 *     now a local object, and entries still queued when the fill loop stops
 *     are popped before the next node.
 *     NOTE(review): this assumes Pop(..., TRUE, TRUE) releases the entry it
 *     removes - confirm against the Queue implementation.
 *
 * Always returns 1.
 */
int ShortPath(pNShortPath p)
{
    unsigned int nPreNode,i,nIndex,nCurNode;
    unsigned int nDrainNode,nDrainIndex;
    double eWeight,eDrainWeight;
    PARRAY_CHAIN pEdgeList=NULL;

    for(nCurNode=1;nCurNode<p->m_nVertex;nCurNode++)
    {
        struct Queue queWork;
        IQueue(&queWork);

        /* Called for its output parameter: the first edge of this column. */
        eWeight=GetElementValue(p->m_apCost,-1,nCurNode,NULL,&pEdgeList);
        while(pEdgeList!=0 && pEdgeList->col==nCurNode)
        {
            nPreNode=pEdgeList->row;
            eWeight=pEdgeList->value;
            for(i=0;i<p->m_nValueKind;i++)
            {
                if(nPreNode>0)
                {
                    /* No further paths are recorded for the predecessor. */
                    if(p->m_pWeight[nPreNode-1][i]==INFINITE_VALUE)
                        break;
                    Push(&queWork,nPreNode,i,eWeight+p->m_pWeight[nPreNode-1][i]);
                }
                else
                {
                    /* Node 0 has no stored weights: the edge weight is the path weight. */
                    Push(&queWork,nPreNode,i,eWeight);
                    break;
                }
            }
            pEdgeList=pEdgeList->next;
        }

        for(i=0;i<p->m_nValueKind;i++)
        {
            p->m_pWeight[nCurNode-1][i]=INFINITE_VALUE;
        }

        i=0;
        while(i<p->m_nValueKind&&Pop(&queWork,&nPreNode,&nIndex,&eWeight,TRUE,TRUE)!=-1)
        {
            if(p->m_pWeight[nCurNode-1][i]==INFINITE_VALUE)
                p->m_pWeight[nCurNode-1][i]=eWeight;
            else if(p->m_pWeight[nCurNode-1][i]<eWeight)
            {
                /* Larger weight: move on to the next slot. */
                i++;
                if(i==p->m_nValueKind)
                    break;
                p->m_pWeight[nCurNode-1][i]=eWeight;
            }
            Push(&p->m_pParent[nCurNode-1][i],nPreNode,nIndex,0);
        }

        /* Empty the work queue so nothing stays allocated for this node. */
        while(Pop(&queWork,&nDrainNode,&nDrainIndex,&eDrainWeight,TRUE,TRUE)!=-1)
            ;
    }
    return 1;
}
/*58.獲取路徑*/
void GetPaths(pNShortPath p,unsigned int nNode,unsigned int nIndex,int **nResult,int bBest)
{
pQueue queResult;
int bFirstGet;
unsigned int nCurNode,nCurIndex,nParentNode;
unsigned int nParentIndex,nResultIndex=0;
queResult=(pQueue)malloc(sizeof(struct Queue));
IQueue(queResult);
if(p->m_nResultCount>=MAX_SEGMENT_NUM)
return ;
nResult[p->m_nResultCount][nResultIndex]=-1;
Push(queResult,nNode,nIndex,0);
nCurNode=nNode;
nCurIndex=nIndex;
while(!IsEmpty(queResult,FALSE))
{
while(nCurNode>0)
{
if(Pop(&p->m_pParent[nCurNode-1][nCurIndex],&nParentNode,&nParentIndex,0,FALSE,TRUE)!=-1)
{
nCurNode=nParentNode;
nCurIndex=nParentIndex;
}
if(nCurNode>0)
Push(queResult,nCurNode,nCurIndex,0);
}
if(nCurNode==0)
{
nResult[p->m_nResultCount][nResultIndex++]=nCurNode;
bFirstGet=TRUE;
nParentNode=nCurNode;
while(Pop(queResult,&nCurNode,&nCurIndex,0,FALSE,bFirstGet)!=-1)
{
nResult[p->m_nResultCount][nResultIndex++]=nCurNode;
bFirstGet=FALSE;
nParentNode=nCurNode;
}
nResult[p->m_nResultCount][nResultIndex]=-1;
p->m_nResultCount+=1;
if(p->m_nResultCount>=MAX_SEGMENT_NUM)
return ;
nResultIndex=0;
nResult[p->m_nResultCount][nResultIndex]=-1;
if(bBest)
return ;
}
Pop(queResult,&nCurNode,&nCurIndex,0,FALSE,TRUE);
while((IsEmpty(queResult,FALSE))==FALSE&&(IsSingle(&p->m_pParent[nCurNode-1][nCurIndex])||IsEmpty(&p->m_pParent[nCurNode-1][nCurIndex],TRUE)))
{
Pop(queResult,&nCurNode,&nCurIndex,0,TRUE,TRUE);
Pop(queResult,&nCurNode,&nCurIndex,0,FALSE,TRUE);
}
if(IsEmpty(queResult,FALSE)==FALSE&&IsEmpty(&p->m_pParent[nCurNo
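/*
 * Residue from the web code viewer (not part of the source): keyboard shortcut help.
 *   Copy code:           Ctrl + C
 *   Search code:         Ctrl + F
 *   Full-screen mode:    F11
 *   Toggle theme:        Ctrl + Shift + D
 *   Show shortcuts:      ?
 *   Increase font size:  Ctrl + =
 *   Decrease font size:  Ctrl + -
 */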