// childfrm.cpp
// ChildFrm.cpp : implementation of the CChildFrame class
//
#include "stdafx.h"
#include "PosTag.h"
#include "ChildFrm.h"
#include "MainFrm.h"
#include "PosTagDoc.h"
#include "PosTagView.h"
#include "test.h"
#include "Mtnodelist.h"
#include "MTNodeStu.h"
#include "math.h"
#include "Bplus.h"
#include "setnumdialog.h"
//CString CateChangePenn(char Test[10]);
// Sentence object defined in another translation unit; CChildFrame::BestSequence
// walks its word list starting at m_pWordFirst.
extern SentenceType g_objMorSent;
// Defined elsewhere. NOTE(review): no call to it is visible in this part of the file.
extern DictNode* WordRestore(char *inputword, int nSearchMod);
// Upper bound of the per-sentence loop in CChildFrame::OnPostag.
int sentenceNum=1;
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
/////////////////////////////////////////////////////////////////////////////
// CChildFrame
// Enables MFC dynamic creation for CChildFrame, with CMDIChildWnd named as its base class.
IMPLEMENT_DYNCREATE(CChildFrame, CMDIChildWnd)
// Message map: the ID_POSTAG command is dispatched to CChildFrame::OnPostag;
// everything else falls through to CMDIChildWnd.
BEGIN_MESSAGE_MAP(CChildFrame, CMDIChildWnd)
//{{AFX_MSG_MAP(CChildFrame)
ON_COMMAND(ID_POSTAG, OnPostag)
//}}AFX_MSG_MAP
END_MESSAGE_MAP()
/////////////////////////////////////////////////////////////////////////////
// CChildFrame construction/destruction
// Default constructor. The body is empty: no member is initialized here.
CChildFrame::CChildFrame()
{
// TODO: add member initialization code here
}
// Destructor. The body is empty: nothing is released here.
CChildFrame::~CChildFrame()
{
}
// Called before the frame window is created.
// cs is passed through unmodified to CMDIChildWnd::PreCreateWindow and the
// base-class result is returned as-is.
BOOL CChildFrame::PreCreateWindow(CREATESTRUCT& cs)
{
// TODO: Modify the Window class or styles here by modifying
// the CREATESTRUCT cs
return CMDIChildWnd::PreCreateWindow(cs);
}
/////////////////////////////////////////////////////////////////////////////
// CChildFrame diagnostics
#ifdef _DEBUG
// Debug-only validity check; adds nothing beyond the CMDIChildWnd check it forwards to.
void CChildFrame::AssertValid() const
{
CMDIChildWnd::AssertValid();
}
// Debug-only dump; writes only what CMDIChildWnd::Dump writes to dc.
void CChildFrame::Dump(CDumpContext& dc) const
{
CMDIChildWnd::Dump(dc);
}
#endif //_DEBUG
/////////////////////////////////////////////////////////////////////////////
// CChildFrame message handlers
// Writes "<pszBase><pszExt>" into pszDest.
// Returns false (leaving pszDest untouched) when an argument is NULL or the
// result, including its terminator, would not fit into nDestSize bytes.
static bool BuildDictFileName(char *pszDest, size_t nDestSize,
                              const char *pszBase, const char *pszExt)
{
    if ( pszDest == NULL || pszBase == NULL || pszExt == NULL )
        return false;
    if ( strlen(pszBase) + strlen(pszExt) + 1 > nDestSize )
        return false;
    strcpy(pszDest, pszBase);
    strcat(pszDest, pszExt);
    return true;
}

// Opens the dictionary data files and creates the dictionary objects that
// are stored on the application object.
//
// pszSysDicIndexName     Base path (no extension) of the system dictionary;
//                        ".idx", ".dta" and ".dat" are appended to it.
// pszHuffmanFreqDataName Not used by this function.
//
// The additional dictionary always uses the base path "DictRes\\AddDict".
//
// On success the application object holds two open FILE handles
// (m_fpSysIndexDat, m_fpAddtionIndexDat) and two Dictionary objects
// (m_obSysDiction, m_obAddtionDiction); this function does not release them.
//
// Returns true when both .dat files were opened and
// LoadSysHushTableFromIndexDat reported a non-zero result, false otherwise.
bool CChildFrame::SearchWordInit1(LPSTR pszSysDicIndexName,
LPSTR pszHuffmanFreqDataName)
{
    (void)pszHuffmanFreqDataName; // kept for interface compatibility only

    char szSysDictIndexName[MAX_FILENAME_LEN];      // system dictionary: index file name
    char szSysDictDtaName[MAX_FILENAME_LEN];        // system dictionary: second-level index file name
    char szSysDictDatName[MAX_FILENAME_LEN];        // system dictionary: data file name
    //yys 98.5.22 Bgn
    char szAddtionDictIndexName[MAX_FILENAME_LEN];  // additional dictionary: index file name
    char szAddtionDictDtaName[MAX_FILENAME_LEN];    // additional dictionary: second-level index file name
    char szAddtionDictDatName[MAX_FILENAME_LEN];    // additional dictionary: data file name
    const char pszAddtionDicIndexName[] = "DictRes\\AddDict"; // base name of the additional dictionary

    // Refuse base names that would overflow the fixed-size path buffers
    // (the previous strcpy/strcat sequence wrote past them silently).
    if ( !BuildDictFileName(szSysDictIndexName, sizeof(szSysDictIndexName), pszSysDicIndexName, ".idx") ||
         !BuildDictFileName(szSysDictDtaName, sizeof(szSysDictDtaName), pszSysDicIndexName, ".dta") ||
         !BuildDictFileName(szSysDictDatName, sizeof(szSysDictDatName), pszSysDicIndexName, ".dat") ||
         !BuildDictFileName(szAddtionDictIndexName, sizeof(szAddtionDictIndexName), pszAddtionDicIndexName, ".idx") ||
         !BuildDictFileName(szAddtionDictDtaName, sizeof(szAddtionDictDtaName), pszAddtionDicIndexName, ".dta") ||
         !BuildDictFileName(szAddtionDictDatName, sizeof(szAddtionDictDatName), pszAddtionDicIndexName, ".dat") )
    {
        AfxMessageBox("Dictionary file name is missing or too long!");
        return false;
    }

    CPosTagApp* pApp = static_cast<CPosTagApp*>(AfxGetApp());

    // The additional file is only attempted once the system file is open,
    // exactly as the original short-circuit condition did.
    pApp->m_fpSysIndexDat = fopen(szSysDictDatName, "r+b");
    if ( pApp->m_fpSysIndexDat != NULL )
        pApp->m_fpAddtionIndexDat = fopen(szAddtionDictDatName, "r+b");

    if ( pApp->m_fpSysIndexDat == NULL || pApp->m_fpAddtionIndexDat == NULL )
    {
        // Do not leak the system file when only the additional file failed.
        if ( pApp->m_fpSysIndexDat != NULL )
        {
            fclose(pApp->m_fpSysIndexDat);
            pApp->m_fpSysIndexDat = NULL;
        }
        CString strMsg;
        strMsg.Format("Cann't open file %s or %s ! ",
                      szSysDictDatName, szAddtionDictDatName);
        AfxMessageBox(strMsg);
        return false;
    }

    pApp->m_obSysDiction = new Dictionary(szSysDictDtaName, szSysDictIndexName, 0);
    pApp->m_obAddtionDiction = new Dictionary(szAddtionDictDtaName, szAddtionDictIndexName, 0);

    DictSearch qsearch;
    const int found = qsearch.LoadSysHushTableFromIndexDat(pApp->m_fpSysIndexDat,
                                                           pApp->m_fpAddtionIndexDat);
    if ( found == 0 )
    {
        // Failure is reported to the caller, so the dictionaries created
        // above must not outlive this call.
        // NOTE(review): the two FILE handles are left open here because it is
        // not visible whether LoadSysHushTableFromIndexDat closes them on failure.
        delete pApp->m_obSysDiction;
        pApp->m_obSysDiction = NULL;
        delete pApp->m_obAddtionDiction;
        pApp->m_obAddtionDiction = NULL;
        return false;
    }
    return true;
}
void CChildFrame::OnPostag()
{
// TODO: Add your command handler code here
if ( SearchWordInit1("DictRes\\EcDict","DictRes\\HuffFreq.dat") == TRUE )
{
bIsInit=TRUE;
}
if ( !bIsInit )
{
AfxMessageBox("Please Initializing!");
return;
}
char szSour[3000];
CPosTagView* pView;
int nCurrLineIndex;
CString strLine;
int nLen,Len;
// char Re_CompareFileName[]="cdqprg\\re_compare.txt"; //44個(gè)詞性標(biāo)注符號(hào)
// FILE* fpOutput1;
fpOutput1=fopen(Re_CompareFileName,"wb");
if ( fpOutput1 == NULL )
{
char stErrorMsg[200];
sprintf(stErrorMsg,"Error Open %s !",Re_CompareFileName);
AfxMessageBox(stErrorMsg);
return;
}
pView=(CPosTagView*)GetActiveView( );
nCurrLineIndex = pView->GetRichEditCtrl().LineFromChar(-1);// 取當(dāng)前行號(hào)
for (m_nCurrLineIndex=0;m_nCurrLineIndex<sentenceNum;m_nCurrLineIndex++)
{
nLen = pView->GetRichEditCtrl().GetLine(nCurrLineIndex,strLine.GetBuffer(300));
//Test
if (nLen<=2)
break;
char cTemp;
cTemp=strLine.GetBuffer(300)[nLen-2];
cTemp=strLine.GetBuffer(300)[nLen-1];
//Test
Len = nLen;
if ( strLine.GetBuffer(300)[Len-2] == 13 )
{
strLine.GetBuffer(300)[Len-2] = '\0';
strLine.ReleaseBuffer();
}
else
{
while (strLine.GetBuffer(300)[Len-2] != 13 )
{
strLine.GetBuffer(300)[Len]='\0';
CString strLineTemp;
strLine.ReleaseBuffer();
nCurrLineIndex = nCurrLineIndex+1;
nLen = pView->GetRichEditCtrl().GetLine(nCurrLineIndex,strLineTemp.GetBuffer(300));
if ( strLineTemp.GetBuffer(300)[nLen-2] == 13 )
{
strLineTemp.GetBuffer(300)[nLen-2] = '\0';
strLineTemp.ReleaseBuffer();
Len = Len+nLen;
strLine+=strLineTemp;
break;
}
else
{
strLineTemp.GetBuffer(300)[nLen] = '\0';
strLineTemp.ReleaseBuffer();
Len = Len+nLen;
strLine+=strLineTemp;
}
}
}
strcpy(szSour,strLine);
nCurrLineIndex = nCurrLineIndex+1;
MorphorAnalyze(szSour);
BestSequence();
}
fclose(fpOutput1);
delete ( (CPosTagApp* ) AfxGetApp() ) ->m_obSysDiction;
delete ( (CPosTagApp* ) AfxGetApp() ) ->m_obAddtionDiction;
AfxMessageBox("OK!");
}
void CChildFrame::BestSequence()
{
CString linTmp1,linTmp2,linTmp3;
CString strTmp1,strTmp2,strTmp3;
CString posTmp1,posTmp2;
char crTmp[200];
// char Test[10];
double n_prob;
double posPro[10];
double b[44];
double beginPro[44];
double viterbiPro[200][44];
double posPairPro[44][44];
CStringArray viterbiPos[200]; //記錄第I個(gè)單詞中最大viterbiPro值所對(duì)應(yīng)的前一個(gè)單詞的詞性
CStringArray viterbiAnn[200]; //記錄第I(0~499)個(gè)單詞的J(0~44)個(gè)詞性
CStringArray wordContent;
CStringArray posAnnotate;
CStringArray possibleAnn;
int i,j,l,h,s,t;
int ncount;
int count[200][8],nowcount[8];
const int c_nMaxLineLen=1500;
char szOneLine[c_nMaxLineLen];
char szPennTableName[]="cdqprg\\conmarker.txt";
char divertFileName[]="cdqprg\\divmatrix.txt"; //轉(zhuǎn)移概率矩陣
char beginFileName[]="cdqprg\\beginstate.txt"; //初始狀態(tài)分布
CStringArray npos;
FILE* fpInput2;
fpInput2=fopen(szPennTableName,"rb");
if ( fpInput2 == NULL )
{
char stErrorMsg[200];
sprintf(stErrorMsg,"Error Open %s !",szPennTableName);
AfxMessageBox(stErrorMsg);
return;
}
FILE* fpInput3;
fpInput3=fopen(divertFileName,"rb");
if ( fpInput3 == NULL )
{
char stErrorMsg[200];
sprintf(stErrorMsg,"Error Open %s !",divertFileName);
AfxMessageBox(stErrorMsg);
return;
}
for ( i=0;i<44;i++)
{//轉(zhuǎn)移概率賦值
fgets(szOneLine,c_nMaxLineLen,fpInput3);
linTmp3=szOneLine;
for ( int j=0;j<44;j++ )
{
strTmp3=linTmp3.Left(linTmp3.Find(" "));
posPairPro[i][j]=atof(strTmp3);
linTmp3=linTmp3.Right(linTmp3.GetLength()-linTmp3.Find(" ")-2);
}
}
FILE* fpInput4;
fpInput4=fopen(beginFileName,"rb");
if ( fpInput4 == NULL )
{
char stErrorMsg[200];
sprintf(stErrorMsg,"Error Open %s !",beginFileName);
AfxMessageBox(stErrorMsg);
return;
}
for ( i=0;i<44;i++ )
{//初始狀態(tài)賦值
fgets(szOneLine,c_nMaxLineLen,fpInput4);
linTmp3=szOneLine;
strTmp3=linTmp3.Right(linTmp3.GetLength()-linTmp3.Find(' ')-1);
beginPro[i]=atof(strTmp3);
}
/*
char viterbiFileName[]="viterbi.txt";
FILE* fpOutput;
fpOutput=fopen(viterbiFileName,"wb");
if ( fpOutput == NULL )
{
char stErrorMsg[200];
sprintf(stErrorMsg,"Error Open %s !",viterbiFileName);
AfxMessageBox(stErrorMsg);
return;
}
*/ struct DictNode *CurNode;
int m=0;
CurNode = g_objMorSent.m_pWordFirst;
while ( CurNode!=NULL )
{
i=0;
strTmp1 = CurNode->m_pszEnglish;
Find(strTmp1,wordContent); //單詞內(nèi)容(詞性、發(fā)射概率)都存于arDictContent數(shù)組中
//取CurNode.m_pFirstChin.m_nCate
//及CurNode.m_pNextChin.m_nCate....的值賦予arDictContent數(shù)組中,
//若有多個(gè)詞性,需分別賦值且每個(gè)詞性的發(fā)射概率都為1。
if (strcmp(strTmp1,"There")!=0 &&
strcmp(strTmp1,"there")!=0)
{
/* if ( strcmp(strTmp1,CurNode->m_pszOrig)==NULL )
{//只在單詞與原型一致時(shí)使用系統(tǒng)詞性(')
if ( strcmp(strTmp1,CurNode->m_pszOrig)==NULL )
{//只在單詞與原型不一致時(shí)使用系統(tǒng)詞性(")
*/ if ( CurNode->m_pszAmbig != 0 )
{
strTmp3 = CurNode->m_pszAmbig;
i=1;
posTmp2="";
posTmp1 = strTmp3.Left(strTmp3.Find('/'));
if ( strchr(posTmp1,'v')!=0 && strchr(posTmp1,'a')==0 )
posTmp2="v/";
else
posTmp2=posTmp1+"/";
strTmp3 = strTmp3.Right(strTmp3.GetLength()-strTmp3.Find('/')-1);
while ( strstr(strTmp3,"/") != NULL )
{
posTmp1 = strTmp3.Left(strTmp3.Find('/'));
if ( strchr(posTmp1,'v')!=0 && strchr(posTmp1,'a')==0 )
{
if (strstr(posTmp2,"v/")==0)
{
posTmp2=posTmp2+"v/";
}
else
{
strTmp3 = strTmp3.Right(strTmp3.GetLength()-strTmp3.Find('/')-1);
break;
}
}
else
{
posTmp2=posTmp2+posTmp1+"/";
}
strTmp3 = strTmp3.Right(strTmp3.GetLength()-strTmp3.Find('/')-1);
i=i+1;
}
posTmp1=strTmp3;
if ( strchr(posTmp1,'v')!=0 && strchr(posTmp1,'a')==0 )
{
if (strstr(posTmp2,"v/")==0)
{
posTmp2=posTmp2+"v";
i=i+1;
}
else
posTmp2=posTmp2.Left(posTmp2.GetLength()-1);
}
else
{
posTmp2=posTmp2+posTmp1;
i=i+1;
}
}
if (i>wordContent.GetSize())
{
wordContent.RemoveAll();
ChangeWordContent(posTmp2,wordContent);
}
// }
}
t=0;
for ( i=0;i<wordContent.GetSize();i++ )
{
strTmp2=wordContent[i];
posTmp1=strTmp2.Right(strTmp2.GetLength()-strTmp2.Find(" ")-1);
n_prob=atof(posTmp1);
posTmp1=strTmp2.Left(strTmp2.Find(" "));
posTmp1=" "+posTmp1+" ";
j=0;
while ( !feof(fpInput2) )
{
fgets(szOneLine,c_nMaxLineLen,fpInput2);
linTmp2=szOneLine;
if ( strstr(linTmp2,posTmp1)==0 )
{
j=j+1;
}
else
{
b[j]=n_prob;
posTmp2=linTmp2.Right(linTmp2.GetLength()-linTmp2.Find(" ")-1);
posTmp2=posTmp2.Left(posTmp2.Find(" "));
break;
}
}
fseek(fpInput2,0L,SEEK_SET);
s=0;
h=0;
if ( m==0 )
{
if (beginPro[j]==0 )
{
posPro[s]=0;
viterbiPro[m][j]=0;
}
else
{
posPro[s]=exp(log(beginPro[j])+log(b[j]));
viterbiPro[m][j]=posPro[s];
}
s=s+1;
npos.Add(posTmp2);
count[m][t]=j;
t=t+1;
possibleAnn.Add( posTmp2 );
}
else
{
for ( int l=0;l<viterbiAnn[m-1].GetSize();l++ )
{
if ( viterbiPro[m-1][l]==0 || posPairPro[count[m-1][l]][j]==0 )
{
posPro[s]=0;
}
else
{
posPro[s]=exp(log(viterbiPro[m-1][l])+log(posPairPro[count[m-1][l]][j])+log(b[j]));
}
s=s+1;
npos.Add(viterbiAnn[m-1][l]);
nowcount[h]=j;
h=h+1;
possibleAnn.Add(posTmp2);
}
}
if ( possibleAnn.GetSize() != 1 )
{
for ( l=1;l<possibleAnn.GetSize();l++ )
{
if ( posPro[0]<posPro[l] )
{
posPro[0]=posPro[l];
nowcount[0]=nowcount[l];
possibleAnn[0]=possibleAnn[l];
npos[0]=npos[l];
}
}
h=0;
}
viterbiAnn[m].Add(possibleAnn[0]);
ncount=viterbiAnn[m].GetSize()-1;
viterbiPro[m][ncount]=posPro[0];
if ( m != 0 )
{
count[m][ncount]=nowcount[0];
}
viterbiPos[m].Add(npos[0]);
// fprintf(fpOutput,"viterbiPro[%d][%d]=%7.6e %s %s\n",m,ncount,viterbiPro[m][ncount],viterbiAnn[m][ncount],viterbiPos[m][ncount]);
possibleAnn.RemoveAll();
npos.RemoveAll();
}
wordContent.RemoveAll();
CurNode = CurNode->m_pNextWordNode;
m=m+1;
}
for ( i=m-1;i>=0;i-- )
{//確定最佳詞性序列
if ( i==m-1 )
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號(hào)
Ctrl + =
減小字號(hào)
Ctrl + -