?? dictionary.cpp
字號:
/****************************************************************************
*
* Copyright (c) 2000, 2001
* Machine Group
* Software Research Lab.
* Institute of Computing Tech.
* Chinese Academy of Sciences
* All rights reserved.
*
* This file is the confidential and proprietary property of
* Institute of Computing Tech. and the possession or use of this file requires
* a written license from the author.
* Filename: Dictionary.cpp
* Abstract:
* implementation of the CDictionary class.
* Author: Kevin Zhang
* (zhanghp@software.ict.ac.cn)
* Date: 2002-1-8
*
* Notes:
*
****************************************************************************/
#include "stdafx.h"
#include "Dictionary.h"
#include "Utility.h"
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
CDictionary::CDictionary()
    : m_pModifyTable(NULL)//no modification buffer until the first AddItem/DelItem
{
    //Start with an all-zero index table: every entry has a zero count
    //and a null word item array.
    memset(m_IndexTable,0,sizeof(m_IndexTable));
}
/*********************************************************************
 *
 *  Func Name  : ~CDictionary
 *
 *  Description: Release every word string and word item array held by
 *               the index table, then release the modify table.
 *********************************************************************/
CDictionary::~CDictionary()
{
    for(int i=0;i<CC_NUM;i++)
    {
        //sWord is allocated with new char[] (see Load/AddItem), so it has
        //to be released with the array form of delete.
        for(int j=0;j<m_IndexTable[i].nCount;j++)
            delete [] m_IndexTable[i].pWordItemHead[j].sWord;
        delete [] m_IndexTable[i].pWordItemHead;
    }
    DelModified();
}
/*********************************************************************
 *
 *  Func Name  : Load
 *
 *  Description: Load the dictionary from the file .dct, replacing
 *               whatever is currently held in memory.
 *
 *               File layout, repeated CC_NUM times:
 *                 int   item count
 *                 then, per item:
 *                   int[3]  frequency, word length, handle
 *                   char[]  word bytes (no terminating 0)
 *
 *  Parameters : sFilename: the file name
 *               bReset   : true to store 0 as the frequency of every
 *                          item instead of the value in the file
 *
 *  Returns    : success or fail. On a read error or a malformed record
 *               the dictionary is left empty and false is returned.
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-9
 *********************************************************************/
bool CDictionary::Load(char *sFilename,bool bReset)
{
    FILE *fp;
    int i,j,nCount,nBuffer[3];
    char *sWord;
    bool bOK=true;
    if(sFilename==NULL||(fp=fopen(sFilename,"rb"))==NULL)
        return false;//fail while opening the file
    //Release the memory held for the previously loaded dictionary
    for(i=0;i<CC_NUM;i++)
    {
        for(j=0;j<m_IndexTable[i].nCount;j++)
            delete [] m_IndexTable[i].pWordItemHead[j].sWord;//allocated with new char[]
        delete [] m_IndexTable[i].pWordItemHead;
    }
    //Reset the table so that no entry keeps a dangling pointer or a stale
    //count if loading stops half way.
    memset(m_IndexTable,0,sizeof(m_IndexTable));
    DelModified();
    for(i=0;bOK&&i<CC_NUM;i++)
    {
        nCount=0;
        if(fread(&nCount,sizeof(int),1,fp)!=1)
        {//Truncated file
            bOK=false;
            break;
        }
        if(nCount<=0)
            continue;//Empty entry: count and pointer are already 0
        m_IndexTable[i].pWordItemHead=new WORD_ITEM[nCount];
        for(j=0;j<nCount;j++)
        {
            //nBuffer: [0]=frequency, [1]=word length, [2]=handle
            if(fread(nBuffer,sizeof(int),3,fp)!=3||nBuffer[1]<0)
            {//Truncated record or impossible word length
                bOK=false;
                break;
            }
            sWord=new char[nBuffer[1]+1];
            if(nBuffer[1]>0&&fread(sWord,sizeof(char),nBuffer[1],fp)!=(size_t)nBuffer[1])
            {//The word itself is truncated
                delete [] sWord;
                bOK=false;
                break;
            }
            sWord[nBuffer[1]]=0;
            m_IndexTable[i].pWordItemHead[j].sWord=sWord;
            if(bReset)//Reset the frequency
                m_IndexTable[i].pWordItemHead[j].nFrequency=0;
            else
                m_IndexTable[i].pWordItemHead[j].nFrequency=nBuffer[0];
            m_IndexTable[i].pWordItemHead[j].nWordLen=nBuffer[1];
            m_IndexTable[i].pWordItemHead[j].nHandle=nBuffer[2];
            //Publish the item only once it is fully initialised, so the
            //clean-up below never touches an uninitialised sWord.
            m_IndexTable[i].nCount=j+1;
        }
    }
    fclose(fp);
    if(!bOK)
    {//Roll back to an empty dictionary
        for(i=0;i<CC_NUM;i++)
        {
            for(j=0;j<m_IndexTable[i].nCount;j++)
                delete [] m_IndexTable[i].pWordItemHead[j].sWord;
            delete [] m_IndexTable[i].pWordItemHead;
        }
        memset(m_IndexTable,0,sizeof(m_IndexTable));
        return false;
    }
    return true;
}
//Write one dictionary record to fp: three ints (frequency, word length,
//handle) followed by the word bytes without a terminating 0.
static void WriteDictRecord(FILE *fp,int nFrequency,int nWordLen,int nHandle,const char *sWord)
{
    int nBuffer[3];
    nBuffer[0]=nFrequency;
    nBuffer[1]=nWordLen;
    nBuffer[2]=nHandle;
    fwrite(nBuffer,sizeof(int),3,fp);
    if(nWordLen>0)//String length is more than 0
        fwrite(sWord,sizeof(char),nWordLen,fp);
}
/*********************************************************************
 *
 *  Func Name  : Save
 *
 *  Description: Save the dictionary as the file .dct
 *
 *               For each of the CC_NUM index entries the item count is
 *               written first, then one record per item. When a modify
 *               table exists, its chain is merged with the original
 *               array (ordered by word, then by handle) and original
 *               items whose frequency is -1 (removed) are left out.
 *
 *  Parameters : sFilename: the file name
 *
 *  Returns    : success or fail (fails when the file cannot be opened
 *               or a write/close error is reported by the stream)
 *  Author     : Kevin Zhang
 *  History    :
 *              1.create 2002-1-9
 *********************************************************************/
bool CDictionary::Save(char *sFilename)
{
    FILE *fp;
    int i,j,nCount,nCmp;
    PWORD_CHAIN pCur;
    bool bOK;
    if(sFilename==NULL||(fp=fopen(sFilename,"wb"))==NULL)
        return false;//fail while opening the file
    for(i=0;i<CC_NUM;i++)
    {
        if(!m_pModifyTable)
        {//No modification: dump the original table as it is
            fwrite(&m_IndexTable[i].nCount,sizeof(int),1,fp);
            for(j=0;j<m_IndexTable[i].nCount;j++)
                WriteDictRecord(fp,
                    m_IndexTable[i].pWordItemHead[j].nFrequency,
                    m_IndexTable[i].pWordItemHead[j].nWordLen,
                    m_IndexTable[i].pWordItemHead[j].nHandle,
                    m_IndexTable[i].pWordItemHead[j].sWord);
            continue;
        }
        //Modification made: original items + added items - removed items
        nCount=m_IndexTable[i].nCount+m_pModifyTable[i].nCount-m_pModifyTable[i].nDelete;
        fwrite(&nCount,sizeof(int),1,fp);
        pCur=m_pModifyTable[i].pWordItemHead;
        j=0;
        while(pCur!=NULL&&j<m_IndexTable[i].nCount)
        {//Merge both sources, smaller (word, handle) first
            nCmp=strcmp(pCur->data.sWord,m_IndexTable[i].pWordItemHead[j].sWord);
            if(nCmp<0||(nCmp==0&&pCur->data.nHandle<m_IndexTable[i].pWordItemHead[j].nHandle))
            {//Output the modified data to the file
                WriteDictRecord(fp,pCur->data.nFrequency,pCur->data.nWordLen,
                    pCur->data.nHandle,pCur->data.sWord);
                pCur=pCur->next;//Get next item in the modify table.
            }
            else if(m_IndexTable[i].pWordItemHead[j].nFrequency==-1)
            {//The item has been removed,so skip it
                j+=1;
            }
            else
            {//Output the index table data to the file.
             //A plain else also covers a tie (same word and handle in both
             //sources); requiring a strict "greater" here left no branch
             //that advances, so the loop never ended.
                WriteDictRecord(fp,
                    m_IndexTable[i].pWordItemHead[j].nFrequency,
                    m_IndexTable[i].pWordItemHead[j].nWordLen,
                    m_IndexTable[i].pWordItemHead[j].nHandle,
                    m_IndexTable[i].pWordItemHead[j].sWord);
                j+=1;//Get next item in the original table.
            }
        }
        //At most one of the two loops below has anything left to do.
        for(;j<m_IndexTable[i].nCount;j++)
        {//Rest of the original table, skipping removed items
            if(m_IndexTable[i].pWordItemHead[j].nFrequency!=-1)
                WriteDictRecord(fp,
                    m_IndexTable[i].pWordItemHead[j].nFrequency,
                    m_IndexTable[i].pWordItemHead[j].nWordLen,
                    m_IndexTable[i].pWordItemHead[j].nHandle,
                    m_IndexTable[i].pWordItemHead[j].sWord);
        }
        for(;pCur!=NULL;pCur=pCur->next)
        {//Rest of the modify table
            WriteDictRecord(fp,pCur->data.nFrequency,pCur->data.nWordLen,
                pCur->data.nHandle,pCur->data.sWord);
        }
    }
    //Report a failed write or a failed flush on close instead of
    //claiming success unconditionally.
    bOK=(ferror(fp)==0);
    if(fclose(fp)!=0)
        bOK=false;
    return bOK;
}
/*********************************************************************
*
* Func Name : AddItem
*
* Description: Add a word item to the dictionary
*
*
* Parameters : sWord: the word
* nHandle:the handle number
* nFrequency: the frequency
* Returns : success or fail
* Author : Kevin Zhang
* History :
* 1.create 2002-1-9
*********************************************************************/
bool CDictionary::AddItem(char *sWord, int nHandle,int nFrequency)
{
char sWordAdd[WORD_MAXLENGTH-2];
int nPos,nFoundPos;
PWORD_CHAIN pRet,pTemp,pNext;
int i=0;
if(!PreProcessing(sWord, &nPos,sWordAdd,true))
return false;
if(FindInOriginalTable(nPos,sWordAdd,nHandle,&nFoundPos))
{//The word exists in the original table, so add the frequency
//Operation in the index table and its items
if(m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency==-1)
{//The word item has been removed
m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency=nFrequency;
if(!m_pModifyTable)//Not prepare the buffer
{
m_pModifyTable=new MODIFY_TABLE[CC_NUM];
memset(m_pModifyTable,0,CC_NUM*sizeof(MODIFY_TABLE));
}
m_pModifyTable[nPos].nDelete-=1;
}
else
m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency+=nFrequency;
return true;
}
//The items not exists in the index table.
//As following, we have to find the item whether exists in the modify data region
//If exists, change the frequency .or else add a item
if(!m_pModifyTable)//Not prepare the buffer
{
m_pModifyTable=new MODIFY_TABLE[CC_NUM];
memset(m_pModifyTable,0,CC_NUM*sizeof(MODIFY_TABLE));
}
if(FindInModifyTable(nPos,sWordAdd,nHandle,&pRet))
{
if(pRet!=NULL)
pRet=pRet->next;
else
pRet=m_pModifyTable[nPos].pWordItemHead;
pRet->data.nFrequency+=nFrequency;
return true;
}
//find the proper position to add the word to the modify data table and link
pTemp=new WORD_CHAIN;//Allocate the word chain node
if(pTemp==NULL)//Allocate memory failure
return false;
memset(pTemp,0,sizeof(WORD_CHAIN));//init it with 0
pTemp->data.nHandle=nHandle;//store the handle
pTemp->data.nWordLen=strlen(sWordAdd);
pTemp->data.sWord=new char[1+pTemp->data.nWordLen];
strcpy(pTemp->data.sWord,sWordAdd);
pTemp->data.nFrequency=nFrequency;
pTemp->next=NULL;
if(pRet!=NULL)
{
pNext=pRet->next;//Get the next item before the current item
pRet->next=pTemp;//link the node to the chain
}
else
{
pNext=m_pModifyTable[nPos].pWordItemHead;
m_pModifyTable[nPos].pWordItemHead=pTemp;//Set the pAdd as the head node
}
pTemp->next=pNext;//Very important!!!! or else it will lose some node
//Modify in 2001-10-29
m_pModifyTable[nPos].nCount++;//the number increase by one
return true;
}
/*********************************************************************
 *
 *  Func Name  : DelItem
 *
 *  Description: Remove a word item from the dictionary.
 *               An item of the original table is only marked as removed
 *               (frequency set to -1) and counted in nDelete; an item of
 *               the modify table is unlinked and freed.
 *
 *  Parameters : sWord: the word
 *               nHandle:the handle number; -1 removes the matching word
 *                       with any handle
 *  Returns    : true if the word was found in either table, else false
 *********************************************************************/
bool CDictionary::DelItem(char *sWord,int nHandle)
{
    char sWordDel[WORD_MAXLENGTH-2];
    int nPos,nFoundPos,nTemp;
    PWORD_CHAIN pPre,pTemp,pCur;
    if(!PreProcessing(sWord, &nPos,sWordDel))
        return false;
    if(FindInOriginalTable(nPos,sWordDel,nHandle,&nFoundPos))
    {
        if(!m_pModifyTable)//Not prepare the buffer
        {
            m_pModifyTable=new MODIFY_TABLE[CC_NUM];
            memset(m_pModifyTable,0,CC_NUM*sizeof(MODIFY_TABLE));
        }
        //Count an item only the first time it is removed; Save() relies
        //on nDelete being the exact number of items marked with -1.
        if(m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency!=-1)
        {
            m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency=-1;
            m_pModifyTable[nPos].nDelete+=1;
        }
        if(nHandle==-1)//Remove all items which word is sWordDel,ignoring the handle
        {
            //Walk the following items while they still carry the same
            //word. The item at nTemp is the one compared (comparing the
            //found item again would match forever and wipe the rest of
            //the entry).
            nTemp=nFoundPos+1;
            while(nTemp<m_IndexTable[nPos].nCount&&strcmp(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordDel)==0)
            {
                if(m_IndexTable[nPos].pWordItemHead[nTemp].nFrequency!=-1)
                {
                    m_IndexTable[nPos].pWordItemHead[nTemp].nFrequency=-1;
                    m_pModifyTable[nPos].nDelete+=1;
                }
                nTemp+=1;
            }
        }
        return true;
    }
    //Operation in the modify table and its items
    if(FindInModifyTable(nPos,sWordDel,nHandle,&pPre))
    {
        //pPre is the node before the first match; NULL means the match
        //is the head of the chain.
        pCur=m_pModifyTable[nPos].pWordItemHead;
        if(pPre!=NULL)
            pCur=pPre->next;
        while(pCur!=NULL && _stricmp(pCur->data.sWord,sWordDel)==0&&(pCur->data.nHandle==nHandle||nHandle<0))
        {
            pTemp=pCur;
            pCur=pCur->next;
            if(pPre!=NULL)//pTemp is not the first item
                pPre->next=pCur;
            else
                m_pModifyTable[nPos].pWordItemHead=pCur;
            delete [] pTemp->data.sWord;//Allocated with new char[]
            delete pTemp;
            //Keep the added-item count in step with the chain; AddItem
            //increments it and Save() writes it into the file.
            m_pModifyTable[nPos].nCount-=1;
        }
        return true;
    }
    return false;
}
bool CDictionary::DelModified()
{
PWORD_CHAIN pTemp,pCur;
if(!m_pModifyTable)
return true;
for(int i=0;i<CC_NUM;i++)
{
pCur=m_pModifyTable[i].pWordItemHead;
while(pCur!=NULL)
{
pTemp=pCur;
pCur=pCur->next;
delete pTemp->data.sWord;
delete pTemp;
}
}
delete [] m_pModifyTable;
m_pModifyTable=NULL;
return true;
}
bool CDictionary::IsExist(char *sWord, int nHandle)
{
char sWordFind[WORD_MAXLENGTH-2];
int nPos;
if(!PreProcessing(sWord, &nPos,sWordFind))
return false;
return(FindInOriginalTable(nPos,sWordFind,nHandle)||FindInModifyTable(nPos,sWordFind,nHandle));
}
bool CDictionary::GetHandle(char *sWord,int *pnCount,int *pnHandle,int *pnFrequency)
{
char sWordGet[WORD_MAXLENGTH-2];
int nPos,nFoundPos,nTemp;
PWORD_CHAIN pPre,pCur;
*pnCount=0;
if(!PreProcessing(sWord, &nPos,sWordGet))
return false;
if(FindInOriginalTable(nPos,sWordGet,-1,&nFoundPos))
{
pnHandle[*pnCount]=m_IndexTable[nPos].pWordItemHead[nFoundPos].nHandle;
pnFrequency[*pnCount]=m_IndexTable[nPos].pWordItemHead[nFoundPos].nFrequency;
*pnCount+=1;
/* nTemp=nFoundPos-1;//Check its previous position
while(nTemp>0&&strcmp(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordGet)==0)
{
pnHandle[*pnCount]=m_IndexTable[nPos].pWordItemHead[nTemp].nHandle;
pnFrequency[*pnCount]=m_IndexTable[nPos].pWordItemHead[nTemp].nFrequency;
*pnCount+=1;
nTemp-=1;
}
*/ nTemp=nFoundPos+1;//Check its previous position
while(nTemp<m_IndexTable[nPos].nCount&&strcmp(m_IndexTable[nPos].pWordItemHead[nTemp].sWord,sWordGet)==0)
{
pnHandle[*pnCount]=m_IndexTable[nPos].pWordItemHead[nTemp].nHandle;
pnFrequency[*pnCount]=m_IndexTable[nPos].pWordItemHead[nTemp].nFrequency;
*pnCount+=1;
nTemp+=1;
}
return true;
}
//Operation in the index table and its items
if(FindInModifyTable(nPos,sWordGet,-1,&pPre))
{
pCur=m_pModifyTable[nPos].pWordItemHead;
if(pPre!=NULL)
pCur=pPre->next;
while(pCur!=NULL && _stricmp(pCur->data.sWord,sWordGet)==0)
{
pnHandle[*pnCount]=pCur->data.nHandle;
pnFrequency[*pnCount]=pCur->data.nFrequency;
*pnCount+=1;
pCur=pCur->next;
}
return true;
}
return false;
}
/*********************************************************************
*
* Func Name : FindInOriginalTable
*
* Description: judge the word and handle exist in the inner table and its items
*
*
* Parameters : nInnerCode: the inner code of the first CHines char
* sWord: the word
* nHandle:the handle number
* *nPosRet:the position which node is matched
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -