/* segtofile(sen).c */
/* (code-viewer page residue, not part of the program) Font size: */
#include "segment.h"
#include <direct.h>
#include <io.h>
#include <string.h>
/**************************************************************************************
String classification helpers: substring lookup, half-width separators,
all-delimiter / all-Chinese / all-digit / all-letter tests and character typing.

1. CC_Find - find strCharSet inside string on a two-byte boundary.

   string      haystack, treated as a sequence of two-byte units
   strCharSet  needle to look for

   Returns a pointer to the first occurrence of strCharSet that starts at an
   even byte offset from the beginning of string, or NULL when there is none.
   An occurrence at an odd offset straddles two neighbouring two-byte units and
   is therefore not a real match; such hits are skipped and the search goes on
   instead of giving up at the first misaligned hit.
***************************************************************************************/
char *CC_Find(const char *string, const char *strCharSet)
{
    const char *cp = strstr(string, strCharSet);

    /* A hit at an odd offset is misaligned: resume the search one byte later. */
    while (cp != NULL && (cp - string) % 2 != 0)
    {
        cp = strstr(cp + 1, strCharSet);
    }
    return (char *)cp;
}
/**************************************************************************************
2. IsSpaceWord - test whether a string consists only of half-width separators.

   sString  NUL-terminated byte string

   Returns TRUE when every byte is a space, '\r' or '\n' (an empty string also
   yields TRUE), FALSE as soon as any other byte is present.
   These separators are single-byte, so the scan examines every byte.
***************************************************************************************/
int IsSpaceWord(unsigned char *sString)
{
    /* strspn gives the length of the leading run of separator bytes; the
       string is all separators exactly when that run reaches the terminator. */
    const size_t nRun = strspn((const char *)sString, " \r\n");

    if (sString[nRun] != '\0')
        return FALSE;
    return TRUE;
}
/**************************************************************************************
3. IsAllDelimiter - test whether sString is made up solely of two-byte
   delimiter characters.

   sString  NUL-terminated byte string read in two-byte steps

   Returns TRUE when every two-byte unit starts with lead byte 161 or 163 and
   nothing is left over (an empty string yields TRUE); FALSE otherwise,
   including when a single trailing byte remains.
***************************************************************************************/
int IsAllDelimiter(unsigned char *sString)
{
    const unsigned char *pCur = sString;

    /* Consume one complete two-byte unit per step while its lead byte is a
       delimiter lead byte (161 or 163). */
    while (pCur[0] != '\0' && pCur[1] != '\0' && (pCur[0] == 161 || pCur[0] == 163))
    {
        pCur += 2;
    }

    /* Stopping before the terminator means some other kind of character
       (or a dangling half unit) is present. */
    return (*pCur == '\0') ? TRUE : FALSE;
}
/**************************************************************************************
4. charType - classify the character that starts at sChar by its byte values.

   sChar  pointer to the first byte of the character; the second byte is only
          read when the first byte is 163

   Lead byte < 128            : CT_DELIMITER if it is one of  " ! , . ? ( ) [ ] { } + =
                                otherwise CT_SINGLE
   Lead byte 162              : CT_INDEX
   Lead 163, second 176..185  : CT_NUM
   Lead 163, second 193..218
             or     225..250  : CT_LETTER
   Lead 161, or any other 163 : CT_DELIMITER
   Lead 176..247              : CT_CHINESE
   Anything else              : CT_OTHER
***************************************************************************************/
int charType(unsigned char *sChar)
{
    const unsigned char cLead = sChar[0];

    if (cLead < 128)
    {
        /* strchr also matches the terminating NUL of the set, so a lead byte
           of 0 is reported as CT_DELIMITER as well. */
        return strchr("\042!,.?()[]{}+=", (int)cLead) ? CT_DELIMITER : CT_SINGLE;
    }

    if (cLead == 162)
        return CT_INDEX;

    if (cLead == 163)
    {
        const unsigned char cTrail = sChar[1];

        if (cTrail > 175 && cTrail < 186)
            return CT_NUM;
        if ((cTrail >= 193 && cTrail <= 218) || (cTrail >= 225 && cTrail <= 250))
            return CT_LETTER;
        return CT_DELIMITER;
    }

    if (cLead == 161)
        return CT_DELIMITER;

    if (cLead >= 176 && cLead <= 247)
        return CT_CHINESE;

    return CT_OTHER;
}
/**************************************************************************************
5. IsAllChinese - test whether sString is made up solely of Chinese characters,
   i.e. two-byte units whose lead byte lies in 176..247.

   sString  NUL-terminated byte string read in two-byte steps

   Returns TRUE when the whole string can be consumed that way (an empty string
   yields TRUE), FALSE otherwise.
***************************************************************************************/
int IsAllChinese(unsigned char *sString)
{
    const unsigned char *pCur = sString;

    /* Advance over complete two-byte units whose lead byte is in 176..247. */
    while (pCur[0] != '\0' && pCur[1] != '\0' && pCur[0] > 175 && pCur[0] < 248)
    {
        pCur += 2;
    }

    return (*pCur == '\0') ? TRUE : FALSE;
}
/**************************************************************************************
6. IsAllSingleByte - test whether every byte of sString is below 128.

   sString  NUL-terminated byte string

   Returns TRUE when no byte is 128 or above (an empty string yields TRUE),
   FALSE otherwise.
***************************************************************************************/
int IsAllSingleByte(unsigned char *sString)
{
    const unsigned char *pCur;

    for (pCur = sString; *pCur != '\0'; ++pCur)
    {
        /* The first byte at or above 128 settles the answer. */
        if (*pCur >= 128)
            return FALSE;
    }
    return TRUE;
}
/**************************************************************************************
7. IsAllNum - test whether sString reads as a number.

   A two-byte digit is lead byte 163 followed by a byte in 176..185.
   The string is consumed left to right in these stages:
     a) an optional leading sign character,
     b) a run of two-byte digits,
     c) an optional separator followed by another run of two-byte digits,
     d) a run of ASCII digits '0'..'9',
     e) an optional separator followed by another run of ASCII digits,
     f) an optional trailing unit/percent character.
   Returns TRUE when the whole string has been consumed (checked after stage c
   and again at the end), FALSE otherwise. An empty string yields TRUE.

   NOTE(review): the "sChar[0]<0" tests detect a two-byte character only where
   plain char is signed - confirm for every target compiler.
   NOTE(review): the character-set literals below may have been altered in
   transcription (one contains parenthesised Latin text) - confirm against the
   original source before relying on them.
***************************************************************************************/
int IsAllNum(unsigned char *sString)
{
unsigned int nLen=strlen((const char *)sString),i=0;
/* sChar holds the character currently being examined: one byte, or two when
   the first byte is negative as a char; sChar[2] stays 0 as terminator. */
char sChar[3];
sChar[2]=0;
/* Stage a: read the first character and keep it consumed only if it occurs
   in the sign set; otherwise rewind to the start of the string. */
if(i<nLen)
{
sChar[0]=sString[i];
i=i+1;
if(sChar[0]<0)
{
sChar[1]=sString[i];
i=i+1;
}
else
sChar[1]=0;
if(!strstr("±+—-+",sChar))
{
i=0;
}
}
/* Stage b: skip a run of two-byte digits (163, 176..185). */
while(i<nLen-1&&sString[i]==163&&sString[i+1]>175&&sString[i+1]<186)
{
i+=2;
}
/* Stage c: if the next character is a separator (found in the separator set,
   or '.' or '/'), skip a further run of two-byte digits; otherwise step back
   over the character that was just read. */
if(i<nLen)
{
sChar[0]=sString[i];
i=i+1;
if(sChar[0]<0)
{
sChar[1]=sString[i];
i=i+1;
}
else
sChar[1]=0;
if(CC_Find("∶·./",sChar)||sChar[0]=='.'||sChar[0]=='/')
{
while(i<nLen-1&&sString[i]==163&&sString[i+1]>175&&sString[i+1]<186)
{
i+=2;
}
}
else
{
i=i-strlen(sChar);
}
}
/* Everything consumed by the two-byte stages: it is a number. */
if(i>=nLen)
return TRUE;
/* Stage d: skip a run of ASCII digits '0'..'9'. */
while(i<nLen&&sString[i]>'0'-1&&sString[i]<'9'+1)
{
i+=1;
}
/* Stage e: same separator handling as stage c, followed by ASCII digits. */
if(i<nLen)
{
sChar[0]=sString[i];
i=i+1;
if(sChar[0]<0)
{
sChar[1]=sString[i];
i=i+1;
}
else
sChar[1]=0;
if(CC_Find("∶·./",sChar)||sChar[0]=='.'||sChar[0]=='/')
{
while(i<nLen&&sString[i]>'0'-1&&sString[i]<'9'+1)
{
i=i+1;
}
}
else
{
i=i-strlen(sChar);
}
}
/* Stage f: accept one trailing character from the unit set or an ASCII '%';
   any other character is stepped back over, which leaves i < nLen. */
if(i<nLen)
{
sChar[0]=sString[i];
i=i+1;
if(sChar[0]<0)
{
sChar[1]=sString[i];
i=i+1;
}
else
sChar[1]=0;
if(!CC_Find("百千萬(wàn)億佰仟%‰",sChar)&&sChar[0]!='%')
i=i-strlen(sChar);
}
/* The string is a number only if nothing is left unconsumed. */
if(i>=nLen)
return TRUE;
return FALSE;
}
/**************************************************************************************
8. IsAllLetter - test whether sString is made up solely of two-byte letters:
   lead byte 163 followed by a byte in 193..218 or 225..250.

   sString  NUL-terminated byte string read in two-byte steps

   Returns TRUE when the whole string can be consumed that way (an empty string
   yields TRUE), FALSE otherwise.
***************************************************************************************/
int IsAllLetter(unsigned char *sString)
{
    const unsigned char *pCur = sString;

    for (;;)
    {
        unsigned char cTrail;

        /* Need a complete two-byte unit that starts with 163. */
        if (pCur[0] == '\0' || pCur[1] == '\0' || pCur[0] != 163)
            break;

        cTrail = pCur[1];
        if (!((cTrail >= 193 && cTrail <= 218) || (cTrail >= 225 && cTrail <= 250)))
            break;

        pCur += 2;
    }

    return (*pCur == '\0') ? TRUE : FALSE;
}
/**************************************************************************************
9. BinarySearch - binary search for nVal in a table sorted in ascending order.

   nVal       value to look for
   nTable     table of ints, ascending
   nTableLen  number of entries in nTable

   Returns the index of an entry equal to nVal, or -1 when nVal is not present
   (also for an empty table).
***************************************************************************************/
int BinarySearch(int nVal, int *nTable,int nTableLen)
{
    int nLow = 0;
    int nHigh = nTableLen - 1;

    while (nLow <= nHigh)
    {
        /* Midpoint written as low + half the distance so the sum of the two
           bounds can never overflow int on very large tables. */
        const int nMid = nLow + (nHigh - nLow) / 2;

        if (nTable[nMid] == nVal)
        {
            return nMid;
        }
        if (nTable[nMid] < nVal)
        {
            nLow = nMid + 1;
        }
        else
        {
            nHigh = nMid - 1;
        }
    }
    return -1;
}
/**************************************************************************************
10. GetCharCount - count the characters of sWord that occur in sCharSet.

   sCharSet  set of characters to count
   sWord     word to scan; a byte of 128 or above starts a two-byte character,
             any other byte is a one-byte character

   Returns the number of characters of sWord found in sCharSet. A two-byte
   character counts when CC_Find locates it in sCharSet or when its first byte
   alone occurs in sCharSet; a one-byte character counts when strchr finds it.

   NOTE(review): because of the strchr fallback, a two-byte character is also
   counted when only its lead byte appears somewhere in sCharSet - confirm
   that this is intended.
***************************************************************************************/
int GetCharCount(char *sCharSet,char *sWord)
{
    /* The word is not modified here, so its length is measured once. */
    const size_t nLen = strlen(sWord);
    size_t k = 0;
    int nCount = 0;
    char tchar[3] = {0, 0, 0};

    while (k < nLen)
    {
        /* Compare as unsigned char so the two-byte test does not depend on
           whether plain char is signed. */
        const int bDouble = ((unsigned char)sWord[k] >= 128);

        tchar[0] = sWord[k];
        tchar[1] = 0;
        if (bDouble)
        {
            tchar[1] = sWord[k + 1];
            k += 1;
        }
        k += 1;

        if ((bDouble && CC_Find(sCharSet, tchar)) || strchr(sCharSet, tchar[0]))
        {
            nCount += 1;
        }
    }
    return nCount;
}
/**************************************************************************************
11. GetForeignCharCount - number of transliteration characters in sWord.

   Counts the characters of sWord found in each of the TRANS_ENGLISH,
   TRANS_JAPANESE and TRANS_RUSSIAN character sets (defined outside this
   file) and returns the largest of the three counts.
***************************************************************************************/
int GetForeignCharCount(char *sWord)
{
    unsigned int nBest = GetCharCount(TRANS_ENGLISH, sWord);
    const unsigned int nJapanese = GetCharCount(TRANS_JAPANESE, sWord);
    const unsigned int nRussian = GetCharCount(TRANS_RUSSIAN, sWord);

    /* Keep the maximum of the three counts. */
    if (nJapanese >= nBest)
    {
        nBest = nJapanese;
    }
    if (nRussian >= nBest)
    {
        nBest = nRussian;
    }
    return nBest;
}
/**************************************************************************************
12. IsAllChineseNum - test whether sWord is written entirely in Chinese numerals.

   sWord  word to test, read in two-byte steps

   Every two-byte character must occur in the numeral set; the very first
   character may instead come from the prefix set. The four-byte sequence
   compared with strncmp below is skipped as a whole wherever it occurs.
   Returns TRUE when all characters pass (an empty word yields TRUE), FALSE at
   the first character that does not.

   NOTE(review): the numeral and prefix literals appear to contain
   transcription artifacts (parenthesised Latin text), which would break the
   two-byte alignment CC_Find relies on - confirm against the original source.
***************************************************************************************/
int IsAllChineseNum(char *sWord)
{
    /* Read-only lookup sets, kept static so they are not rebuilt on each call. */
    static const char ChineseNum[]="零○一二兩三四五六七八九十廿百千萬(wàn)億壹貳叁肆伍陸柒捌玖拾佰仟∶·./點(diǎn)";
    static const char sPrefix[]="幾數(shù)第上成";
    /* The word is not modified here, so its length is measured once. */
    const size_t nLen = strlen(sWord);
    size_t k;
    char tchar[3];

    for (k = 0; k < nLen; k += 2)
    {
        /* Current two-byte character; for an odd-length word the second byte
           of the last unit is the string terminator. */
        tchar[0] = sWord[k];
        tchar[1] = sWord[k + 1];
        tchar[2] = '\0';

        if (strncmp(sWord + k, "分之", 4) == 0)
        {
            /* Skip the second half of the sequence too. */
            k += 2;
            continue;
        }

        if (!CC_Find(ChineseNum, tchar) && !(k == 0 && CC_Find(sPrefix, tchar)))
        {
            return FALSE;
        }
    }
    return TRUE;
}
/**************************************************************************************
13. PreProcessing - trim a word in place and split it into an id and a remainder.

   sWord     word to process; leading and trailing spaces are removed in place
   nId       out: CC_ID of the first two bytes for a Chinese word, 3755 for a
             delimiter
   sWordRet  out: the word without its first two bytes (Chinese) or the whole
             word (delimiter); the caller provides the buffer
   bAdd      not used by this function

   Returns TRUE when the word was classified as CT_CHINESE or CT_DELIMITER and
   the outputs were written; FALSE for an empty or all-space word and for any
   other character type.

   NOTE(review): the character type is taken from the first byte BEFORE the
   spaces are trimmed, so a word with leading spaces is classified by the
   space itself - confirm whether that is intended.
***************************************************************************************/
int PreProcessing(char *sWord, int *nId, char *sWordRet,int bAdd)
{
    int nType=charType((unsigned char *)sWord);
    int nLen=strlen(sWord);
    int nEnd=nLen-1,nBegin=0;

    (void)bAdd;

    if(nLen==0)
        return FALSE;

    /* Locate the first and last non-space bytes. */
    while(nEnd>=0&&sWord[nEnd]==' ')
        nEnd-=1;
    while(nBegin<=nEnd&&sWord[nBegin]==' ')
        nBegin+=1;
    if(nBegin>nEnd)
        return FALSE;

    /* Drop the surrounding spaces. Source and destination overlap inside
       sWord, so memmove is required; strncpy is undefined for overlapping
       buffers. */
    if(nEnd!=nLen-1||nBegin!=0)
    {
        const int nKept=nEnd-nBegin+1;

        memmove(sWord,sWord+nBegin,(size_t)nKept);
        sWord[nKept]=0;
    }

    /* Chinese word: the id comes from the first two bytes, the rest of the
       word is handed back. */
    if(nType==CT_CHINESE)
    {
        *nId=CC_ID(sWord[0],sWord[1]);
        strcpy(sWordRet,&sWord[2]);
        return TRUE;
    }

    /* Delimiter: fixed id 3755, the whole word is handed back. */
    if(nType==CT_DELIMITER)
    {
        *nId=3755;
        strcpy(sWordRet,sWord);
        return TRUE;
    }
    return FALSE;
}
/**************************************************************************************
14. IsYearTime - test whether sNum looks like a year.

   sNum  NUL-terminated string to test

   Returns TRUE when any of these holds, FALSE otherwise:
     - all bytes are below 128 and the length is 4, or the length is 2 and the
       first byte is greater than '4';
     - IsAllNum accepts it and the length is at least 6, or the length is 4 and
       its first two bytes are found in the digit literal passed to CC_Find;
     - the length is at least 3 and the count of Chinese digit characters
       equals half the length;
     - the length is 8 and exactly two characters come from the
       thousand/zero set;
     - the length is 2 and exactly one character comes from the thousand set;
     - the length is 4, with exactly one heavenly-stem character in the whole
       string and exactly one earthly-branch character from byte 2 onwards.

   NOTE(review): the character-set literals may have been altered in
   transcription - confirm against the original source.
***************************************************************************************/
int IsYearTime(char *sNum)
{
    const unsigned int nLen = strlen(sNum);
    char sHead[3];

    /* First two bytes of the input as a NUL-terminated string. */
    strncpy(sHead, sNum, 2);
    sHead[2] = 0;

    if (IsAllSingleByte((unsigned char *)sNum))
    {
        if (nLen == 4 || (nLen == 2 && sNum[0] > '4'))
            return TRUE;
    }

    if (IsAllNum((unsigned char *)sNum))
    {
        if (nLen >= 6 || (nLen == 4 && CC_Find("56789", sHead)))
            return TRUE;
    }

    if (nLen >= 3 &&
        GetCharCount("零○一二三四五六七八九壹貳叁肆伍陸柒捌玖", sNum) == (int)nLen / 2)
    {
        return TRUE;
    }

    if (nLen == 8 && GetCharCount("千仟零○", sNum) == 2)
    {
        return TRUE;
    }

    if (nLen == 2 && GetCharCount("千仟", sNum) == 1)
    {
        return TRUE;
    }

    if (nLen == 4)
    {
        if (GetCharCount("甲乙丙丁戊己庚辛壬癸", sNum) == 1 &&
            GetCharCount("子丑寅卯辰巳午未申酉戌亥", sNum + 2) == 1)
        {
            return TRUE;
        }
    }

    return FALSE;
}
/**************************************************************************************
15. PKU2973POS - translate a part-of-speech handle into its "973" tag string.

   nHandle  handle to translate; the table values look like two ASCII letters
            packed as first*256 + second (e.g. 24932 = 'a'*256 + 'd')
   sPOS973  out: receives the tag (at most two characters plus terminator),
            or "@" when nHandle is not in the table

   Always returns TRUE.
***************************************************************************************/
int PKU2973POS(int nHandle, char *sPOS973)
{
    enum { POS_TABLE_LEN = 46 };

    /* Handles in ascending order, as BinarySearch requires. */
    int nHandleSet[POS_TABLE_LEN]={24832,24932,24935,24942,25088,25344,25600,25703,25856,26112,26368,26624,26880,27136,27392,27648,27904,28160,28263,28274,28275,28276,28280,28282,28416,28672,28928,29184,29440,29696,29799,29952,30052,30055,30058,30060,30070,30074,30208,30308,30311,30318,30464,30720,30976,31232};
    /* Tag for the handle at the same index of nHandleSet. */
    char sPOSRelated[POS_TABLE_LEN][3]={"a", "ad","ga","an","f", "c", "d", "d", "e","nd","g", "h", "i", "j", "k", "l", "m", "n", "gn","nh","ns","ni","ws", "nz","o", "p", "q", "r", "nl","nt","gt","u", "ud","ug","uj","ul","uv","uz","v", "vd","gv","vn","w", "x", "u", "a"};
    const int nFound = BinarySearch(nHandle, nHandleSet, POS_TABLE_LEN);

    strcpy(sPOS973, (nFound == -1) ? "@" : sPOSRelated[nFound]);
    return TRUE;
}
/**************************************************************************************
16. 讀字典文件中的內(nèi)容到鏈表中
***************************************************************************************/
int LoadDicFile(pDictionary p, char *sFilename,int bReset)
{
FILE *fp;
int i,j,nBuffer[3];
if((fp=fopen(sFilename,"rb"))==NULL)
return FALSE;
/*鏈表中的指針為NULL*/
for( i=0;i<CC_NUM;i++)
{
if(p->m_IndexTable[i].pWordItemHead)
{
for( j=0;j<p->m_IndexTable[i].nCount;j++)
{
if(p->m_IndexTable[i].pWordItemHead[j].sWord!=NULL)
{
free(p->m_IndexTable[i].pWordItemHead[j].sWord);
p->m_IndexTable[i].pWordItemHead[j].sWord=NULL;
}
}
free(p->m_IndexTable[i].pWordItemHead);
p->m_IndexTable[i].pWordItemHead=NULL;
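/* (code-viewer page residue, not part of the program)
   Keyboard shortcuts:
     Copy code         Ctrl + C
     Search code       Ctrl + F
     Full-screen mode  F11
     Toggle theme      Ctrl + Shift + D
     Show shortcuts    ?
     Increase font     Ctrl + =
     Decrease font     Ctrl + -
*/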