// [code-viewer page header, not part of the source] utility.h
// 字號:
/****************************************************************************
*
* Copyright (c) 2000, 2001
* Machine Group
* Software Research Lab.
* Institute of Computing Tech.
* Chinese Academy of Sciences
* All rights reserved.
*
* This file is the confidential and proprietary property of
* Institute of Computing Tech. and the possession or use of this file requires
* a written license from the author.
* Filename: Utility.h
* Abstract:
* Utility functions for Chinese Language Processing
* Author: Kevin Zhang
* (zhanghp@software.ict.ac.cn)
* Date: 2002-1-8
*
* Notes:
*
*
****************************************************************************/
#if !defined(AFX_CHINESE_UTILITY_H__B6D7EA03_7BCD_46AD_B38C_D8033ACD5813__INCLUDED_)
#define AFX_CHINESE_UTILITY_H__B6D7EA03_7BCD_46AD_B38C_D8033ACD5813__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
// Character-type codes.
// NOTE(review): presumably these are the values returned by charType();
// confirm against the implementation.
//
// Every derived code is wrapped in parentheses so that it behaves as a single
// value inside larger expressions. Without them, `CT_DELIMITER * 2` would
// expand to `CT_SINGLE+1*2` and evaluate to 5 instead of 8.
#define CT_SENTENCE_BEGIN 0                  // Sentence begin
#define CT_SENTENCE_END   1                  // Sentence ending
#define CT_SINGLE         3                  // Single-byte character
#define CT_DELIMITER      (CT_SINGLE + 1)    // Delimiter (4)
#define CT_CHINESE        (CT_SINGLE + 2)    // Chinese character (5)
#define CT_LETTER         (CT_SINGLE + 3)    // Letter; original note: HanYu Pinyin (6)
#define CT_NUM            (CT_SINGLE + 4)    // Number (7)
#define CT_INDEX          (CT_SINGLE + 5)    // Index number (8)
#define CT_OTHER          (CT_SINGLE + 12)   // Other (15)
// Set of postfix characters, stored as one concatenated string literal.
// NOTE(review): presumably single-character place-name suffixes consumed by
// PostfixSplit(); confirm against the implementation.
// NOTE(review): the literal contains ASCII fragments in parentheses such as
// "(huán)", "(qū)", "(xiāng)" and "(zhèn)". They look like artifacts of a
// charset conversion rather than intended data; verify against the original
// source file before relying on this table.
#define POSTFIX_SINGLE "壩邦堡杯城池村單島道堤店洞渡隊法峰府岡港閣宮溝國海號河湖環(huán)集江獎礁角街井郡坑口礦里嶺樓路門盟廟弄牌派坡鋪旗橋區(qū)渠泉人山省市水寺塔臺灘壇堂廳亭屯灣文屋溪峽縣線鄉(xiāng)巷型洋窯營嶼語園苑院閘寨站鎮(zhèn)州莊族陂庵町"
// Brace-enclosed initializer list of multi-character postfix strings. The
// final element is the empty string "", which can serve as an end-of-list
// sentinel. The macro name keeps its original spelling ("MUTIPLE").
// NOTE(review): presumably multi-character place-name suffixes consumed by
// PostfixSplit(); confirm against the implementation.
// NOTE(review): several entries contain ASCII fragments in parentheses such as
// "(qū)", "(zhèn)" and "(nóng)". They look like artifacts of a charset
// conversion; verify against the original source file.
#define POSTFIX_MUTIPLE {"半島","草原","城市","大堤","大公國","大橋","地區(qū)","帝國","渡槽","港口","高速公路","高原","公路","公園","共和國","谷地","廣場","國道","海峽","胡同","機場","集鎮(zhèn)","教區(qū)","街道","口岸","碼頭","煤礦","牧場","農(nóng)場","盆地","平原","丘陵","群島","沙漠","沙洲","山脈","山丘","水庫","隧道","特區(qū)","鐵路","新村","雪峰","鹽場","鹽湖","漁場","直轄市","自治區(qū)","自治縣","自治州",""}
// Character set associated with English transliteration, stored as one
// concatenated string literal.
// NOTE(review): presumably the characters used when transliterating English
// names, paired with TT_ENGLISH; confirm against the implementation.
// NOTE(review): the literal contains '?' characters and ASCII fragments in
// parentheses such as "(dāng)", "(nèi)" and "(wèi)". They look like
// artifacts of a charset conversion; verify against the original source file.
#define TRANS_ENGLISH "·—阿埃艾愛安昂敖奧澳笆芭巴白拜班邦保堡鮑北貝本比畢彼別波玻博勃伯泊卜布才采倉查差柴徹川茨慈次達大戴代丹旦但當(dāng)?shù)赖碌玫牡堑系业俚鄱|杜敦多額俄厄鄂恩爾伐法范菲芬費佛夫福弗甫噶蓋干岡哥戈革葛格各根古瓜哈海罕翰汗?jié)h豪合河赫亨侯呼胡華霍基吉及加賈堅簡杰金京久居君喀卡凱坎康考柯科可克肯庫奎拉喇萊來蘭郎朗勞勒雷累楞黎理李里莉麗歷利立力連廉良列烈林隆盧虜魯路倫侖羅洛瑪馬買麥邁曼茅茂梅門蒙盟米蜜密敏明摩莫墨默姆木穆那娜納乃奈南內(nèi)尼年涅寧紐努諾歐帕潘畔龐培佩彭皮平潑普其契恰強喬切欽沁泉讓熱榮肉儒瑞若薩塞賽桑瑟森莎沙山善紹舍圣施詩石什史士守斯司絲蘇素索塔泰坦湯唐陶特提汀圖土吐托陀瓦萬王旺威韋維魏溫文翁沃烏吾武伍西錫希喜夏相香歇謝辛新牙雅亞彥堯葉依伊衣宜義因音英雍尤于約宰澤增詹珍治中仲朱諸卓孜祖佐伽婭尕腓滕濟嘉津賴蓮琳律略慕妮聶裴浦奇齊琴茹珊衛(wèi)欣遜札哲智茲芙汶迦珀琪梵斐胥黛"
// Character set associated with Russian transliteration, stored as one
// concatenated string literal.
// NOTE(review): presumably the characters used when transliterating Russian
// names, paired with TT_RUSSIAN; confirm against the implementation.
// NOTE(review): the runtime bytes of this literal depend on the encoding the
// file is saved in; verify that it matches the encoding the code expects.
#define TRANS_RUSSIAN "·阿安奧巴比彼波布察茨大德得丁杜爾法夫伏甫蓋格哈基加堅捷金卡科可克庫拉萊蘭勒雷里歷利連列盧魯羅洛馬梅蒙米姆娜涅寧諾帕潑普奇齊喬切日薩色山申什斯索塔坦特托娃維文烏西希謝亞耶葉依伊以扎佐柴達登蒂戈果海赫華霍吉季津柯理琳瑪曼穆納尼契欽丘桑沙舍泰圖瓦萬雅卓茲"
// Character set associated with Japanese names, stored as one concatenated
// string literal.
// NOTE(review): presumably the characters typical of Japanese names, paired
// with TT_JAPANESE; confirm against the implementation.
// NOTE(review): the literal contains ASCII fragments in parentheses such as
// "(fēng)", "(guān)" and "(nèi)". They look like artifacts of a charset
// conversion; verify against the original source file.
#define TRANS_JAPANESE "安奧八白百邦保北倍本比濱博步部彩菜倉昌長朝池赤川船淳次村大代島稻道德地典渡爾繁飯風(fēng)福岡高工宮古谷關(guān)廣桂貴好浩和合河黑橫恒宏后戶荒繪吉紀佳加見健江介金今進井靜敬靖久酒菊俊康可克口梨理里禮栗麗利立涼良林玲鈴柳隆鹿麻瑪美萌彌敏木納南男內(nèi)鳥寧朋片平崎齊千前淺橋琴青清慶秋丘曲泉仁忍日榮若三森紗杉山善上伸神圣石實矢世市室水順司松泰桃藤天田土萬望尾未文武五舞西細夏憲相小孝新星行雄秀雅亞巖楊洋陽遙野也葉一伊衣逸義益櫻永由有佑宇羽郁淵元垣原遠月悅早造則澤增扎宅章昭沼真政枝知之植智治中忠仲竹助椎子佐阪坂堀荻菅薰浜瀨鳩筱"
// Translation (transliteration source) type codes.
// NOTE(review): presumably the values returned by GetForeignType(); confirm
// against the implementation.
#define TT_ENGLISH  0   // English
#define TT_RUSSIAN  1   // Russian
#define TT_JAPANESE 2   // Japanese
// Separator character sets. The macro names keep the original "SEPERATOR"
// spelling; each macro is a string whose characters form the set.
//
// Chinese sentence-level separators.
// NOTE(review): several characters in this literal appear to be plain ASCII
// and duplicate SEPERATOR_E_SENTENCE; full-width forms may have been lost in
// a charset conversion. Verify against the original source file.
#define SEPERATOR_C_SENTENCE "。!?:;…"
// Chinese sub-sentence separators.
// NOTE(review): the comma and parentheses here also appear to be ASCII;
// verify as above.
#define SEPERATOR_C_SUB_SENTENCE "、,()“”‘’"
// English (ASCII) sentence-level separators.
#define SEPERATOR_E_SENTENCE "!?:;"
// English (ASCII) sub-sentence separators. \042 is the octal escape for the
// double-quote character, so the set is: , ( ) " '
#define SEPERATOR_E_SUB_SENTENCE ",()\042'"
// Linking whitespace: newline, carriage return and space.
#define SEPERATOR_LINK "\n\r "
// Marker strings for the beginning and the end of a sentence.
// NOTE(review): presumably these correspond to CT_SENTENCE_BEGIN and
// CT_SENTENCE_END; confirm against the implementation. The runtime bytes
// depend on the encoding this file is saved in.
#define SENTENCE_BEGIN "始##始"
#define SENTENCE_END "末##末"
// Public utility functions. Only the declarations are visible in this header;
// every return-value description below that is marked NOTE(review) is an
// assumption to be confirmed against the implementation.

// Generate the GB2312 list file named by sFileName.
// NOTE(review): presumably returns true on success.
bool GB2312_Generate(char *sFileName);

// Generate the Chinese character list file named by sFileName.
// NOTE(review): presumably returns true on success.
bool CC_Generate(char *sFileName);

// Find a Chinese sub-string (strCharSet) in the Chinese string (string).
// NOTE(review): presumably returns a pointer to the match, or NULL when there
// is none, in the manner of strstr().
char *CC_Find(const char *string, const char *strCharSet);

// Judge the type of sChar, or of the pair (sChar, sChar+1).
// NOTE(review): presumably returns one of the CT_* codes.
int charType(unsigned char *sChar);

// Get the maximal prefix of sSentence that is made up of Chinese characters.
// NOTE(review): presumably the return value is the length of that prefix.
unsigned int GetCCPrefix(unsigned char *sSentence);

// Judge whether the string is made up entirely of Chinese characters.
bool IsAllChinese(unsigned char *sString);

// Judge whether the string is made up entirely of non-Chinese characters.
bool IsAllNonChinese(unsigned char *sString);

// Judge whether the string is made up entirely of single-byte characters.
bool IsAllSingleByte(unsigned char *sString);

// Judge whether the string is made up entirely of number characters.
bool IsAllNum(unsigned char *sString);

// Judge whether the string is made up entirely of index-number characters.
bool IsAllIndex(unsigned char *sString);

// Judge whether the string is made up entirely of letter characters.
bool IsAllLetter(unsigned char *sString);

// Judge whether the string is made up entirely of delimiters.
bool IsAllDelimiter(unsigned char *sString);

// Binary search for nVal in nTable, a table of nTableLen entries.
// NOTE(review): the meaning of the result (index, or a not-found marker) and
// the required ordering of nTable are not visible here.
int BinarySearch(int nVal, int *nTable,int nTableLen);

// Judge whether sWord may be a foreign translation.
bool IsForeign(char *sWord);

// Decide whether the word is a Chinese number word.
bool IsAllChineseNum(char *sWord);

// Decide whether the word consists entirely of foreign-translation characters.
bool IsAllForeign(char *sWord);

// Original comment: "Decide whether the word is all non-foreign translation".
// NOTE(review): that text does not match the name or the int return type; the
// name suggests this counts the foreign-translation characters in sWord.
int GetForeignCharCount(char *sWord);

// Get the count of characters that occur both in sWord and in sCharSet.
int GetCharCount(char *sCharSet,char *sWord);

// Return the foreign type of sWord.
// NOTE(review): presumably one of the TT_* codes.
int GetForeignType(char *sWord);

// Get the postfix of sWord.
// NOTE(review): presumably sWordRet and sPostfix are caller-provided output
// buffers receiving the stem and the postfix; required sizes are not visible.
bool PostfixSplit(char *sWord, char *sWordRet, char *sPostfix);

// Disabled declaration kept from the original, with its trailing comment:
//bool IsSingleByteDelimiter(char cByteChar);
//Judge whether it's a num
#endif // !defined(AFX_CHINESE_UTILITY_H__B6D7EA03_7BCD_46AD_B38C_D8033ACD5813__INCLUDED_)
/* [code-viewer page footer, not part of the source]
 * 快捷鍵說明
 * 複製代碼
 * Ctrl + C
 * 搜索代碼
 * Ctrl + F
 * 全屏模式
 * F11
 * 切換主題
 * Ctrl + Shift + D
 * 顯示快捷鍵
 * ?
 * 增大字號
 * Ctrl + =
 * 減小字號
 * Ctrl + -
 */