亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? utility.java

?? 基于java語言的分詞系統
?? JAVA
?? 第 1 頁 / 共 3 頁
字號:
package org.ictclas4j.utility;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;

import org.ictclas4j.bean.Dictionary;
import org.ictclas4j.bean.PersonName;
import org.ictclas4j.segment.PosTagger;

public class Utility {
	// The number of Chinese Char, including 5 empty positions between 3756-3761.
	// 6768 = 72 * 94, which matches the lead-byte range 176..247 that charType
	// treats as Chinese times the trail-byte range 161..254 used by CC_Generate.
	public static final int CC_NUM = 6768;

	// NOTE(review): presumably the maximum length of a single word - confirm against callers.
	public static final int WORD_MAXLENGTH = 100;

	// WT_* codes. NOTE(review): presumably "word type" tags (delimiter / Chinese / other);
	// nothing in the visible part of this file reads them.
	public static final int WT_DELIMITER = 0;

	public static final int WT_CHINESE = 1;

	public static final int WT_OTHER = 2;

	// CT_* codes: character types. The values from CT_SINGLE upwards are the
	// possible results of charType(String); the two sentence markers are not
	// produced by charType.
	public static final int CT_SENTENCE_BEGIN = 1;// Sentence begin

	public static final int CT_SENTENCE_END = 4;// Sentence ending

	public static final int CT_SINGLE = 5;// Single byte (first byte < 128, not a delimiter)

	public static final int CT_DELIMITER = CT_SINGLE + 1;// Delimiter: ASCII punctuation, or lead byte 161 / other 163

	public static final int CT_CHINESE = CT_SINGLE + 2;// Chinese char: lead byte 176..247

	public static final int CT_LETTER = CT_SINGLE + 3;// Double-byte letter: lead byte 163, trail 193..218 or 225..250

	public static final int CT_NUM = CT_SINGLE + 4;// Double-byte digit: lead byte 163, trail 176..185

	public static final int CT_INDEX = CT_SINGLE + 5;// Lead byte 162

	public static final int CT_OTHER = CT_SINGLE + 12;// Other: anything charType cannot classify

	// NOTE(review): the two limits below are not used in the visible part of the
	// file; their exact meaning should be confirmed against the segmenter.
	public static final int MAX_WORDS = 650;

	public static final int MAX_SEGMENT_NUM = 10;

	// Single-character postfixes, packed into one string (one postfix per character).
	// NOTE(review): the entries look like place-name suffixes - confirm against callers.
	public static final String POSTFIX_SINGLE = "壩邦堡杯城池村單島道堤店洞渡隊法峰府岡港閣宮溝國海號河湖環集江獎礁角街井郡坑口礦里嶺樓路門盟廟弄牌派坡鋪旗橋區渠泉人山省市水寺塔臺灘壇堂廳亭屯灣文屋溪峽縣線鄉巷型洋窯營嶼語園苑院閘寨站鎮州莊族陂庵町";

	// Multi-character postfixes. The array ends with an empty string.
	// NOTE(review): the trailing "" is presumably an end-of-list sentinel - verify
	// before removing it.
	public static final String[] POSTFIX_MUTIPLE = { "半島", "草原", "城市", "大堤", "大公國", "大橋", "地區", "帝國", "渡槽", "港口",
			"高速公路", "高原", "公路", "公園", "共和國", "谷地", "廣場", "國道", "海峽", "胡同", "機場", "集鎮", "教區", "街道", "口岸", "碼頭", "煤礦",
			"牧場", "農場", "盆地", "平原", "丘陵", "群島", "沙漠", "沙洲", "山脈", "山丘", "水庫", "隧道", "特區", "鐵路", "新村", "雪峰", "鹽場", "鹽湖",
			"漁場", "直轄市", "自治區", "自治縣", "自治州", "" };

	// Character sets packed into strings, one entry per character.
	// NOTE(review): judging by the names and the TT_* codes below, these are the
	// characters used to transliterate English, Russian and Japanese names -
	// confirm against the code that reads them.
	public static final String TRANS_ENGLISH = "·—阿埃艾愛安昂敖奧澳笆芭巴白拜班邦保堡鮑北貝本比畢彼別波玻博勃伯泊卜布才采倉查差柴徹川茨慈次達大戴代丹旦但當道德得的登迪狄蒂帝丁東杜敦多額俄厄鄂恩爾伐法范菲芬費佛夫福弗甫噶蓋干岡哥戈革葛格各根古瓜哈海罕翰汗漢豪合河赫亨侯呼胡華霍基吉及加賈堅簡杰金京久居君喀卡凱坎康考柯科可克肯庫奎拉喇萊來蘭郎朗勞勒雷累楞黎理李里莉麗歷利立力連廉良列烈林隆盧虜魯路倫侖羅洛瑪馬買麥邁曼茅茂梅門蒙盟米蜜密敏明摩莫墨默姆木穆那娜納乃奈南內尼年涅寧紐努諾歐帕潘畔龐培佩彭皮平潑普其契恰強喬切欽沁泉讓熱榮肉儒瑞若薩塞賽桑瑟森莎沙山善紹舍圣施詩石什史士守斯司絲蘇素索塔泰坦湯唐陶特提汀圖土吐托陀瓦萬王旺威韋維魏溫文翁沃烏吾武伍西錫希喜夏相香歇謝辛新牙雅亞彥堯葉依伊衣宜義因音英雍尤于約宰澤增詹珍治中仲朱諸卓孜祖佐伽婭尕腓滕濟嘉津賴蓮琳律略慕妮聶裴浦奇齊琴茹珊衛欣遜札哲智茲芙汶迦珀琪梵斐胥黛";

	public static final String TRANS_RUSSIAN = "·阿安奧巴比彼波布察茨大德得丁杜爾法夫伏甫蓋格哈基加堅捷金卡科可克庫拉萊蘭勒雷里歷利連列盧魯羅洛馬梅蒙米姆娜涅寧諾帕潑普奇齊喬切日薩色山申什斯索塔坦特托娃維文烏西希謝亞耶葉依伊以扎佐柴達登蒂戈果海赫華霍吉季津柯理琳瑪曼穆納尼契欽丘桑沙舍泰圖瓦萬雅卓茲";

	public static final String TRANS_JAPANESE = "安奧八白百邦保北倍本比濱博步部彩菜倉昌長朝池赤川船淳次村大代島稻道德地典渡爾繁飯風福岡高工宮古谷關廣桂貴好浩和合河黑橫恒宏后戶荒繪吉紀佳加見健江介金今進井靜敬靖久酒菊俊康可克口梨理里禮栗麗利立涼良林玲鈴柳隆鹿麻瑪美萌彌敏木納南男內鳥寧朋片平崎齊千前淺橋琴青清慶秋丘曲泉仁忍日榮若三森紗杉山善上伸神圣石實矢世市室水順司松泰桃藤天田土萬望尾未文武五舞西細夏憲相小孝新星行雄秀雅亞巖楊洋陽遙野也葉一伊衣逸義益櫻永由有佑宇羽郁淵元垣原遠月悅早造則澤增扎宅章昭沼真政枝知之植智治中忠仲竹助椎子佐阪坂堀荻菅薰浜瀨鳩筱";

	// Translation type codes, one per TRANS_* character set above
	public static final int TT_ENGLISH = 0;

	public static final int TT_RUSSIAN = 1;

	public static final int TT_JAPANESE = 2;

	// Separator types. Each constant packs its separator characters into one
	// string. The _C_ / _E_ infixes presumably stand for Chinese and English
	// punctuation, and SENTENCE / SUB_SENTENCE for sentence-ending versus
	// clause-level marks - TODO confirm against the sentence splitter.
	public static final String SEPERATOR_C_SENTENCE = "。!?:;…";

	public static final String SEPERATOR_C_SUB_SENTENCE = "、,()“”‘’";

	public static final String SEPERATOR_E_SENTENCE = "!?:;";

	public static final String SEPERATOR_E_SUB_SENTENCE = ",()\"'";

	public static final String SEPERATOR_LINK = "\n\r  ";

	// Marker strings for the beginning and the end of a sentence
	public static final String SENTENCE_BEGIN = "始##始";

	public static final String SENTENCE_END = "末##末";

	// Separator between two words
	public static final String WORD_SEGMENTER = "@";

	// NOTE(review): the limits below are not used in the visible part of this
	// file; their meaning is inferred from the names only.
	public static final int MAX_WORDS_PER_SENTENCE = 120;

	public static final int MAX_UNKNOWN_PER_SENTENCE = 200;

	public static final int MAX_POS_PER_WORD = 20;

	public static final int LITTLE_FREQUENCY = 6;

	// Tagging modes. NOTE(review): presumably normal POS tagging versus person,
	// place and transliterated-person recognition - confirm against PosTagger.
	public enum TAG_TYPE {
		TT_NORMAL, TT_PERSON, TT_PLACE, TT_TRANS_PERSON
	};

	// 2079997 = 1993123 + 86874. An older note on this line read
	// "7528283+329805", which does not add up to the value used here.
	public static final int MAX_FREQUENCE = 2079997;

	public static final int MAX_SENTENCE_LEN = 2000;

	public static final double INFINITE_VALUE = 10000.00;

	// Smoothing parameter
	public static final double SMOOTH_PARAM = 0.1;

	// Placeholder tokens of the form "未##X" ("unknown##X"); the last character
	// means person, place, number, time and string respectively.
	public static final String UNKNOWN_PERSON = "未##人";

	public static final String UNKNOWN_SPACE = "未##地";

	public static final String UNKNOWN_NUM = "未##數";

	public static final String UNKNOWN_TIME = "未##時";

	public static final String UNKNOWN_LETTER = "未##串";

	/**
	 * Writes a list of all double-byte codes whose two byte values both lie in
	 * 161..254 to the given file, one line per code in the form
	 * {@code <i><j>,<i>,<j>} (for example {@code 161161,161,161}).
	 * 
	 * @param fileName
	 *            path of the file to create or overwrite
	 * @return {@code true} if the whole list was written; {@code false} if the
	 *         file could not be opened, is not writable, or a write failed
	 */
	public static boolean gbGenerate(String fileName) {
		File file = new File(fileName);
		PrintWriter out = null;
		try {
			out = new PrintWriter(new FileOutputStream(file));
			if (!file.canWrite()) {
				return false;// fail while opening the file
			}
			for (int i = 161; i < 255; i++) {
				for (int j = 161; j < 255; j++) {
					out.println("" + i + j + "," + i + "," + j);
				}
			}
			// PrintWriter never throws on write errors; checkError() flushes the
			// stream and reports whether any write has failed.
			return !out.checkError();
		} catch (FileNotFoundException e) {
			e.printStackTrace();
			// The file could not be opened, so nothing was generated.
			return false;
		} finally {
			// Release the file handle on every path, including the early return.
			if (out != null) {
				out.close();
			}
		}
	}

	/**
	 * Generates the Chinese Char List file: one line per double-byte code with
	 * a first byte in 176..254 and a second byte in 161..254, in the form
	 * {@code <i><j>,<i>,<j>} (for example {@code 176161,176,161}).
	 * 
	 * @param fileName
	 *            the file name for the output CC List
	 * @return {@code true} if the whole list was written; {@code false} if the
	 *         file could not be opened or a write failed
	 */
	public static boolean CC_Generate(String fileName) {
		File file = new File(fileName);
		PrintWriter out = null;
		try {
			out = new PrintWriter(new FileOutputStream(file));
			for (int i = 176; i < 255; i++) {
				for (int j = 161; j < 255; j++) {
					out.println("" + i + j + "," + i + "," + j);
				}
			}
			// PrintWriter never throws on write errors; checkError() flushes the
			// stream and reports whether any write has failed.
			return !out.checkError();
		} catch (FileNotFoundException e) {
			e.printStackTrace();
			// The file could not be opened, so nothing was generated.
			return false;
		} finally {
			if (out != null) {
				out.close();
			}
		}
	}

	/**
	 * Finds a Chinese sub-string in a Chinese string, both given as bytes.
	 * <p>
	 * A match only counts when it starts at an even byte offset. Presumably
	 * this is so that a hit never begins in the middle of a double-byte
	 * character (assumes {@code string} consists of two-byte characters only).
	 * <p>
	 * Earlier versions returned {@code true} for null arguments and when the
	 * sub-string was not found at all; both cases now return {@code false}.
	 * 
	 * @param string
	 *            the bytes to search in, may be {@code null}
	 * @param strCharSet
	 *            the bytes to search for, may be {@code null}
	 * @return {@code true} only if {@code strCharSet} is found in
	 *         {@code string} at an even offset
	 */
	public static boolean CC_Find(final byte[] string, final byte[] strCharSet) {
		if (string == null || strCharSet == null) {
			return false;
		}
		// strstr reports "not found" with a negative index (-1).
		int index = strstr(string, strCharSet);
		return index >= 0 && index % 2 == 0;
	}

	/**
	 * Judges the type of the first character of {@code str}.
	 * <p>
	 * The string is encoded as GBK so that the byte ranges below mean the same
	 * thing on every platform (the platform default charset is only used as a
	 * fallback when GBK is unavailable). Classification by first byte
	 * {@code b1} and second byte {@code b2}, both unsigned:
	 * <ul>
	 * <li>{@code b1 < 128}: {@link #CT_DELIMITER} if it is one of
	 * {@code "!,.?()[]{}+=}, otherwise {@link #CT_SINGLE}</li>
	 * <li>{@code b1 == 162}: {@link #CT_INDEX}</li>
	 * <li>{@code b1 == 163}: {@link #CT_NUM} for {@code b2} in 176..185,
	 * {@link #CT_LETTER} for {@code b2} in 193..218 or 225..250, otherwise
	 * {@link #CT_DELIMITER}</li>
	 * <li>{@code b1 == 161}: {@link #CT_DELIMITER}</li>
	 * <li>{@code b1} in 176..247: {@link #CT_CHINESE}</li>
	 * </ul>
	 * 
	 * @param str
	 *            the text whose first character is classified, may be
	 *            {@code null} or empty
	 * @return one of the {@code CT_*} codes; {@link #CT_OTHER} for null or
	 *         empty input and for anything not covered above
	 */
	public static int charType(String str) {
		if (str == null || str.length() == 0) {
			return CT_OTHER;
		}

		byte[] b;
		try {
			// The ranges tested below are GB double-byte values; do not depend on
			// the platform default charset (UTF-8 on most modern systems).
			b = str.getBytes("GBK");
		} catch (UnsupportedEncodingException e) {
			b = str.getBytes();
		}
		byte b1 = b[0];
		byte b2 = b.length > 1 ? b[1] : 0;

		int lead = getUnsigned(b1);
		if (lead < 128) {
			if ("\"!,.?()[]{}+=".indexOf((char) b1) != -1) {
				return CT_DELIMITER;
			}
			return CT_SINGLE;
		}
		if (lead == 162) {
			return CT_INDEX;
		}
		if (lead == 163) {
			int trail = getUnsigned(b2);
			if (trail > 175 && trail < 186) {
				return CT_NUM;
			}
			if ((trail >= 193 && trail <= 218) || (trail >= 225 && trail <= 250)) {
				return CT_LETTER;
			}
			return CT_DELIMITER;
		}
		if (lead == 161) {
			return CT_DELIMITER;
		}
		if (lead >= 176 && lead <= 247) {
			return CT_CHINESE;
		}
		return CT_OTHER;
	}

	/**
	 * Gets the length in bytes of the longest prefix made up of Chinese chars.
	 * <p>
	 * The sentence is scanned two bytes at a time for as long as the first
	 * byte of the pair lies in 176..247. A pair is only counted when both of
	 * its bytes are present, so the result never exceeds the array length even
	 * if the input ends with a lone lead byte.
	 * 
	 * @param sSentence
	 *            the original sentence, which may include Chinese and
	 *            non-Chinese chars; may be {@code null}
	 * @return the byte offset at which the Chinese prefix ends (0 for
	 *         {@code null} or when the sentence does not start with a Chinese
	 *         char)
	 */
	public static int getCCPrefix(byte[] sSentence) {
		if (sSentence == null) {
			return 0;
		}
		int nLen = sSentence.length;
		int nCurPos = 0;
		// "nCurPos + 1 < nLen": both bytes of the character must be available.
		while (nCurPos + 1 < nLen) {
			int lead = getUnsigned(sSentence[nCurPos]);
			if (lead <= 175 || lead >= 248) {
				break;
			}
			nCurPos += 2;// Get next Chinese Char
		}
		return nCurPos;
	}

	/**
	 * Judges whether the string is made up of Chinese chars only.
	 * <p>
	 * Every character is encoded as GBK (the platform default charset is only
	 * a fallback when GBK is unavailable) and must be a two-byte code whose
	 * first byte lies in 176..247 and whose second byte lies in 161..254.
	 * <p>
	 * Earlier versions tested the first byte twice, never looked at the second
	 * byte, and silently skipped single-byte characters, so a plain ASCII
	 * string was reported as all-Chinese.
	 * 
	 * @param str
	 *            the text to check, may be {@code null}
	 * @return {@code true} if every character is a Chinese char (also for the
	 *         empty string); {@code false} for {@code null} or as soon as a
	 *         non-Chinese character is found
	 */
	public static boolean isAllChinese(String str) {
		if (str == null) {
			return false;
		}

		for (int i = 0; i < str.length(); i++) {
			String ch = str.substring(i, i + 1);
			byte[] b;
			try {
				b = ch.getBytes("GBK");
			} catch (UnsupportedEncodingException e) {
				b = ch.getBytes();
			}
			// A Chinese char is always a double-byte code.
			if (b.length != 2) {
				return false;
			}
			int lead = getUnsigned(b[0]);
			int trail = getUnsigned(b[1]);
			if (!(lead > 175 && lead < 248) || !(trail > 160 && trail < 255)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Judges whether the byte string contains no Chinese char at all.
	 * <p>
	 * The bytes are walked character by character: a negative (high-bit) byte
	 * is taken as the first byte of a two-byte character and advances the
	 * position by two, any other byte advances it by one. The scan stops with
	 * {@code false} at the first character whose leading byte, read as
	 * unsigned, lies in 176..247.
	 * 
	 * @param sString
	 *            the original sentence as bytes, which may include Chinese and
	 *            non-Chinese chars
	 * @return {@code true} if no character starts with a byte in 176..247
	 */
	public static boolean isAllNonChinese(byte[] sString) {
		final int total = sString.length;
		int pos = 0;

		while (pos < total) {
			final byte current = sString[pos];
			final int value = getUnsigned(current);
			if (value > 175 && value < 248) {
				return false;
			}
			// High bit set: skip the second byte of the double-byte character too.
			pos += (current < 0) ? 2 : 1;
		}
		return true;
	}

	/**
	 * Judges whether the string is made up of single-byte chars only, i.e.
	 * every character has a code below 128.
	 * <p>
	 * The check works on the characters themselves rather than on encoded
	 * bytes: a Java {@code byte} is signed, so the earlier test
	 * {@code b[i] < 128} was always true and every non-null string passed.
	 * 
	 * @param str
	 *            the text to check, may be {@code null}
	 * @return {@code true} if all characters are below 128 (also for the empty
	 *         string); {@code false} for {@code null} or if any character is
	 *         128 or above
	 */
	public static boolean isAllSingleByte(String str) {
		if (str == null) {
			return false;
		}
		for (int i = 0; i < str.length(); i++) {
			if (str.charAt(i) >= 128) {
				return false;
			}
		}
		return true;
	}

	/***************************************************************************
	 * 
	 * Func Name : IsAllNum
	 * 
	 * Description: Judge the string is all made up of Num Char
	 * 
	 * 
	 * Parameters : sSentence: the original sentence which includes Chinese or
	 * Non-Chinese char
	 * 
	 * Returns : the end of the sub-sentence Author : Kevin Zhang History :
	 * 1.create 2002-1-24
	 **************************************************************************/
	public static boolean isAllNum(String str) {

		if (str != null) {
			int i = 0;
			String temp = str + " ";
			// 判斷開頭是否是+-之類的符號
			if ("±+—-+".indexOf(temp.substring(0, 1)) != -1)
				i++;
			/** 如果是全角的0123456789 字符* */
			while (i < str.length() && "0123456789".indexOf(str.substring(i, i + 1)) != -1)
				i++;

			// Get middle delimiter such as .
			if (i < str.length()) {
				String s = str.substring(i, i + 1);
				if ("∶·./".indexOf(s) != -1 || ".".equals(s) || "/".equals(s)) {// 98.1%
					i++;

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
99精品国产视频| 亚洲自拍偷拍网站| 欧美一区二区私人影院日本| 欧洲精品一区二区三区在线观看| 97久久精品人人澡人人爽| 成人性视频网站| 99国产精品久久久| 成人不卡免费av| 在线免费观看日本欧美| 在线日韩一区二区| 欧美日韩激情在线| 日韩免费观看高清完整版在线观看| 日韩一区二区电影网| 亚洲精品一区二区三区蜜桃下载| 久久精品视频免费观看| ...中文天堂在线一区| 亚洲人精品午夜| 日本特黄久久久高潮 | 国产精品综合一区二区| 国产精品1024久久| 色婷婷精品大在线视频| 精品视频资源站| 久久日韩粉嫩一区二区三区| 中文字幕在线不卡一区二区三区| 亚洲精品乱码久久久久久黑人| 一区二区三区欧美激情| 蜜桃视频在线一区| 不卡一卡二卡三乱码免费网站| 欧美性猛交xxxxxx富婆| 精品少妇一区二区三区 | 色乱码一区二区三区88| 欧美日韩激情一区二区三区| 久久午夜免费电影| 一区二区激情视频| 另类中文字幕网| 色94色欧美sute亚洲线路一ni| 日韩一区二区三区观看| 亚洲另类春色校园小说| 久久国产欧美日韩精品| 91亚洲国产成人精品一区二三| 欧美一区二区三区性视频| 国产精品久久久久桃色tv| 日韩电影免费一区| 99久久99久久精品免费看蜜桃| 欧美伦理视频网站| 亚洲视频免费观看| 国产成人精品综合在线观看| 欧美日韩国产在线播放网站| 中文字幕一区二区三区精华液| 婷婷开心激情综合| 91国偷自产一区二区三区成为亚洲经典 | 在线播放日韩导航| 中文字幕在线观看不卡视频| 精品一区二区三区免费播放| 欧美在线综合视频| 18成人在线视频| 国产精品一级在线| 欧美大片免费久久精品三p| 一区二区三区四区在线播放 | 韩日精品视频一区| 91精品国产综合久久久蜜臀粉嫩| 亚洲精选免费视频| 成人黄色大片在线观看| 久久久电影一区二区三区| 日本三级韩国三级欧美三级| 欧美日韩亚洲综合在线| 一区二区三区中文字幕电影 | 极品少妇xxxx精品少妇| 欧美猛男gaygay网站| 亚洲精品高清视频在线观看| caoporm超碰国产精品| 中文字幕av一区 二区| 国产一区二区调教| 国产午夜亚洲精品不卡| 国产在线国偷精品产拍免费yy| 日韩欧美一区二区视频| 青青草原综合久久大伊人精品 | 无吗不卡中文字幕| 欧美日韩综合在线免费观看| 亚洲与欧洲av电影| 欧美日韩视频在线一区二区| 视频一区二区不卡| 日韩一区二区三区在线| 国产一区二区三区综合| 欧美激情一区二区三区在线| 91亚洲国产成人精品一区二区三| 国产精品福利影院| 欧美三区在线视频| 免费精品视频在线| 久久精品夜色噜噜亚洲aⅴ| 风流少妇一区二区| 亚洲精品成人精品456| 欧美理论在线播放| 韩国女主播一区二区三区| 国产午夜亚洲精品不卡| 91久久精品一区二区二区| 香蕉久久一区二区不卡无毒影院| 91精品久久久久久久99蜜桃| 国产在线看一区| 最新成人av在线| 欧美一区二区在线观看| 成人免费三级在线| 亚洲一区二区在线播放相泽| 日韩一级黄色片| www.性欧美| 美女视频黄免费的久久| 欧美国产精品一区二区| 欧美最猛黑人xxxxx猛交| 久久69国产一区二区蜜臀| 国产精品久久久久桃色tv| 777欧美精品| 成人少妇影院yyyy| 日韩精品一区第一页| 亚洲国产精品成人综合色在线婷婷| 欧美综合色免费| 国产经典欧美精品| 日韩成人一级大片| 亚洲免费观看高清完整版在线观看熊| 欧美年轻男男videosbes| 国产成人精品亚洲777人妖| 亚洲成人av资源| 国产精品电影一区二区| 日韩精品一区二区三区在线观看 | 午夜av一区二区| 中文子幕无线码一区tr| 欧美日本国产一区| 99久久国产综合精品色伊| 国内精品久久久久影院色| 亚洲18色成人| 亚洲女厕所小便bbb| 国产欧美日韩另类一区| 91麻豆精品国产91久久久更新时间| 国产 欧美在线| 久久精品噜噜噜成人88aⅴ| 亚洲精品免费播放| 亚洲同性gay激情无套| 日韩欧美123| 日韩一区二区三区精品视频| 欧美视频一区在线| 99国产精品久| 91社区在线播放| 成av人片一区二区| 国产成人综合视频| 国产91丝袜在线观看| 风间由美中文字幕在线看视频国产欧美| 亚洲在线成人精品| 亚洲午夜久久久久中文字幕久| 欧美国产综合色视频| 国产午夜一区二区三区| 久久精品欧美一区二区三区麻豆| 日韩免费高清av| 日韩美女在线视频| 久久久国产午夜精品| 国产午夜精品一区二区三区视频 | 91色乱码一区二区三区| 成人av动漫在线| 99r国产精品| 色婷婷香蕉在线一区二区| 色综合中文字幕| 欧美亚洲动漫精品| 在线成人免费观看| 
精品三级av在线| 中文字幕精品综合| 日韩毛片精品高清免费| 亚洲激情图片一区| 午夜不卡av在线| 国内外成人在线视频| 成人av电影在线观看| 欧美日韩综合不卡| 911国产精品| 欧美激情中文不卡| 亚洲一区二三区| 精品一区二区三区在线观看| 豆国产96在线|亚洲| 91国产丝袜在线播放| 欧美一区二区三区播放老司机| 精品国产人成亚洲区| 亚洲视频免费看| 日韩av高清在线观看| 国产成人在线网站| 欧美日韩一区久久| 久久综合九色综合97婷婷女人| 国产精品成人一区二区艾草| 五月激情六月综合| 成人免费看黄yyy456| 欧美猛男男办公室激情| 亚洲精品一区二区三区精华液| 亚洲国产精品99久久久久久久久| 一区二区三区在线观看动漫| 久久国产剧场电影| 在线免费不卡视频| 久久久夜色精品亚洲| 亚洲五码中文字幕| 国产99久久久久| 欧美日韩五月天| 中文字幕视频一区二区三区久| 性久久久久久久久久久久| 国产91丝袜在线18| 精品少妇一区二区三区在线视频| 亚洲欧美精品午睡沙发|