亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關於我們
? 蟲蟲下載站

?? filedictionaries.java

?? 對Lucene的良好的封裝,提供了中文分詞字典 功能強大
?? JAVA
字號:
/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.paoding.analysis.knife;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import net.paoding.analysis.dictionary.BinaryDictionary;
import net.paoding.analysis.dictionary.Dictionary;
import net.paoding.analysis.dictionary.HashBinaryDictionary;
import net.paoding.analysis.dictionary.Hit;
import net.paoding.analysis.dictionary.Word;
import net.paoding.analysis.dictionary.support.detection.Detector;
import net.paoding.analysis.dictionary.support.detection.DifferenceListener;
import net.paoding.analysis.dictionary.support.detection.ExtensionFileFilter;
import net.paoding.analysis.dictionary.support.filewords.FileWordsReader;
import net.paoding.analysis.exception.PaodingAnalysisException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * File-backed cache of the dictionaries used for Chinese analysis, consumed
 * by {@link CJKKnife}.<br>
 * The dictionaries needed for Chinese text can be obtained from this object:
 * the vocabulary, family names, units of measurement, and the words or
 * single characters that are to be ignored.
 * <p>
 * Every dictionary is built lazily on first access and then cached in a
 * field. The public accessors are <code>synchronized</code> on this
 * instance.
 * 
 * @author Zhiliang Wang [qieqie.wang@gmail.com]
 * 
 * @see CJKKnife
 * 
 * @since 1.0
 */
public class FileDictionaries implements Dictionaries {

	// -------------------------------------------------

	protected Log log = LogFactory.getLog(this.getClass());

	// -------------------------------------------------

	/**
	 * Vocabulary dictionary.
	 */
	protected Dictionary vocabularyDictionary;

	/**
	 * Dictionary of Latin + CJK combined words.
	 */
	protected Dictionary combinatoricsDictionary;

	/**
	 * Family-name dictionary.
	 */
	protected Dictionary confucianFamilyNamesDictionary;

	/**
	 * Single characters to be ignored.
	 */
	protected Dictionary noiseCharactorsDictionary;

	/**
	 * Words to be ignored.
	 */
	protected Dictionary noiseWordsDictionary;

	/**
	 * Units of measurement.
	 */
	protected Dictionary unitsDictionary;

	// -------------------------------------------------

	/**
	 * All words read from {@link #dicHome}, keyed by dictionary name (the
	 * path relative to the dictionary home, without the extension).
	 * <code>null</code> until {@link #loadAllWordsIfNecessary()} has run.
	 */
	protected Map/* <String, Set<String>> */allWords;

	protected String dicHome;
	protected String skipPrefix;
	protected String noiseCharactor;
	protected String noiseWord;
	protected String unit;
	protected String confucianFamilyName;
	protected String combinatorics;
	protected String charsetName;

	/**
	 * The running file-change detector, or <code>null</code> when detection
	 * is not active.
	 */
	private Detector detector;

	// ----------------------

	/**
	 * Creates an unconfigured instance; the settings must be supplied
	 * through the setters before the dictionaries are requested.
	 */
	public FileDictionaries() {
	}

	/**
	 * Creates a fully configured instance.
	 * 
	 * @param dicHome
	 *            directory the dictionary files are read from
	 * @param skipPrefix
	 *            name prefix of dictionaries that are left out of the
	 *            vocabulary
	 * @param noiseCharactor
	 *            name of the noise-character dictionary
	 * @param noiseWord
	 *            name of the noise-word dictionary
	 * @param unit
	 *            name of the units dictionary
	 * @param confucianFamilyName
	 *            name of the family-name dictionary
	 * @param combinatorics
	 *            name of the Latin + CJK combined-word dictionary
	 * @param charsetName
	 *            charset name handed to the dictionary file reader
	 */
	public FileDictionaries(String dicHome, String skipPrefix,
			String noiseCharactor, String noiseWord, String unit,
			String confucianFamilyName, String combinatorics, String charsetName) {
		this.dicHome = dicHome;
		this.skipPrefix = skipPrefix;
		this.noiseCharactor = noiseCharactor;
		this.noiseWord = noiseWord;
		this.unit = unit;
		this.confucianFamilyName = confucianFamilyName;
		this.combinatorics = combinatorics;
		this.charsetName = charsetName;
	}

	public String getDicHome() {
		return dicHome;
	}

	public void setDicHome(String dicHome) {
		this.dicHome = dicHome;
	}

	public String getSkipPrefix() {
		return skipPrefix;
	}

	public void setSkipPrefix(String skipPrefix) {
		this.skipPrefix = skipPrefix;
	}

	public String getNoiseCharactor() {
		return noiseCharactor;
	}

	public void setNoiseCharactor(String noiseCharactor) {
		this.noiseCharactor = noiseCharactor;
	}

	public String getNoiseWord() {
		return noiseWord;
	}

	public void setNoiseWord(String noiseWord) {
		this.noiseWord = noiseWord;
	}

	public String getUnit() {
		return unit;
	}

	public void setUnit(String unit) {
		this.unit = unit;
	}

	public String getConfucianFamilyName() {
		return confucianFamilyName;
	}

	public void setConfucianFamilyName(String confucianFamilyName) {
		this.confucianFamilyName = confucianFamilyName;
	}

	public String getCharsetName() {
		return charsetName;
	}

	public void setCharsetName(String charsetName) {
		this.charsetName = charsetName;
	}

	/**
	 * Sets the name of the Latin + CJK combined-word dictionary (stored in
	 * {@link #combinatorics}).
	 */
	public void setLantinFllowedByCjk(String lantinFllowedByCjk) {
		this.combinatorics = lantinFllowedByCjk;
	}

	/**
	 * Returns the name of the Latin + CJK combined-word dictionary (stored
	 * in {@link #combinatorics}).
	 */
	public String getLantinFllowedByCjk() {
		return combinatorics;
	}

	// -------------------------------------------------

	/**
	 * Returns the vocabulary dictionary, building it on first use.
	 * <p>
	 * After the dictionary is built, every entry that also appears in the
	 * noise-word or noise-character dictionary is flagged accordingly. The
	 * dictionary is cached only once this flagging has completed, so a
	 * failure while loading a noise dictionary does not leave a
	 * half-initialised vocabulary behind for later callers.
	 * 
	 * @return the cached vocabulary dictionary
	 */
	public synchronized Dictionary getVocabularyDictionary() {
		if (vocabularyDictionary != null) {
			return vocabularyDictionary;
		}
		// Original author's sizing note: roughly 5639 characters have words,
		// hence 0x2fff together with 0.75 (presumably capacity and load
		// factor of the hash buckets -- confirm against HashBinaryDictionary).
		Dictionary vocabulary = new HashBinaryDictionary(
				getVocabularyWords(), 0x2fff, 0.75f);

		Dictionary noiseWordsDic = getNoiseWordsDictionary();
		for (int i = 0; i < noiseWordsDic.size(); i++) {
			Hit hit = vocabulary.search(noiseWordsDic.get(i), 0,
					noiseWordsDic.get(i).length());
			if (hit.isHit()) {
				hit.getWord().setNoiseWord();
			}
		}

		Dictionary noiseCharactorsDic = getNoiseCharactorsDictionary();
		for (int i = 0; i < noiseCharactorsDic.size(); i++) {
			Hit hit = vocabulary.search(noiseCharactorsDic.get(i), 0,
					noiseCharactorsDic.get(i).length());
			if (hit.isHit()) {
				hit.getWord().setNoiseCharactor();
			}
		}

		// publish only the fully prepared dictionary
		vocabularyDictionary = vocabulary;
		return vocabularyDictionary;
	}

	/**
	 * Returns the family-name dictionary, building it on first use.
	 * 
	 * @return the cached family-name dictionary
	 */
	public synchronized Dictionary getConfucianFamilyNamesDictionary() {
		if (confucianFamilyNamesDictionary == null) {
			confucianFamilyNamesDictionary = new BinaryDictionary(
					getConfucianFamilyNames());
		}
		return confucianFamilyNamesDictionary;
	}

	/**
	 * Returns the dictionary of single characters to be ignored, building
	 * it on first use.
	 * 
	 * @return the cached noise-character dictionary
	 */
	public synchronized Dictionary getNoiseCharactorsDictionary() {
		if (noiseCharactorsDictionary == null) {
			noiseCharactorsDictionary = new HashBinaryDictionary(
					getNoiseCharactors(), 256, 0.75f);
		}
		return noiseCharactorsDictionary;
	}

	/**
	 * Returns the dictionary of words to be ignored, building it on first
	 * use.
	 * 
	 * @return the cached noise-word dictionary
	 */
	public synchronized Dictionary getNoiseWordsDictionary() {
		if (noiseWordsDictionary == null) {
			noiseWordsDictionary = new BinaryDictionary(getNoiseWords());
		}
		return noiseWordsDictionary;
	}

	/**
	 * Returns the units-of-measurement dictionary, building it on first
	 * use.
	 * 
	 * @return the cached units dictionary
	 */
	public synchronized Dictionary getUnitsDictionary() {
		if (unitsDictionary == null) {
			unitsDictionary = new HashBinaryDictionary(getUnits(), 1024, 0.75f);
		}
		return unitsDictionary;
	}

	/**
	 * Returns the Latin + CJK combined-word dictionary, building it on
	 * first use.
	 * 
	 * @return the cached combinatorics dictionary
	 */
	public synchronized Dictionary getCombinatoricsDictionary() {
		if (combinatoricsDictionary == null) {
			combinatoricsDictionary = new BinaryDictionary(
					getCombinatoricsWords());
		}
		return combinatoricsDictionary;
	}

	/**
	 * Starts a {@link Detector} watching the ".dic" files under
	 * {@link #dicHome}.
	 * <p>
	 * The call is a no-op when a detector is already registered or when
	 * <code>interval</code> is negative.
	 * 
	 * @param interval
	 *            detection interval passed to the detector (unit is defined
	 *            by {@link Detector} -- not visible here)
	 * @param l
	 *            listener handed to the detector
	 */
	public synchronized void startDetecting(int interval, DifferenceListener l) {
		if (detector != null || interval < 0) {
			return;
		}
		Detector newDetector = new Detector();
		newDetector.setHome(dicHome);
		newDetector.setFilter(new ExtensionFileFilter(".dic"));
		// seed the detector with the current state so that only later
		// changes are reported (presumably -- confirm against Detector.flash)
		newDetector.setLastSnapshot(newDetector.flash());
		newDetector.setListener(l);
		newDetector.setInterval(interval);
		newDetector.start(true);
		this.detector = newDetector;
	}

	/**
	 * Stops the detector started by
	 * {@link #startDetecting(int, DifferenceListener)}; does nothing when
	 * none is registered.
	 */
	public synchronized void stopDetecting() {
		if (detector == null) {
			return;
		}
		detector.setStop();
		detector = null;
	}

	/**
	 * Re-reads one dictionary file and drops every cached dictionary that
	 * depends on it, so that it is rebuilt on next access.
	 * <p>
	 * Nothing happens when the words have not been loaded yet
	 * ({@link #allWords} is <code>null</code>) or when the path contains no
	 * ".dic" (a warning is logged in that case). If the file no longer
	 * exists, its entry is removed from {@link #allWords}.
	 * 
	 * @param dicPath
	 *            path of the changed dictionary file including the ".dic"
	 *            extension; it is appended directly to {@link #dicHome}, so
	 *            the caller is expected to supply whatever separator is
	 *            needed (NOTE(review): confirm against the detector's path
	 *            format)
	 */
	protected synchronized void refreshDicWords(String dicPath) {
		int index = dicPath.lastIndexOf(".dic");
		if (index < 0) {
			// not a dictionary file: there is no dictionary name to refresh
			log.warn("ignoring refresh of non-dictionary path: " + dicPath);
			return;
		}
		if (allWords == null) {
			return;
		}
		String dicName = dicPath.substring(0, index);
		try {
			Map/* <String, Set<String>> */temp = FileWordsReader.readWords(
					dicHome + dicPath, charsetName);
			allWords.put(dicName, temp.values().iterator().next());
		} catch (FileNotFoundException e) {
			// the source file has been deleted, so the dictionary is no
			// longer wanted
			allWords.remove(dicName);
		} catch (IOException e) {
			throw toRuntimeException(e);
		}
		if (!isSkipForVacabulary(dicName)) {
			this.vocabularyDictionary = null;
		}
		if (isNoiseWordDicFile(dicName)) {
			this.noiseWordsDictionary = null;
			// noise words are flagged inside the vocabulary, so it has to
			// be rebuilt as well
			this.vocabularyDictionary = null;
		} else if (isNoiseCharactorDicFile(dicName)) {
			this.noiseCharactorsDictionary = null;
			// noise characters are flagged inside the vocabulary, so it has
			// to be rebuilt as well
			this.vocabularyDictionary = null;
		} else if (isUnitDicFile(dicName)) {
			this.unitsDictionary = null;
		} else if (isConfucianFamilyNameDicFile(dicName)) {
			this.confucianFamilyNamesDictionary = null;
		} else if (isLantinFollowedByCjkDicFile(dicName)) {
			// words that start with letters, digits and the like
			this.combinatoricsDictionary = null;
		}
	}

	// ---------------------------------------------------------------
	// Helper methods below -- class-private or package-private in spirit

	/**
	 * Collects the words of every loaded dictionary that is not excluded by
	 * {@link #isSkipForVacabulary(String)}.
	 * 
	 * @return the merged, de-duplicated words in sorted order; an empty
	 *         array when every dictionary is excluded
	 */
	protected Word[] getVocabularyWords() {
		Map/* <String, Set<Word>> */dics = loadAllWordsIfNecessary();
		// start from an empty set so that "everything skipped" yields an
		// empty vocabulary instead of a NullPointerException
		Set/* <Word> */merged = new HashSet/* <Word> */();
		Iterator/* <Map.Entry<String, Set<Word>>> */iter = dics.entrySet()
				.iterator();
		while (iter.hasNext()) {
			Map.Entry entry = (Map.Entry) iter.next();
			if (isSkipForVacabulary((String) entry.getKey())) {
				continue;
			}
			merged.addAll((Set/* <Word> */) entry.getValue());
		}
		Word[] words = (Word[]) merged.toArray(new Word[merged.size()]);
		Arrays.sort(words);
		return words;
	}

	protected Word[] getConfucianFamilyNames() {
		return getDictionaryWords(confucianFamilyName);
	}

	protected Word[] getNoiseWords() {
		return getDictionaryWords(noiseWord);
	}

	protected Word[] getNoiseCharactors() {
		return getDictionaryWords(noiseCharactor);
	}

	protected Word[] getUnits() {
		return getDictionaryWords(unit);
	}

	protected Word[] getCombinatoricsWords() {
		return getDictionaryWords(combinatorics);
	}

	/**
	 * Reads the single dictionary file
	 * <code>dicHome/&lt;name&gt;.dic</code> and returns its words sorted.
	 * 
	 * @param dicNameRelativeDicHome
	 *            dictionary name relative to {@link #dicHome}, without the
	 *            ".dic" extension
	 * @return the sorted words of that dictionary
	 * @throws PaodingAnalysisException
	 *             if reading fails or the reader returns no entry under the
	 *             dictionary name
	 */
	protected Word[] getDictionaryWords(String dicNameRelativeDicHome) {
		String path = dicHome + "/" + dicNameRelativeDicHome + ".dic";
		Map dics;
		try {
			dics = FileWordsReader.readWords(path, charsetName);
		} catch (IOException e) {
			throw toRuntimeException(e);
		}
		Set/* <Word> */set = (Set/* <Word> */) dics.get(dicNameRelativeDicHome);
		if (set == null) {
			// fail with a message that names the dictionary instead of an
			// anonymous NullPointerException
			throw new PaodingAnalysisException("Dictionary '"
					+ dicNameRelativeDicHome + "' was not found when reading "
					+ path);
		}
		Word[] words = (Word[]) set.toArray(new Word[set.size()]);
		Arrays.sort(words);
		return words;
	}

	// -------------------------------------

	/**
	 * Reads the dictionary files below the dictionary home and its
	 * sub-directories, unless that has already been done. Each dictionary is
	 * keyed by its path relative to the home directory, including the file
	 * name but not the extension. For example, if the dictionaries are
	 * installed in the directory dic and it contains division/china.dic,
	 * the key of that file is "division/china".
	 * 
	 * @return the cached map of all dictionaries
	 * @throws PaodingAnalysisException
	 *             if no dictionary is found or reading fails
	 */
	protected synchronized Map/* <String, Set<String>> */loadAllWordsIfNecessary() {
		if (allWords == null) {
			try {
				log.info("loading dictionaries from " + dicHome);
				allWords = FileWordsReader.readWords(dicHome, charsetName);
				if (allWords.size() == 0) {
					String message = "Not found any dictionary files, have you set the 'paoding.dic.home' right? ("
							+ this.dicHome + ")";
					log.error(message);
					throw new PaodingAnalysisException(message);
				}
				log.info("loaded success!");
			} catch (IOException e) {
				throw toRuntimeException(e);
			}
		}
		return allWords;
	}

	// ---------------------------------------

	/**
	 * Tells whether a dictionary is excluded from the vocabulary: its name,
	 * or any path segment following a "/", starts with {@link #skipPrefix}.
	 * <p>
	 * When no skip prefix is configured (<code>null</code> or empty),
	 * nothing is excluded.
	 * 
	 * @param dicNameRelativeDicHome
	 *            dictionary name relative to the dictionary home
	 * @return <code>true</code> if the dictionary must be left out of the
	 *         vocabulary
	 */
	protected final boolean isSkipForVacabulary(String dicNameRelativeDicHome) {
		if (skipPrefix == null || skipPrefix.length() == 0) {
			return false;
		}
		return dicNameRelativeDicHome.startsWith(skipPrefix)
				|| dicNameRelativeDicHome.indexOf("/" + skipPrefix) != -1;
	}

	protected boolean isUnitDicFile(String dicName) {
		return dicName.equals(this.unit);
	}

	protected boolean isNoiseCharactorDicFile(String dicName) {
		return dicName.equals(this.noiseCharactor);
	}

	protected boolean isNoiseWordDicFile(String dicName) {
		return dicName.equals(this.noiseWord);
	}

	protected boolean isConfucianFamilyNameDicFile(String dicName) {
		return dicName.equals(this.confucianFamilyName);
	}

	protected boolean isLantinFollowedByCjkDicFile(String dicName) {
		return dicName.equals(this.combinatorics);
	}

	// --------------------------------------

	/**
	 * Wraps an I/O failure in the unchecked {@link PaodingAnalysisException}.
	 */
	protected RuntimeException toRuntimeException(IOException e) {
		return new PaodingAnalysisException(e);
	}
}

?? 快捷鍵說明

復(fù)制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
国产精品女人毛片| 精品一区二区三区免费| 成人av在线看| 中文字幕中文字幕一区二区 | 久久精子c满五个校花| 国产一区二三区好的| 久久九九影视网| 99久久99久久综合| 亚洲国产中文字幕| 制服丝袜av成人在线看| 看电视剧不卡顿的网站| 久久久久久一二三区| 丁香亚洲综合激情啪啪综合| 中文字幕在线观看不卡视频| 色婷婷av一区二区三区大白胸| 亚洲一区二区三区美女| 91精品免费观看| 国产精品66部| 亚洲精品欧美激情| 欧美一区二区三区小说| 成人免费视频视频| 亚洲一区二区av在线| 精品成人免费观看| 不卡一区中文字幕| 亚洲成人久久影院| 国产午夜精品久久久久久免费视| 色先锋资源久久综合| 欧美a一区二区| 国产精品区一区二区三区| 在线看国产一区二区| 国产在线不卡一区| 一区二区在线观看免费| 精品免费日韩av| 91免费国产在线| 青青草伊人久久| 中文字幕在线不卡国产视频| 日韩视频永久免费| 91美女片黄在线观看91美女| 麻豆精品一区二区av白丝在线| 国产女主播视频一区二区| 在线一区二区观看| 国产盗摄一区二区| 日本亚洲免费观看| 一区二区三区四区不卡在线| 久久日韩粉嫩一区二区三区| 欧美精品丝袜中出| 91丨porny丨国产入口| 国产美女一区二区三区| 日韩经典中文字幕一区| 亚洲欧美精品午睡沙发| 国产网站一区二区| 日韩欧美精品在线视频| 欧美日韩中文字幕精品| 99re这里只有精品首页| 国产福利电影一区二区三区| 美女脱光内衣内裤视频久久网站 | 成人看片黄a免费看在线| 日本视频一区二区| 一区二区三区精品| 欧美国产精品中文字幕| 日韩欧美视频一区| 欧美在线观看一区二区| 午夜精品一区二区三区免费视频| 一区精品在线播放| xf在线a精品一区二区视频网站| 色综合久久66| 国产99精品在线观看| 美日韩黄色大片| 亚洲高清免费视频| 亚洲免费观看视频| 国产精品久久久久久妇女6080 | 国产人妖乱国产精品人妖| 日韩亚洲欧美成人一区| 欧美视频一区二区在线观看| 成人国产一区二区三区精品| 免费在线视频一区| 午夜视频在线观看一区二区| 亚洲天堂a在线| 国产精品嫩草影院av蜜臀| 日韩精品一区二区三区蜜臀| 91麻豆精品国产无毒不卡在线观看| 精品一区二区免费视频| 久久99久久精品| 日韩二区三区四区| 视频一区二区欧美| 亚洲综合免费观看高清完整版在线| 欧美国产1区2区| 国产日产欧产精品推荐色 | 丰满亚洲少妇av| 国产精品一区二区三区99| 精品一区二区三区免费毛片爱| 午夜精品福利久久久| 亚洲国产毛片aaaaa无费看| 日韩毛片精品高清免费| 亚洲欧洲成人自拍| 亚洲视频在线一区二区| 自拍偷拍欧美精品| 亚洲精品国产a久久久久久| 亚洲精品乱码久久久久久日本蜜臀| 亚洲女与黑人做爰| 一区二区三区久久| 调教+趴+乳夹+国产+精品| 日韩精品一二三四| 蜜臀av性久久久久蜜臀aⅴ四虎| 蜜乳av一区二区三区| 亚洲精品国产无天堂网2021| 免费成人结看片| 狠狠色综合播放一区二区| 国产成人在线免费观看| 成人av在线资源| 欧洲av一区二区嗯嗯嗯啊| 欧美情侣在线播放| 日韩精品一区二区在线观看| 欧美日韩一区二区三区四区五区| 欧美日本免费一区二区三区| 日韩一区二区精品葵司在线| 久久久久久毛片| 亚洲人被黑人高潮完整版| 亚洲成人一区二区| 久久99精品久久久久久国产越南| 国产一区二区三区| 不卡视频免费播放| 色综合婷婷久久| 精品国产乱码久久久久久1区2区| 国产日本欧美一区二区| 亚洲天堂av老司机| 日本v片在线高清不卡在线观看| 激情另类小说区图片区视频区| 国产成人自拍网| 欧美日韩亚洲综合在线 欧美亚洲特黄一级| 欧美日精品一区视频| 91精品欧美一区二区三区综合在 | 丝袜亚洲另类欧美| 国产伦精一区二区三区| 在线欧美日韩国产| 精品日本一线二线三线不卡| 中文字幕一区二区三区四区不卡| 国产精品国产自产拍在线| 久久精品国产色蜜蜜麻豆| 91日韩精品一区| 欧美精品一区二区三区四区| 亚洲视频一区二区免费在线观看| 奇米精品一区二区三区在线观看 | 国产精品影视在线| 欧美视频一区二区三区在线观看| 久久影音资源网| 亚洲国产裸拍裸体视频在线观看乱了| 国产尤物一区二区| 欧美区一区二区三区| 国产精品久久福利| 国产精品亚洲综合一区在线观看| 欧美日韩一区精品| 日韩伦理免费电影| 精久久久久久久久久久| 99久久精品国产毛片| 精品久久久久av影院| 午夜私人影院久久久久| 91污片在线观看| 久久久久国产精品麻豆ai换脸| 婷婷综合在线观看| 91日韩一区二区三区| 久久九九国产精品| 
久久精品国产99| 欧美日本国产一区| 亚洲一区二三区| 99久久精品国产观看| 国产精品日日摸夜夜摸av| 亚洲国产一区二区三区青草影视| 99在线精品观看| 亚洲国产高清aⅴ视频| 国产一区二区三区四区在线观看| 91精品国产全国免费观看| 亚洲线精品一区二区三区八戒| 不卡欧美aaaaa| 国产精品欧美一级免费| 免费人成在线不卡| 精品精品欲导航| 日韩精品久久理论片| 欧美日韩大陆一区二区| 一个色妞综合视频在线观看| a级精品国产片在线观看| 国产精品天天看| 成人午夜激情影院| 亚洲日本电影在线| 91在线视频观看| 亚洲免费视频成人| 欧美中文字幕久久| 午夜电影网亚洲视频| 精品视频资源站| 亚洲色大成网站www久久九九| 成人精品一区二区三区中文字幕| 欧美激情一区二区| 99精品欧美一区二区三区小说 | 首页亚洲欧美制服丝腿| 欧美日韩久久不卡| 日本不卡一二三| 久久九九影视网| 国产一区二区三区美女| 亚洲天堂免费在线观看视频| 色综合久久66|