?? chinese.java
字號:
package net.paoding.analysis.examples.gettingstarted.ch3;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import net.paoding.analysis.examples.gettingstarted.BoldFormatter;
import net.paoding.analysis.examples.gettingstarted.ContentReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
public class Chinese {
private static String FIELD_NAME = "content";
private static String QUERY = "崇武古城";
public static void main(String[] args) throws Exception {
if (args.length != 0) {
QUERY = args[0];
}
// 將庖丁封裝成符合Lucene要求的Analyzer規(guī)范
Analyzer analyzer = new PaodingAnalyzer();
//讀取本類目錄下的text.txt文件
String content = ContentReader.readText(Chinese.class);
//接下來是標(biāo)準(zhǔn)的Lucene建立索引和檢索的代碼
Directory ramDir = new RAMDirectory();
IndexWriter writer = new IndexWriter(ramDir, analyzer);
Document doc = new Document();
Field fd = new Field(FIELD_NAME, content, Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(fd);
writer.addDocument(doc);
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(ramDir);
String queryString = QUERY;
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
Query query = parser.parse(queryString);
Searcher searcher = new IndexSearcher(ramDir);
query = query.rewrite(reader);
System.out.println("Searching for: " + query.toString(FIELD_NAME));
Hits hits = searcher.search(query);
BoldFormatter formatter = new BoldFormatter();
Highlighter highlighter = new Highlighter(formatter, new QueryScorer(
query));
highlighter.setTextFragmenter(new SimpleFragmenter(50));
for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME);
int maxNumFragmentsRequired = 5;
String fragmentSeparator = "...";
TermPositionVector tpv = (TermPositionVector) reader
.getTermFreqVector(hits.id(i), FIELD_NAME);
TokenStream tokenStream = TokenSources.getTokenStream(tpv);
String result = highlighter.getBestFragments(tokenStream, text,
maxNumFragmentsRequired, fragmentSeparator);
System.out.println("\n" + result);
}
reader.close();
}
}
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -