?? tfidfsmoother.java
字號:
package dragon.ir.search.smooth;import dragon.ir.index.*;import dragon.ir.query.SimpleTermPredicate;/** * <p>TF-IDF Smoother for vector space model</p> * <p></p> * <p>Copyright: Copyright (c) 2005</p> * <p>Company: IST, Drexel University</p> * @author Davis Zhou * @version 1.0 */public class TFIDFSmoother extends AbstractSmoother{ private int docNum; private double curTermIDF, curDocLengthRatio; private double avgDocLength; private double bm25k1, bm25b; private double param1, param2; private boolean useBM25; public TFIDFSmoother(IRCollection collection) { docNum=collection.getDocNum(); this.useLog=false; this.docFirstOptimal=true; this.querytermFirstOptimal=true; this.useBM25 =false; } public TFIDFSmoother(IRCollection collection, double bm25k1, double bm25b) { docNum=collection.getDocNum(); avgDocLength=collection.getTermCount()*1.0/docNum; this.bm25b=bm25b; this.bm25k1 =bm25k1; this.useBM25 =true; this.useLog=false; this.docFirstOptimal=true; this.querytermFirstOptimal=true; param1=bm25k1*(1-bm25b); param2=bm25k1*bm25b; } public boolean setParameters(double[] params){ if(params!=null && params.length>=2) { this.bm25k1 =params[0]; this.bm25b =params[1]; param1=bm25k1*(1-bm25b); param2=bm25k1*bm25b; return true; } else return false; } public void setQueryTerm(SimpleTermPredicate queryTerm){ this.queryWeight =queryTerm.getWeight(); curTermIDF=Math.log((1+docNum)/(0.5+queryTerm.getDocFrequency())); } public void setDoc(IRDoc doc){ if(useBM25) curDocLengthRatio=param2*doc.getTermCount()/avgDocLength; } protected double computeSmoothedProb(int termFrequency){ if(useBM25) return queryWeight*termFrequency*curTermIDF/(param1+curDocLengthRatio+termFrequency); else return queryWeight*termFrequency*curTermIDF; }}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -