// File: unknownseg.java
// [Web code-viewer residue, not part of the source: "Font size" control]
package com.gftech.ictclas4j.segment;
import java.util.ArrayList;
import com.gftech.ictclas4j.bean.SegGraph;
import com.gftech.ictclas4j.bean.Span;
import com.gftech.ictclas4j.bean.WordResult;
import com.gftech.ictclas4j.utility.Dictionary;
import com.gftech.ictclas4j.utility.DynamicArray;
import com.gftech.ictclas4j.utility.Utility.TAG_TYPE;
/**
 * Unknown-word recogniser: runs a role tagger ({@link Span}) over a
 * segmentation result and lowers the cost of the matching spans in the
 * segmentation graph, labelling them with a placeholder word and POS code.
 *
 * <p>{@link #configure(String, TAG_TYPE)} is the only place where
 * {@code unDict} is assigned, so it must be called before
 * {@link #isGivenName(String)} (which dereferences {@code unDict}).
 */
public class UnknownSeg {
    /** POS code stored for person names; equals -('n' * 256 + 'r'). */
    private static final int POS_PERSON = -28274;

    /** POS code stored for place names; equals -('n' * 256 + 's'). */
    private static final int POS_PLACE = -28275;

    /** Dictionary of the unknown-word category; loaded by {@code configure}. */
    Dictionary unDict;

    /** Role tagger that locates unknown words and scores them. */
    Span roleTag;

    /** POS code written into every graph element created by {@code recognition}. */
    int pos;

    /** Placeholder word written into every graph element created by {@code recognition}. */
    String unknownFlags;

    /** Creates a recogniser with an empty role tagger and no dictionary. */
    public UnknownSeg() {
        roleTag = new Span();
    }

    /**
     * Tags {@code wrs} with the role tagger and, for every unknown word it
     * reports, replaces the corresponding element of {@code graphOpt} when the
     * tagger's score is lower than the value already stored there.
     *
     * @param wrs       word results handed to {@code roleTag.posTagging}
     * @param graphOpt  segmentation graph that is read and updated in place
     * @param coreDict  core dictionary handed to {@code roleTag.posTagging}
     * @return always {@code true}
     */
    public boolean recognition(ArrayList<WordResult> wrs, DynamicArray graphOpt, Dictionary coreDict) {
        ArrayList<SegGraph> sgs = graphOpt.getSgs();
        roleTag.posTagging(wrs, coreDict, unDict);

        // atomIndex/startPos are deliberately NOT reset per unknown word: the
        // scan over the atoms continues from where the previous word stopped.
        int atomIndex = 0;
        int startPos = 0;
        for (int i = 0; i < roleTag.m_nUnknownIndex; i++) {
            // Advance to the atom at which the unknown word begins.
            while (atomIndex < sgs.size() && startPos < roleTag.m_nUnknownWords[i][0]) {
                startPos += sgs.get(atomIndex++).getLen();
            }
            int atomStart = atomIndex;

            // Advance to the atom just past the end of the unknown word.
            while (atomIndex < sgs.size() && startPos < roleTag.m_nUnknownWords[i][1]) {
                startPos += sgs.get(atomIndex++).getLen();
            }
            int atomEnd = atomIndex;

            if (atomStart >= atomEnd) {
                continue;
            }

            // NOTE(review): when the graph has no element at (atomStart, atomEnd)
            // nothing is inserted - confirm that this is intended.
            SegGraph existing = graphOpt.getElement(atomStart, atomEnd);
            if (existing != null && existing.getValue() > roleTag.m_dWordsPossibility[i]) {
                // Overwrite with the lower-valued unknown-word element.
                SegGraph replacement = new SegGraph();
                replacement.setRow(atomStart);
                replacement.setCol(atomEnd);
                replacement.setValue(roleTag.m_dWordsPossibility[i]);
                replacement.setPos(pos);
                replacement.setWord(unknownFlags);
                graphOpt.setElement(replacement);
            }
        }
        return true;
    }

    /**
     * Loads the dictionary ({@code fileName + ".dct"}) and the role-tag context
     * ({@code fileName + ".ctx"}) and selects the POS code and placeholder word
     * for the given tag type.
     *
     * <p>For tag types other than person / transliterated person / place, only
     * {@code pos} is reset (to 0); {@code unknownFlags} keeps its previous value.
     *
     * @param fileName path prefix of the data files, without extension
     * @param type     kind of unknown word this instance recognises
     * @return {@code false} if {@code fileName} is {@code null}, otherwise {@code true}
     */
    public boolean configure(String fileName, TAG_TYPE type) {
        if (fileName == null) {
            return false;
        }

        unDict = new Dictionary();
        unDict.load(fileName + ".dct");
        roleTag = new Span();
        roleTag.loadContext(fileName + ".ctx");
        roleTag.setType(type);

        switch (type) {
        case TT_PERSON:
            // Intentional fall-through: transliterated person names share the
            // person POS code and placeholder.
        case TT_TRANS_PERSON:
            pos = POS_PERSON;
            unknownFlags = "未##人";
            break;
        case TT_PLACE:
            pos = POS_PLACE;
            unknownFlags = "未##地";
            break;
        default:
            pos = 0;
            break;
        }
        return true;
    }

    /**
     * Judges whether {@code sName} is a two-character given name by comparing
     * two log-scores built from dictionary and context frequencies: one for the
     * tag pair (2, 3) and one for the tag pair (1, 4).
     *
     * <p>The length test used to be {@code sName.getBytes().length != 4}, which
     * depends on the JVM's default charset and rejects every two-character
     * Chinese name when that default is UTF-8 (6 bytes). The width is now
     * computed independently of the platform charset.
     *
     * @param sName candidate name; may be {@code null}
     * @return {@code true} if the (2, 3) score is strictly greater than the
     *         (1, 4) score; {@code false} for {@code null}, for names whose
     *         double-byte width is not 4, or otherwise
     */
    public boolean isGivenName(String sName) {
        if (sName == null) {
            return false;
        }
        if (doubleByteLength(sName) != 4) {
            return false;
        }

        String firstChar = sName.substring(0, 1);
        String secondChar = sName.substring(1);

        // Score of the tag sequence 2 -> 3 ("given name possibility").
        // presumably tags 2/3 are the two characters of a double given name - TODO confirm
        double gnp = 0;
        // P(Wi|Ti) for both characters.
        gnp += logPlusOne(unDict.getFrequency(firstChar, 2));
        gnp -= logPlusOne(roleTag.context.getFrequency(0, 2));
        gnp += logPlusOne(unDict.getFrequency(secondChar, 3));
        gnp -= logPlusOne(roleTag.context.getFrequency(0, 3));
        // Transition from tag 2 to tag 3.
        gnp += logPlusOne(roleTag.context.getContextPossibility(0, 2, 3));
        gnp -= logPlusOne(roleTag.context.getFrequency(0, 2));

        // Score of the tag sequence 1 -> 4 ("single name possibility").
        double snp = 0;
        // P(Wi|Ti) for both characters.
        snp += logPlusOne(unDict.getFrequency(firstChar, 1));
        snp -= logPlusOne(roleTag.context.getFrequency(0, 1));
        snp += logPlusOne(unDict.getFrequency(secondChar, 4));
        snp -= logPlusOne(roleTag.context.getFrequency(0, 4));
        // Transition from tag 1 to tag 4.
        snp += logPlusOne(roleTag.context.getContextPossibility(0, 1, 4));
        snp -= logPlusOne(roleTag.context.getFrequency(0, 1));

        // A ratio heuristic on the first character's frequency under tag 1 vs
        // tag 2 (>= 10) was commented out in the original and is not applied.

        // Not a two-character given name when the single-name reading is at
        // least as likely.
        if (snp >= gnp) {
            return false;
        }
        return true;
    }

    /** Returns {@code ln(count + 1)}; the +1 keeps zero counts finite. */
    private static double logPlusOne(double count) {
        return Math.log(count + 1.0);
    }

    /**
     * Returns the width of {@code text} counting ASCII characters as one byte
     * and every other character as two, i.e. its length in a double-byte
     * Chinese encoding such as GBK, without consulting the platform charset.
     */
    private static int doubleByteLength(String text) {
        int width = 0;
        for (int k = 0; k < text.length(); k++) {
            width += text.charAt(k) <= 0x7F ? 1 : 2;
        }
        return width;
    }
}
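// [Web code-viewer residue, not part of the source] Keyboard shortcuts:
//   Copy code: Ctrl + C
//   Search code: Ctrl + F
//   Full-screen mode: F11
//   Toggle theme: Ctrl + Shift + D
//   Show shortcuts: ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -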