// sentenceseg.java
// (Code-viewer page residue: a "font size" label followed this header; it is not part of the source.)
package com.gftech.ictclas4j.segment;
import java.util.ArrayList;
import com.gftech.ictclas4j.bean.Sentence;
import com.gftech.ictclas4j.utility.Utility;
import com.gftech.util.GFString;
/**
 * Splits a text into {@link Sentence} pieces as soon as it is constructed.
 *
 * <p>The text is broken into atoms by {@code GFString.atomSplit} and the atoms
 * are regrouped around the separator sets declared in {@code Utility}. The
 * result is available through {@link #getSens()}.
 */
public class SentenceSeg {
    /** Text handed to the constructor; may be {@code null}. */
    private final String src;

    /** Split result, computed once in the constructor; {@code null} when {@link #src} is {@code null}. */
    private final ArrayList<Sentence> sens;

    /**
     * Stores the text and splits it immediately.
     *
     * @param src text to split; {@code null} is accepted and leaves the result {@code null}
     */
    public SentenceSeg(String src) {
        this.src = src;
        this.sens = sentenceSplit();
    }

    /**
     * Performs the sentence split on {@link #src}.
     *
     * <p>Atoms are accumulated into a pending string that starts with
     * {@code Utility.SENTENCE_BEGIN}. When a separator atom is met, the pending
     * string is emitted (if it holds more than the begin marker) and a new
     * pending string is started.
     *
     * @return the pieces in input order, or {@code null} when {@link #src} is {@code null}
     */
    private ArrayList<Sentence> sentenceSplit() {
        if (src == null) {
            return null;
        }

        ArrayList<Sentence> pieces = new ArrayList<Sentence>();
        String pending = Utility.SENTENCE_BEGIN;

        for (String atom : GFString.atomSplit(src)) {
            boolean sentenceEnd = Utility.SEPERATOR_C_SENTENCE.indexOf(atom) != -1;
            // Per the original comments, link separators are line breaks and spaces.
            boolean link = Utility.SEPERATOR_LINK.indexOf(atom) != -1;
            // Sub-sentence separators, per the original comments e.g. commas.
            boolean subSentence = Utility.SEPERATOR_C_SUB_SENTENCE.indexOf(atom) != -1
                    || Utility.SEPERATOR_E_SUB_SENTENCE.indexOf(atom) != -1;

            // Ordinary atom: just extend the sentence being built.
            if (!(sentenceEnd || link || subSentence)) {
                pending += atom;
                continue;
            }

            // A link separator is emitted on its own below, so it is kept out of the sentence text.
            if (!link) {
                pending += atom;
            }

            // Cut the sentence here if anything besides the begin marker was collected.
            if (hasContent(pending)) {
                // Only a full stop (not a sub-sentence break) gets the end marker.
                if (!subSentence) {
                    pending += Utility.SENTENCE_END;
                }
                // NOTE(review): the `true` flag presumably marks the piece as needing
                // segmentation - confirm against Sentence.
                pieces.add(new Sentence(pending, true));
            }

            if (link) {
                // Original comment: line breaks / spaces need no analysis, so they are
                // stored through the one-argument constructor.
                pieces.add(new Sentence(atom));
                pending = Utility.SENTENCE_BEGIN;
            } else if (sentenceEnd || Utility.SEPERATOR_E_SENTENCE.indexOf(atom) != -1) {
                // NOTE(review): SEPERATOR_E_SENTENCE is only consulted here; it is not part
                // of the condition that enters the separator branch. Confirm this is intended.
                pending = Utility.SENTENCE_BEGIN;
            } else {
                // After a sub-sentence break the next piece starts with the separator itself.
                pending = atom;
            }
        }

        // Emit whatever is left after the last atom.
        if (hasContent(pending)) {
            pieces.add(new Sentence(pending + Utility.SENTENCE_END, true));
        }
        return pieces;
    }

    /** Tells whether {@code text} is non-empty and is more than the bare sentence-begin marker. */
    private static boolean hasContent(String text) {
        return text.length() > 0 && !Utility.SENTENCE_BEGIN.equals(text);
    }

    /**
     * Returns the pieces computed at construction time.
     *
     * @return the internal list itself (not a copy), or {@code null} when the source text was {@code null}
     */
    public ArrayList<Sentence> getSens() {
        return sens;
    }
}
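// Code-viewer page residue (keyboard shortcut help; not part of the source):
//   Copy code:           Ctrl + C
//   Search code:         Ctrl + F
//   Full-screen mode:    F11
//   Switch theme:        Ctrl + Shift + D
//   Show shortcuts:      ?
//   Increase font size:  Ctrl + =
//   Decrease font size:  Ctrl + -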