// matchnamerule.cs
// (code-viewer page residue) Font size:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
namespace KTDictSeg
{
/// <summary>
/// 匹配姓名
/// </summary>
class MatchName : IRule
{
// Rule helper built from the CPOS given to the constructor. The matching
// methods consult it (Match, MatchNameInHead, MatchNameInTail) to decide
// whether neighbouring words may be joined into a name.
PosBinRule m_PosBinRule;
// The CPOS instance given to the constructor; queried through GetPos when
// words are recombined (a part-of-speech check, per the comments in the tail matcher).
CPOS m_Pos;
/// <summary>
/// Known Chinese family names, in three groups: family names with obvious
/// ambiguity, family names without obvious ambiguity, and (at the end of
/// the second group) two-character compound family names.
/// A few entries are repeated (for example "柯" and "郁"); this is harmless
/// because the entries are only used as hashtable keys.
/// </summary>
static string[] FAMILY_NAMES = {
// Family names with obvious ambiguity
"王","張","黃","周","徐",
"胡","高","林","馬","于",
"程","傅","曾","葉","余",
"夏","鐘","田","任","方",
"石","熊","白","毛","江",
"史","候","龍","萬","段",
"雷","錢","湯","易","常",
"武","賴","文", "查",
// Family names without obvious ambiguity
"趙", "肖", "孫", "李",
"吳", "鄭", "馮", "陳",
"褚", "衛", "蔣", "沈",
"韓", "楊", "朱", "秦",
"尤", "許", "何", "呂",
"施", "桓", "孔", "曹",
"嚴", "華", "金", "魏",
"陶", "姜", "戚", "謝",
"鄒", "喻", "柏", "竇",
"蘇", "潘", "葛", "奚",
"范", "彭", "魯", "韋",
"昌", "俞", "袁", "酆",
"鮑", "唐", "費", "廉",
"岑", "薛", "賀", "倪",
"滕", "殷", "羅", "畢",
"郝", "鄔", "卞", "康",
"卜", "顧", "孟", "穆",
"蕭", "尹", "姚", "邵",
"湛", "汪", "祁", "禹",
"狄", "貝", "臧", "伏",
"戴", "宋", "茅", "龐",
"紀", "舒", "屈", "祝",
"董", "梁", "杜", "阮",
"閔", "賈", "婁", "顏",
"郭", "邱", "駱", "蔡",
"樊", "凌", "霍", "虞",
"柯", "昝", "盧", "柯",
"繆", "宗", "丁", "賁",
"鄧", "郁", "杭", "洪",
"崔", "龔", "嵇", "邢",
"滑", "裴", "陸", "榮",
"荀", "惠", "甄", "芮",
"羿", "儲", "靳", "汲",
"邴", "糜", "隗", "侯",
"宓", "蓬", "郗", "仲",
"欒", "鈄", "歷", "戎",
"劉", "詹", "幸", "韶",
"郜", "黎", "薊", "溥",
"蒲", "邰", "鄂", "咸",
"卓", "藺", "屠", "喬",
"郁", "胥", "蒼", "莘",
"翟", "譚", "貢", "勞",
"冉", "酈", "雍", "璩",
"桑", "桂", "濮", "扈",
"冀", "浦", "莊", "晏",
"瞿", "閻", "慕", "茹",
"習", "宦", "艾", "容",
"慎", "戈", "廖", "庾",
"衡", "耿", "弘", "匡",
"闕", "殳", "沃", "蔚",
"夔", "隆", "鞏", "聶",
"晁", "敖", "融", "訾",
"辛", "闞", "毋", "乜",
"鞠", "豐", "蒯", "荊",
// Two-character (compound) family names start with the last entry of the next line
"竺", "盍", "萬俟",
"司馬", "上官", "歐陽",
"夏侯", "諸葛", "聞人",
"東方", "赫連", "皇甫",
"尉遲", "公羊", "澹臺",
"公冶", "宗政", "濮陽",
"淳于", "單于", "太叔",
"申屠", "公孫", "仲孫",
"軒轅", "令狐", "徐離",
"宇文", "長孫", "慕容",
"司徒", "司空"};
/// <summary>
/// Lookup table holding every entry of FAMILY_NAMES as a key (value is always true).
/// </summary>
/// <remarks>
/// Built once by a static initializer rather than in the instance constructor,
/// so the static IsChineseName can be called before any MatchName instance
/// exists, and creating further instances no longer replaces the shared table.
/// This field must stay declared after FAMILY_NAMES: static field initializers
/// run in textual order, and the table is built from that array.
/// </remarks>
static readonly Hashtable m_FamilyNameTbl = BuildFamilyNameTable();

/// <summary>
/// Builds the family-name lookup table from FAMILY_NAMES.
/// </summary>
/// <returns>A hashtable keyed by family name.</returns>
static Hashtable BuildFamilyNameTable()
{
    Hashtable table = new Hashtable(FAMILY_NAMES.Length);
    foreach (String familyName in FAMILY_NAMES)
    {
        // Indexer assignment tolerates the duplicated entries in FAMILY_NAMES.
        table[familyName] = true;
    }
    return table;
}

/// <summary>
/// Creates a name-matching rule bound to the given CPOS instance.
/// </summary>
/// <param name="pos">Instance stored for GetPos queries and used to build the PosBinRule helper.</param>
public MatchName(CPOS pos)
{
    m_PosBinRule = new PosBinRule(pos);
    m_Pos = pos;
}
/// <summary>
/// Checks whether a family name / given name pair can be a Chinese personal name.
/// </summary>
/// <param name="familyName">Family name (surname).</param>
/// <param name="firstName">Given name.</param>
/// <returns>
/// true when neither part is longer than two characters and
/// <paramref name="familyName"/> is a key of the family-name table; otherwise false.
/// </returns>
public static bool IsChineseName(String familyName, String firstName)
{
    // The given name is inspected first, then the family name (same order as before).
    bool lengthsAcceptable = firstName.Length <= 2 && familyName.Length <= 2;
    return lengthsAcceptable && m_FamilyNameTbl[familyName] != null;
}
#region IRule 成員
/// <summary>
/// Tells whether a word is non-empty and starts with a character in the
/// range U+4E00..U+9FA5 (the range this rule treats as Chinese characters).
/// </summary>
/// <param name="word">Word to inspect; may be null or empty.</param>
/// <returns>true when the first character lies in that range.</returns>
private static bool StartsWithChineseChar(String word)
{
    return !String.IsNullOrEmpty(word) && word[0] >= 0x4e00 && word[0] <= 0x9fa5;
}

/// <summary>
/// Handles the case where the family name is at the head of the current word:
/// tries to join preWords[index] with the following word(s) into a personal name.
/// </summary>
/// <param name="preWords">
/// Input word list (strings). It may be modified: when the following word is
/// split, the split-off remainder is inserted at index + 2.
/// </param>
/// <param name="index">Position in <paramref name="preWords"/> of the word to test.</param>
/// <param name="retWords">Output list; the recognised name is appended on success.</param>
/// <returns>
/// On success, the index of the next word to process (index + 2 or index + 3).
/// -1 when the current word is longer than two characters or does not start with a known family name.
/// -2 when there is no following word, when either word is null/empty or does not
/// start with a Chinese character, or when m_PosBinRule.Match accepts the word pair.
/// </returns>
private int MatchFamilyNameInHead(ArrayList preWords, int index, ArrayList retWords)
{
    // A following word is required; check this before reading preWords[index]
    // so an index at or past the end yields -2 instead of an exception.
    if (index >= preWords.Count - 1)
    {
        return -2;
    }

    String curWord = (String)preWords[index];
    if (curWord != null && curWord.Length > 2)
    {
        return -1;
    }

    String nextWord = (String)preWords[index + 1];

    // Not Chinese characters (also covers null/empty words, which previously threw).
    if (!StartsWithChineseChar(curWord) || !StartsWithChineseChar(nextWord))
    {
        return -2;
    }

    if (m_PosBinRule.Match(curWord, nextWord))
    {
        return -2;
    }

    // curWord has one or two characters here. Prefer the whole word as the
    // family name; for a two-character word fall back to its first character.
    String familyName;
    if (m_FamilyNameTbl[curWord] != null)
    {
        familyName = curWord;
    }
    else if (curWord.Length > 1 && m_FamilyNameTbl[curWord[0].ToString()] != null)
    {
        familyName = curWord[0].ToString();
    }
    else
    {
        return -1;
    }

    String name = curWord + nextWord;
    int givenNameLength = name.Length - familyName.Length;

    if (givenNameLength == 1)
    {
        // Single-character given name: also try a two-character given name by
        // appending the word after next, if it is one character long and
        // m_PosBinRule.MatchNameInHead does not match it.
        if (index < preWords.Count - 2)
        {
            String nnext = (String)preWords[index + 2];
            if (nnext.Length == 1 && !m_PosBinRule.MatchNameInHead(nnext))
            {
                retWords.Add(name + nnext);
                return index + 3;
            }
        }
    }
    else if (givenNameLength > 2)
    {
        // Given name too long: if the rest of the following word (after its
        // first character) matches MatchNameInHead, keep only that first
        // character in the name and push the rest back as a separate word.
        if (nextWord.Length > 1)
        {
            String rest = nextWord.Substring(1, nextWord.Length - 1);
            if (m_PosBinRule.MatchNameInHead(rest))
            {
                preWords.Insert(index + 2, rest);
                retWords.Add(curWord + nextWord[0].ToString());
                return index + 2;
            }
        }
    }

    retWords.Add(name);
    return index + 2;
}
/// <summary>
/// Handles the case where the family name is at the tail of the previously
/// emitted word: the last word in <paramref name="retWords"/> ends with a
/// family name and preWords[index] supplies the given name.
/// Not called by the active code path of ProcRule in this file.
/// </summary>
/// <param name="preWords">Input word list (strings).</param>
/// <param name="index">Position in <paramref name="preWords"/> of the candidate given name.</param>
/// <param name="retWords">
/// Output list. On success its last word (and possibly the one before it) is
/// replaced by a recombined word, and the recognised name is appended.
/// </param>
/// <returns>
/// On success, the index of the next word to process (index + 1 or index + 2);
/// -1 when no name could be formed.
/// </returns>
private int MatchFamilyNameInTail(ArrayList preWords, int index, ArrayList retWords)
{
    if (retWords.Count < 1)
    {
        return -1;
    }

    String curWord = (String)retWords[retWords.Count - 1];
    if (curWord.Length < 2)
    {
        return -1;
    }

    String nextWord = (String)preWords[index];
    if (nextWord.Length > 2)
    {
        return -1;
    }

    // Try a single-character family name first, then a two-character one.
    String familyName = curWord[curWord.Length - 1].ToString();
    if (m_FamilyNameTbl[familyName] == null)
    {
        familyName = curWord.Substring(curWord.Length - 2, 2);
        if (m_FamilyNameTbl[familyName] == null)
        {
            return -1;
        }
    }

    // What is left of the previous word once the family name is cut off.
    String remain = curWord.Substring(0, curWord.Length - familyName.Length);

    // Recombine the preceding words and check the part-of-speech match.
    String newWord = null;
    bool isReg;

    if (retWords.Count > 1)
    {
        // First choice: merge the remainder into the word before the last one.
        newWord = retWords[retWords.Count - 2] + remain;
        m_Pos.GetPos(newWord, out isReg);
        if (!isReg || !m_PosBinRule.MatchNameInTail(newWord))
        {
            newWord = null;
        }
        else
        {
            // Drop the last two words; the merged word replaces them below.
            // RemoveAt is required here: Remove(object) with an int argument
            // looks for an element equal to that number and removes nothing.
            retWords.RemoveAt(retWords.Count - 1);
            retWords.RemoveAt(retWords.Count - 1);
        }
    }

    if (newWord == null)
    {
        // Second choice: keep the remainder as a word of its own.
        newWord = remain;
        m_Pos.GetPos(newWord, out isReg);
        if (!isReg)
        {
            if (retWords.Count > 1)
            {
                newWord = retWords[retWords.Count - 2] + remain;
                retWords.RemoveAt(retWords.Count - 1);
            }
        }
        else
        {
            if (!m_PosBinRule.MatchNameInTail(newWord))
            {
                newWord = null;
            }
        }

        if (newWord != null)
        {
            retWords.RemoveAt(retWords.Count - 1);
        }
    }

    if (newWord == null)
    {
        return -1;
    }
    retWords.Add(newWord);

    String name = familyName + nextWord;

    // Single-character given name: also try a two-character given name by
    // appending the following word, unless MatchNameInHead matches the result.
    if (nextWord.Length == 1 && index < preWords.Count - 1)
    {
        String following = (String)preWords[index + 1];
        String givenName = nextWord + following;
        if (givenName.Length <= 2 && !m_PosBinRule.MatchNameInHead(givenName))
        {
            retWords.Add(name + following);
            return index + 2;
        }
    }

    retWords.Add(name);
    return index + 1;
}
/// <summary>
/// Rule entry point: tries to recognise a personal name starting at preWords[index].
/// </summary>
/// <param name="preWords">Input word list; may be modified by the head matcher.</param>
/// <param name="index">Position in <paramref name="preWords"/> of the word to test.</param>
/// <param name="retWords">Output list that receives the recognised name.</param>
/// <returns>
/// The value returned by MatchFamilyNameInHead, unchanged: the next index to
/// process on success, or -1 / -2 when no name was matched.
/// </returns>
public int ProcRule(ArrayList preWords, int index, ArrayList retWords)
{
    // Only head matching is active. An earlier variant, now disabled, mapped
    // results below -1 to -1 and fell back to MatchFamilyNameInTail when the
    // head matcher returned -1.
    return MatchFamilyNameInHead(preWords, index, retWords);
}
#endregion
}
}
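// (code-viewer page residue) Shortcut key help
// Copy code
// Ctrl + C
// Search code
// Ctrl + F
// Full-screen mode
// F11
// Switch theme
// Ctrl + Shift + D
// Show shortcut keys
// ?
// Increase font size
// Ctrl + =
// Decrease font size
// Ctrl + -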