?? extractwords.cs
字號:
{
m_GameNodes.Add(obj);
}
}
}
deep = 0;
nodes.Clear();
}
}
else
{
nodes.Add(begin);
deep++;
T_WordInfo last = (T_WordInfo)words[begin];
bool nextStep = false;
bool reach = false;
int endPos = last.Position + last.Word.Length - 1;
int oldDeep = deep;
int oldSpace = spaceNum;
for (int i = begin + 1; i <= end; i++)
{
T_WordInfo cur = (T_WordInfo)words[i];
if (endPos < cur.Position + cur.Word.Length - 1)
{
endPos = cur.Position + cur.Word.Length - 1;
}
if (last.Position + last.Word.Length <= cur.Position)
{
nextStep = true;
if (reach)
{
reach = false;
spaceNum = oldSpace;
deep = oldDeep;
nodes.RemoveAt(nodes.Count - 1);
}
spaceNum += cur.Position - (last.Position + last.Word.Length);
List<int> oneNodes;
oneNodes = GameTree(words, nodes, false, i, end, ref spaceNum, ref deep);
if (oneNodes != null)
{
bool select = false;
if (m_MinSpace > spaceNum ||
(m_MinSpace == spaceNum && deep < m_MinDeep))
{
select = true;
}
else if (m_MinDeep == deep && m_MinSpace == spaceNum)
{
if (m_CompareByPos != null && m_MinSpace == 0)
{
select = m_CompareByPos(words, m_GameNodes, oneNodes);
}
else
{
select = CompareGroup(words, m_GameNodes, oneNodes, MatchDirection);
}
}
if (select)
{
reach = true;
nextStep = false;
m_MinDeep = deep;
m_MinSpace = spaceNum;
m_GameNodes.Clear();
foreach (int obj in oneNodes)
{
m_GameNodes.Add(obj);
}
}
else
{
spaceNum = oldSpace;
deep = oldDeep;
nodes.RemoveRange(deep, nodes.Count - deep);
}
}
else
{
spaceNum = oldSpace;
deep = oldDeep;
nodes.RemoveRange(deep , nodes.Count - deep);
}
}
}
if (!nextStep)
{
spaceNum += endPos - (last.Position + last.Word.Length-1);
List<int> ret = new List<int>();
foreach (int obj in nodes)
{
ret.Add(obj);
}
return ret;
}
}
return null;
}
/// <summary>
/// Extracts the words matched in the full text and, wherever consecutive matches
/// overlap, keeps only the combination selected by the game-tree search.
/// </summary>
/// <param name="fullText">The text to scan.</param>
/// <returns>
/// A list of the retained matches. The list is empty when
/// <c>ExtractFullText</c> returns no matches.
/// </returns>
public List<T_WordInfo> ExtractFullTextMaxMatch(String fullText)
{
    List<T_WordInfo> retWords = new List<T_WordInfo>();
    List<T_WordInfo> words = ExtractFullText(fullText);

    int start = 0;
    while (start < words.Count)
    {
        // Grow [start, groupEnd] while the next word begins at or before the
        // right-most character position covered by the group so far.
        int groupEnd = start;
        int coveredEnd = 0;
        while (groupEnd < words.Count - 1)
        {
            if (groupEnd - start > 16)
            {
                // Deeply nested overlaps are rare; when they do occur the group is
                // cut off here so the game-tree traversal does not go too deep and
                // hurt performance.
                break;
            }

            T_WordInfo member = words[groupEnd];
            int memberEnd = member.Position + member.Word.Length - 1;
            if (memberEnd > coveredEnd)
            {
                coveredEnd = memberEnd;
            }

            if (words[groupEnd + 1].Position > coveredEnd)
            {
                break;
            }

            groupEnd++;
        }

        if (groupEnd == start)
        {
            // The word overlaps nothing after it: keep it unchanged.
            retWords.Add(words[start]);
            start++;
            continue;
        }

        // Reset the shared search state, then let GameTree record the chosen
        // word indices in m_GameNodes.
        int spaceNum = 0;
        int deep = 0;
        m_GameNodes = new List<int>();
        m_MinDeep = 65535;
        m_MinSpace = 65535 * 256;
        GameTree(words, new List<int>(), true, start, groupEnd, ref spaceNum, ref deep);

        foreach (int chosen in m_GameNodes)
        {
            retWords.Add(words[chosen]);
        }

        start = groupEnd + 1;
    }

    return retWords;
}
/// <summary>
/// Scans the full text with the word DFA and collects every word it reports,
/// including words that overlap or are contained in one another.
/// </summary>
/// <param name="fullText">The text to scan; may be null or empty.</param>
/// <returns>
/// A list of the matches in ascending order of start position. The list is
/// empty (never null) when the input is null or empty or nothing matches.
/// </returns>
public List<T_WordInfo> ExtractFullText(String fullText)
{
    List<T_WordInfo> words = new List<T_WordInfo>();
    if (string.IsNullOrEmpty(fullText))
    {
        return words;
    }

    T_DfaUnit cur = null;   // current DFA state; null means no match attempt is in progress
    bool find = false;      // true while a match attempt that started at pos is running
    int pos = 0;            // start index of the current match attempt
    int i = 0;
    while (i < fullText.Length)
    {
        cur = m_WordDfa.Next(cur, fullText[i]);
        if (cur != null && !find)
        {
            pos = i;
            find = true;
        }

        if (find)
        {
            bool attemptOver;
            if (cur == null)
            {
                // The transition failed, so the attempt that started at pos is finished.
                attemptOver = true;
            }
            else
            {
                attemptOver = false;
                if (cur.QuitWord != null)
                {
                    T_WordInfo wordInfo = new T_WordInfo();
                    wordInfo.Word = cur.QuitWord;
                    wordInfo.Position = pos;
                    wordInfo.Rank = m_WordDfa.GetRank(wordInfo.Word);
                    wordInfo.Tag = cur.Tag;
                    words.Add(wordInfo);

                    // A state without children cannot extend to a longer word.
                    attemptOver = cur.Childs == null;
                }

                if (i == fullText.Length - 1)
                {
                    // The text ran out in the middle of an attempt. Without this,
                    // words starting after pos inside the trailing span would never
                    // be tried (e.g. "bc" in "abc" when "abcd" is also a word).
                    attemptOver = true;
                }
            }

            if (attemptOver)
            {
                // Words may be contained in or overlap one another, so backtrack
                // and start the next attempt one character after the previous start.
                find = false;
                cur = null;
                i = pos + 1;
                continue;
            }
        }

        i++;
    }

    return words;
}
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -