// pe.cpp
// (code-viewer page residue, not part of the program) Font size:
// PE.cpp: implementation of the CPatternAlaysis class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "pe.h"
// Creates an analyser that owns no text buffer yet and switches the
// NoRepeat flag of the pattern dictionary on.
// NOTE(review): NoRepeat presumably makes equal keys share one tree node
// (node Count is read as a frequency elsewhere in this file) - confirm in
// the CBinaryTree implementation.
CPatternAlaysis::CPatternAlaysis()
    : m_pDes(NULL)   // no buffer until ConstructPatterns() allocates one
{
    m_alpDic.NoRepeat = true;
}
// Releases the text buffer owned by the analyser.
// The buffer is created with new BYTE[...] in ConstructPatterns(), so it has
// to be released with the array form of delete; using scalar delete on it is
// undefined behaviour. delete[] on a null pointer is a no-op, so no test for
// NULL is needed.
CPatternAlaysis::~CPatternAlaysis()
{
    delete[] m_pDes;
}
// Measures how many bytes must be appended to a pattern to take in the next
// word of the text.
//   lps     - position right after the current pattern (NUL-terminated text)
//   returns - number of bytes covering the blanks at lps plus the word that
//             follows them, but never less than 2
int AddWordToPattern(BYTE* lps)
{
    const BYTE* pCursor = lps;
    // step over the blanks separating the pattern from the next word
    for(; *pCursor == ' '; ++pCursor)
    {
    }
    // step over the word itself; a blank or the terminating NUL ends it
    for(; *pCursor != 0 && *pCursor != ' '; ++pCursor)
    {
    }
    const int nSpan = static_cast<int>(pCursor - lps);
    // same result as max(2, nSpan): the caller always gets at least 2
    return nSpan < 2 ? 2 : nSpan;
}
// Measures a pattern of nMinPatternWords blank-separated words starting at lps.
//   lps              - start of the pattern inside a NUL-terminated text
//   nPrevLength      - [out] offset from lps at which the next pattern starts
//                      (the caller adds it to its buffer pointer); this is the
//                      start of the second word, or, for a one-word pattern,
//                      the first non-blank byte after that word
//   nMinPatternWords - number of words in the pattern; values below 1 are
//                      treated as 1
//   returns          - pattern length in bytes, never less than 2
//
// Fix over the previous version: nPrevLength used to be written only while
// scanning the second word, so it stayed unassigned for nMinPatternWords < 2,
// and a negative word count made the counting loop run away. It is now
// assigned on every call. Results for nMinPatternWords >= 2 are unchanged.
int GetPatternLength(BYTE* lps, int& nPrevLength, int nMinPatternWords = 2)
{
    if(nMinPatternWords < 1)
        nMinPatternWords = 1;

    int nLength = 0;
    int nNextStart = -1;    // offset of the second word, once it is known
    for(int nWord = 0; nWord < nMinPatternWords; ++nWord)
    {
        // blanks in front of the word
        while(lps[nLength] == ' ')
            nLength++;
        if(nWord == 1)
            nNextStart = nLength;
        // the word itself; a blank or the terminating NUL ends it
        while(lps[nLength] && lps[nLength] != ' ')
            nLength++;
    }
    if(nNextStart < 0)
    {
        // one-word pattern: the next pattern begins after the blanks that
        // follow this word
        nNextStart = nLength;
        while(lps[nNextStart] == ' ')
            nNextStart++;
    }
    nPrevLength = nNextStart;
    // same result as max(2, nLength)
    return nLength < 2 ? 2 : nLength;
}
//////////////////////////////////////////////////////////////////////
void CPatternAlaysis::ConstructPatterns(BYTE *pSrc, int nSrcLen, LPCSTR lpcsDelimiters /*= NULL*/, int nMinPatternWords /*= 2*/, bool bFixedNGram /*= false*/)
{
// discard initial spaces
while(*pSrc == ' ')
pSrc++, nSrcLen--;
if(m_pDes)
delete m_pDes;
m_pDes = new BYTE[nSrcLen+1];
int nIndex[2] = { 0, 0 }, nDesLen = 0;
if(lpcsDelimiters == NULL)
// copy source buffer
memcpy(m_pDes, pSrc, nIndex[1] = nSrcLen);
else // discard delimiters
while (nIndex[0] < nSrcLen)
{
if(strchr(lpcsDelimiters, pSrc[nIndex[0]]) == NULL)
m_pDes[nIndex[1]++] = pSrc[nIndex[0]];
nIndex[0]++;
}
// discard repeated spaces
nIndex[0] = 0;
while (nIndex[0] < nIndex[1])
{
// discard sequenced spaces
while(m_pDes[nIndex[0]] == ' ' && m_pDes[nIndex[0]+1] == ' ')
nIndex[0]++;
m_pDes[nDesLen++] = m_pDes[nIndex[0]++];
}
m_pDes[nDesLen] = 0;
m_alpDic.RemoveAll();
// tree node to keep last success search to start with
CBinaryTreeNode<CPattern, int>* pNode = m_alpDic.Root;
// left m_alpDic Samples points to the source buffer
int nPrevLength;
CPattern node(m_pDes, GetPatternLength(m_pDes, nPrevLength, nMinPatternWords));
// scan the input buffer
while(node.m_pBuffer < m_pDes+nDesLen)
{
pNode = m_alpDic.Insert(&node, -1, pNode);
pNode->Key.m_nFrequency = pNode->Count;
if(bFixedNGram == false && pNode->Count > 1)
// (repeated pattern), increment node length by a new word length
node.m_nLength += AddWordToPattern(node.m_pBuffer+node.m_nLength);
else
{ // initialize node to next entity
node.m_pBuffer += nPrevLength;
node.m_nLength = GetPatternLength(node.m_pBuffer, nPrevLength, nMinPatternWords);
// initialize binary tree search root
pNode = m_alpDic.Root;
}
}
}
// Reports the Count member of the pattern dictionary.
int CPatternAlaysis::GetPatternCount()
{
    const int nPatterns = m_alpDic.Count;
    return nPatterns;
}
// Fills vPatterns with pointers to the patterns stored in the dictionary.
//   nSortType             - 0: dictionary order (in-order walk of m_alpDic,
//                              labelled "alphabetical" by the original author)
//                           1: by node Count (frequency), highest first
//                           2: by pattern length, highest first
//                           any other value leaves vPatterns empty
//   bIgnoreUniquePatterns - when true, patterns whose Count is not above 1
//                           are left out
//   vPatterns             - [out] cleared first, then filled; the pointers
//                           refer to keys held inside m_alpDic
void CPatternAlaysis::GetPatterns(IN int nSortType, IN bool bIgnoreUniquePatterns, OUT vector<CPattern*>& vPatterns)
{
    typedef CBinaryTreeNode<CPattern, int> PatternNode;
    typedef CBinaryTreeNode<CValue<int>, vector<CPattern*> > BucketNode;

    vPatterns.clear();

    if(nSortType == 0)
    {   // dictionary order: walk the tree from its minimum upwards
        for(PatternNode* pCur = m_alpDic.Min(m_alpDic.Root); pCur != NULL; pCur = m_alpDic.Successor(pCur))
        {
            if(bIgnoreUniquePatterns && !(pCur->Count > 1))
                continue;   // unique pattern, not wanted
            vPatterns.push_back(&pCur->Key);
        }
        return;
    }

    if(nSortType != 1 && nSortType != 2)
        return;     // unknown sort type: nothing to report

    // Group the patterns by the requested sort key in a second tree.
    // NOTE(review): relies on Insert() handing back the node for the key so
    // its Data vector can be appended to - confirm in CBinaryTree.
    const bool bByFrequency = (nSortType == 1);
    CBinaryTree<CValue<int>, int, vector<CPattern*>, vector<CPattern*>* > buckets;
    for(PatternNode* pCur = m_alpDic.Min(m_alpDic.Root); pCur != NULL; pCur = m_alpDic.Successor(pCur))
    {
        if(bIgnoreUniquePatterns && !(pCur->Count > 1))
            continue;   // unique pattern, not wanted
        const int nSortKey = bByFrequency ? pCur->Count : pCur->Key.m_nLength;
        buckets.Insert(nSortKey)->Data.push_back(&pCur->Key);
    }

    // Emit the groups from the largest key down to the smallest.
    for(BucketNode* pBucket = buckets.Max(buckets.Root); pBucket != NULL; pBucket = buckets.Predecessor(pBucket))
        vPatterns.insert(vPatterns.end(), pBucket->Data.begin(), pBucket->Data.end());
}
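// --- Code-viewer page residue (not part of the program): keyboard shortcuts ---
// Copy code:          Ctrl + C
// Search code:        Ctrl + F
// Full-screen mode:   F11
// Toggle theme:       Ctrl + Shift + D
// Show shortcuts:     ?
// Increase font size: Ctrl + =
// Decrease font size: Ctrl + -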