?? ftwbrk.cpp
字號:
for (i = 1; i < rgWL[iWL-1]; i++)
{
w = GetWeight(vmOurDict,pchFst,pchFst+i,pchLim,0);
if (w + i > wMaxSum)
{
rgWL[0] = i;
iMax = 0;
break;
}
}
}
else
for (i = iWL-1; i>=0;i--)
{
w = GetWeight(vmOurDict,pchFst,pchFst+rgWL[i],pchLim,0);
//
// Store max weight
//
if ((w + rgWL[i] > wMaxSum) ||
((w + rgWL[i] == wMaxSum) && (max(w,rgWL[i]) <= wMaxPri)))
{
//
// if the weight is equal, choose the pair that have same len
//
wMaxSum = max(w,1) + rgWL[i];
wMaxPri = max(w,rgWL[i]);
wMaxSnd = wMaxSum - wMaxPri;
iMax = i;
}
}
iLoop = iMax;
cch = rgWL[iLoop];
}
#else
if (iWL)
{
unsigned char *pchWordWalk;
BOOL fTryLastChance = false;
INT cchLastChance = 0;
//
// Select the proper length
// (the longest one after which the next word can still be broken).
//
fCanBreakNext = false;
for(iLoop = iWL-1; (iLoop >= 0 || fTryLastChance) && (!fCanBreakNext); iLoop--)
{
cch = 0;
if (fTryLastChance)
{
iLoop = 0; // iLoop must be 0 while we're in LastChance Loop
//
// No more chance?
//
if (cchLastChance == 0)
break;
//
// Go ahead!
//
pchWordWalk = pchFst + cchLastChance;
}
else
{
pchWordWalk = pchFst + rgWL[iLoop];
}
pchDictWalk = vmOurDict;
//pchWordWalkSav = NULL;
if (!(*pchWordWalk))
{
fCanBreakNext = true;
break;
}
fCharInfoNotMatch = false;
while ((*pchWordWalk) && (!fCanBreakNext) && (!fCharInfoNotMatch))
{
//
// Can not break next word...
//
if (*pchDictWalk & ENDCHILD)
{
//
// Without this it may also loop forever.
//
if (fTryLastChance && cchLastChance > 0)
cchLastChance--;
break;
}
//
// get next TCB node's address.
//
cbDiff.b.b2 = ((*(pchDictWalk++)) & 0x3F);
cbDiff.b.b1 = *(pchDictWalk++);
cbDiff.b.b0 = *(pchDictWalk++);
#ifdef _DICTIONARY20_
//
// Not only 1 char?
//
if (((nSubStrLen = vmOurDict[cbDiff.l]) < MAX_DICT_CTRL_CHAR) &&
(vmOurDict[cbDiff.l+1] == *pchWordWalk))
{
++cbDiff.l;
for (iSubLoop = 0; iSubLoop < nSubStrLen; iSubLoop++)
{
if (vmOurDict[cbDiff.l++] != *pchWordWalk) // word not match
{
fCharInfoNotMatch = true;
if (fTryLastChance && cchLastChance > 0)
--cchLastChance;
break;
}
else
{
cch++;
pchWordWalk++;
if (vmOurDict[cbDiff.l] < MAX_DICT_CTRL_CHAR)
{
if (vmOurDict[cbDiff.l] & SUBSTRENDWORD)
{
if (fTryLastChance)
{
if (rgWL[iWL-1] < cch || !*pchWordWalk)
{
//
// Ok!!!! We got it!!!
// Change the first item of WordLength
// and assume it's correct break position
//
rgWL[0] = cchLastChance;
fCanBreakNext = true;
}
}
else
fCanBreakNext = true;
}
if (vmOurDict[cbDiff.l] & SUBSTRHAVECHILD) // this means end of substr
{ // next 3 byte will be 'Address'
pchDictWalk = vmOurDict + cbDiff.l + 1; // of next node.
break; // goto LFTWBLoop1;
}
cbDiff.l++;
}
}
}
}
else
#endif //_DICTIONARY20_
//
// Compare char
//
if ((chDict = vmOurDict[cbDiff.l]) < *pchWordWalk)
{
}
else if (chDict == *pchWordWalk)
{
++cch;
++pchWordWalk;
//
// Can break word ?
//
if (vmOurDict[cbDiff.l+1] & ENDWORD)
{
if (fTryLastChance)
{
if (rgWL[iWL-1] < cch || !*pchWordWalk)
{
//
// Ok!!!! We got it!!!
// Change the first item of WordLength
// and assume it's correct break position
//
rgWL[0] = cchLastChance;
fCanBreakNext = true;
}
}
else
fCanBreakNext = true;
}
//
// Move to first child node.
//
pchDictWalk = vmOurDict + cbDiff.l + 1;
if (!*pchWordWalk && fTryLastChance && (cchLastChance > 0))
cchLastChance--;
}
else // cbDict > *pchWord
{
//
// If this is the last chance to break the word so that the next word
// also matches (iLoop == 0), can we shorten the first word
// in favor of a longer next word?
// (If so, of course.) :)
//
if (iLoop == 0)
{
if (!fTryLastChance)
{
fTryLastChance = true;
cchLastChance = rgWL[0]-1;
}
else
if (cchLastChance > 0)
{
cchLastChance--;
}
else
{
// we tried.
fTryLastChance = false;
}
}
fCharInfoNotMatch = true;
break;
}
}//while
if (fCanBreakNext)
break; // exit for loop;
}
//
// Cannot break next word, then select longest length.
//
if (!fCanBreakNext)
iLoop = iWL-1;
LCanBreakNext:
cch = rgWL[iLoop];
}
#endif // dontuseweight
else
{
cch = 1;
}
//
// Store wordbreak position;
//
if (cch == 1)
{
if (HaveType(*pchFst,XT_THA) &&
((pchFst + 1) < pchLim) &&
(*(pchFst+1) != KARAN))
{
pchWordWalk = pchFst+1;
while ((pchWordWalk < pchLim) && HaveType(*pchWordWalk,XT_ZWIDTH|XT_FVOW))
{
++pchWordWalk;
++cch;
}
}
else if((*pchFst == 0x0D) &&
((pchFst + 1) < pchLim) &&
(*(pchFst+1) == 0x0A))
{
cch = 2;
}
}
LStoreWordLen:
if (((unsigned long)pchFst > (unsigned long)szText) && /* not a first char */
fCanCat(*(pchFst-1),*pchFst,uFlags))
{
if (iBrk && rgbBrk[iBrk] == 0)
rgbBrk[iBrk-1] += cch;
else
rgbBrk[iBrk] += cch;
}
else
if (
((unsigned long)pchFst > (unsigned long)szText) && /* not the first char */
(XCharType(*(pchFst-1)) & XCharType(*pchFst)) && /* type can mearge */
(
(
(cch == 1) &&
(!(HaveType(*pchFst,XT_WRDBEG))) &&
(!(HaveType(*pchFst,XT_LVOW) && ((pchFst + 1) < pchLim) && HaveType(*(pchFst+1),XT_THA)))
)
||
(
(rgbBrk[iBrk] == 1) ||
(iBrk && (rgbBrk[iBrk] == 0) && (rgbBrk[iBrk-1] == 1))
)
)
)
{
if (rgbBrk[iBrk])
rgbBrk[iBrk] += cch;
else
rgbBrk[iBrk-1] += cch;
}
else
{
if (rgbBrk[iBrk])
++iBrk;
rgbBrk[iBrk] = cch;
}
pchFst += cch;
pchWordWalk = pchFst;
if (iBrk == nMaxBrk)
{
rgbBrk[iBrk] = 0; // there are no room to store word break
break;
}
}
if (rgbBrk[iBrk]) // length of last word in given string
iBrk++;
rgbBrk[iBrk] = 0; // close array
return iBrk;
}
int ThaiBreakIterator::GetWeight(unsigned char* vmOurDict, unsigned char* szText,unsigned char* pchFst,unsigned char* pchLim, unsigned long dwFlags)
{
unsigned char *pchWordWalk;
unsigned char *pchDictWalk;
bool fTryLastChance = false, fCharInfoNotMatch = false;
int cchLastChance = 0;
DIFFINDEX cbDiff;
unsigned char chDict;
int weight = 0;
int nSubStrLen;
int cch = 0;
//
// End of string
//
if (!(*pchFst)) return 1000; /* maximum weight */
//
// Initialize
//
pchWordWalk = pchFst;
pchDictWalk = vmOurDict;
cbDiff.l = 0;
// pchWordWalkSav = NULL;
//
// Loop thru end of string or get the longest len
//
while ( (*pchWordWalk) &&
(pchWordWalk < pchLim) &&
(!fCharInfoNotMatch))
{
*pchWordWalk;
//
// Can not break next word...
//
if (*pchDictWalk & ENDCHILD) /* */
{
weight = cch;
break;
}
//
// get next node's address.
//
cbDiff.b.b2 = ((*(pchDictWalk++)) & 0x3F);
cbDiff.b.b1 = *(pchDictWalk++);
cbDiff.b.b0 = *(pchDictWalk++);
//
// Not only 1 char?
//
if (((nSubStrLen = vmOurDict[cbDiff.l]) < MAX_DICT_CTRL_CHAR) &&
(vmOurDict[cbDiff.l+1] == *pchWordWalk))
{
int iSubLoop;
++cbDiff.l;
for (iSubLoop = 0; iSubLoop < nSubStrLen; iSubLoop++)
{
if (vmOurDict[cbDiff.l++] != *pchWordWalk) // word not match
{
fCharInfoNotMatch = true;
break;
}
else
{
cch++;
pchWordWalk++;
if (vmOurDict[cbDiff.l] < MAX_DICT_CTRL_CHAR)
{
if (vmOurDict[cbDiff.l] & SUBSTRENDWORD)
weight = cch;
if (vmOurDict[cbDiff.l] & SUBSTRHAVECHILD) // this means end of substr
{ // next 3 byte will be 'Address'
pchDictWalk = vmOurDict + cbDiff.l + 1; // of next node.
break; // goto LFTWBLoop1;
}
cbDiff.l++;
}
}
}
}
else
//
// Compare char
//
if ((chDict = vmOurDict[cbDiff.l]) < *pchWordWalk)
{
}
else if (chDict == *pchWordWalk)
{
++cch;
++pchWordWalk;
//
// Can break word ?
//
if (vmOurDict[cbDiff.l+1] & ENDWORD)
weight = cch;
//
// Move to first child node.
//
pchDictWalk = vmOurDict + cbDiff.l + 1;
}
else // cbDict > *pchWord
{
fCharInfoNotMatch = true;
break;
}
}//while
return (weight);
}
bool ThaiBreakIterator::fCanCat(unsigned char chPrev, unsigned char ch, unsigned int uFlags)
{
//
// If the prev char is a break char, that's it!
// It's break pos, cannot cat any char to it.
// (except breakchar)
//
/*if (FBreakChar(chPrev))
return FBreakChar(ch); */
//
// Non stop?
//
if (FNonbreakChar(chPrev))
return true;
//
// Mode independence case
//
if (HaveType(chPrev,XT_THA) &&
HaveType(ch,XT_FVOW | XT_ZWIDTH))
return true;
if (ch == ' ')
return true;
else if (chPrev == ' ')
return false;
// N_Y_I
//
// Look at the mode or option of wordbreak proc
//
// SeparateSymbol :: mean symbol can't cat to anytype of
// char except symbol type. Normally use this option for
// caret movement.
//
if (uFlags & FTWB_SEPARATESYMBOL)
{
// Not symbol both
if ((HaveType(chPrev,XT_ALLSYMBOL)?1:0) != (HaveType(ch,XT_ALLSYMBOL)?1:0))
return false;
return (
(HaveType(chPrev,XT_LVOW) && HaveType(ch,XT_THA)) ||
(HaveType(chPrev,XT_ALLSYMBOL) && HaveType(ch,XT_ALLSYMBOL)) ||
(HaveType(chPrev,XT_ENG) && HaveType(ch,XT_ENG))
);
}
//
// WordWrap :: use this option only for word wrapping. Such as when
// user resize the windows.
//
else // if (uFlag & FTWB_WORDWRAP)
{
if (HaveType(ch,XT_WRDBEG) && !HaveType(chPrev,XT_WRDBEG))
return false;
return (
HaveType(ch,XT_WRDEND | XT_SNTEND) ||
HaveType(ch,XT_SNTEND) ||
HaveType(chPrev,XT_WRDBEG) ||
(HaveType(chPrev,XT_LVOW) && HaveType(ch,XT_THA)) ||
(HaveType(chPrev,XT_PUNCT) && HaveType(ch,XT_ENG|XT_DIGIT)) ||
HaveType(ch,XT_PUNCT)
);
}
//
// default
//
return false;
}
bool ThaiBreakIterator::HaveType(char ch, unsigned long IsType)
{
#ifdef _DEBUG
unsigned long uType = m_rgThaiCharTypeTable[(unsigned char)ch];
bool bReturn = ((uType & IsType)?true:false);
return bReturn;
#else
return m_rgThaiCharTypeTable[(unsigned char)ch] & IsType;
#endif
}
unsigned long ThaiBreakIterator::XCharType(char ch)
{
#ifdef _DEBUG
unsigned long nReturn = m_rgThaiCharTypeTable[(unsigned char)ch];
return nReturn;
#else
return m_rgThaiCharTypeTable[(unsigned char)ch];
#endif
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -