?? regularexpression.cpp
字號:
// ---------------------------------------------------------------------------XMLCh* RegularExpression::replace(const XMLCh* const matchString, const XMLCh* const replaceString){ return replace(matchString, replaceString, 0, XMLString::stringLen(matchString));}XMLCh* RegularExpression::replace(const XMLCh* const matchString, const XMLCh* const replaceString, const int start, const int end){ //check if matches zero length string - throw error if so if (matches(XMLUni::fgZeroLenString, fMemoryManager)){ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_RepPatMatchesZeroString, fMemoryManager); } RefVectorOf<Match> *subEx = new (fMemoryManager) RefVectorOf<Match>(10, true, fMemoryManager); Janitor<RefVectorOf<Match> > janSubEx(subEx); //Call to tokenize with Match vector so that we keep track of the locations //of the subExpression within each of the matches RefArrayVectorOf<XMLCh>* tokenStack = tokenize(matchString, start, end, subEx); Janitor<RefArrayVectorOf<XMLCh> > janTokStack(tokenStack); XMLBuffer result(1023, fMemoryManager); int numSubEx = 0; if (subEx && subEx->size() > 0) numSubEx = subEx->elementAt(0)->getNoGroups() - 1; int tokStackSize = tokenStack->size(); const XMLCh* curRepString = XMLString::replicate(replaceString, fMemoryManager); for (int i = 0; i < tokStackSize; i++){ result.append(tokenStack->elementAt(i)); if (i != tokStackSize - 1) { //if there are subExpressions, then determine the string we want to //substitute in. 
if (numSubEx != 0) { fMemoryManager->deallocate((XMLCh*)curRepString); curRepString = subInExp(replaceString, matchString, subEx->elementAt(i)); } result.append(curRepString); } } fMemoryManager->deallocate((XMLCh*)curRepString); return XMLString::replicate(result.getRawBuffer(), fMemoryManager); }// ---------------------------------------------------------------------------// RegularExpression: Helpers methods// ---------------------------------------------------------------------------int RegularExpression::getOptionValue(const XMLCh ch) { int ret = 0; switch (ch) { case chLatin_i: ret = IGNORE_CASE; break; case chLatin_m: ret = MULTIPLE_LINE; break; case chLatin_s: ret = SINGLE_LINE; break; case chLatin_x: ret = EXTENDED_COMMENT; break; case chLatin_u: ret = USE_UNICODE_CATEGORY; break; case chLatin_w: ret = UNICODE_WORD_BOUNDARY; break; case chLatin_F: ret = PROHIBIT_FIXED_STRING_OPTIMIZATION; break; case chLatin_H: ret = PROHIBIT_HEAD_CHARACTER_OPTIMIZATION; break; case chLatin_X: ret = XMLSCHEMA_MODE; break; case chComma: ret = SPECIAL_COMMA; break; default: break; } return ret;}int RegularExpression::match(Context* const context, const Op* const operations , int offset, const short direction){ const Op* tmpOp = operations; bool ignoreCase = isSet(fOptions, IGNORE_CASE); while (true) { if (tmpOp == 0) break; if (offset > context->fLimit || offset < context->fStart) return -1; switch(tmpOp->getOpType()) { case Op::O_CHAR: if (!matchChar(context, tmpOp->getData(), offset, direction, ignoreCase)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_DOT: if (!matchDot(context, offset, direction)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_RANGE: case Op::O_NRANGE: if (!matchRange(context, tmpOp, offset, direction, ignoreCase)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_ANCHOR: if (!matchAnchor(context, tmpOp->getData(), offset)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_BACKREFERENCE: if (!matchBackReference(context, 
tmpOp->getData(), offset, direction, ignoreCase)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_STRING: if (!matchString(context, tmpOp->getLiteral(), offset, direction, ignoreCase)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_CLOSURE: { XMLInt32 id = tmpOp->getData(); if (id >= 0) { int prevOffset = context->fOffsets[id]; if (prevOffset < 0 || prevOffset != offset) { context->fOffsets[id] = offset; } else { context->fOffsets[id] = -1; tmpOp = tmpOp->getNextOp(); break; } } int ret = match(context, tmpOp->getChild(), offset, direction); if (id >= 0) { context->fOffsets[id] = -1; } if (ret >= 0) return ret; tmpOp = tmpOp->getNextOp(); } break; case Op::O_QUESTION: { int ret = match(context, tmpOp->getChild(), offset, direction); if (ret >= 0) return ret; tmpOp = tmpOp->getNextOp(); } break; case Op::O_NONGREEDYCLOSURE: case Op::O_NONGREEDYQUESTION: { int ret = match(context,tmpOp->getNextOp(),offset,direction); if (ret >= 0) return ret; tmpOp = tmpOp->getChild(); } break; case Op::O_UNION: { return matchUnion(context, tmpOp, offset, direction); } case Op::O_CAPTURE: if (context->fMatch != 0 && tmpOp->getData() != 0) return matchCapture(context, tmpOp, offset, direction); tmpOp = tmpOp->getNextOp(); break; case Op::O_LOOKAHEAD: if (0 > match(context, tmpOp->getChild(), offset, 1)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_NEGATIVELOOKAHEAD: if (0 <= match(context, tmpOp->getChild(), offset, 1)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_LOOKBEHIND: if (0 > match(context, tmpOp->getChild(), offset, -1)) return - 1; tmpOp = tmpOp->getNextOp(); break; case Op::O_NEGATIVELOOKBEHIND: if (0 <= match(context, tmpOp->getChild(), offset, -1)) return -1; tmpOp = tmpOp->getNextOp(); break; case Op::O_INDEPENDENT: case Op::O_MODIFIER: { int ret = (tmpOp->getOpType() == Op::O_INDEPENDENT) ? 
match(context, tmpOp->getChild(), offset, direction) : matchModifier(context, tmpOp, offset, direction); if (ret < 0) return ret; offset = ret; tmpOp = tmpOp->getNextOp(); } break; case Op::O_CONDITION: if (tmpOp->getRefNo() >= fNoGroups) return -1; if (matchCondition(context, tmpOp, offset, direction)) tmpOp = tmpOp->getYesFlow(); else if (tmpOp->getNoFlow() != 0) tmpOp = tmpOp->getNoFlow(); else tmpOp = tmpOp->getNextOp(); break; } } return offset;}bool RegularExpression::matchChar(Context* const context, const XMLInt32 ch, int& offset, const short direction, const bool ignoreCase){ int tmpOffset = direction > 0 ? offset : offset - 1; if (tmpOffset >= context->fLimit || tmpOffset < 0) return false; XMLInt32 strCh = 0; if (!context->nextCh(strCh, tmpOffset, direction)) return false; bool match = ignoreCase ? matchIgnoreCase(ch, strCh) : (ch == strCh); if (!match) return false; offset = (direction > 0) ? ++tmpOffset : tmpOffset; return true;}bool RegularExpression::matchDot(Context* const context, int& offset, const short direction){ int tmpOffset = direction > 0 ? offset : offset - 1; if (tmpOffset >= context->fLimit || tmpOffset < 0) return false; XMLInt32 strCh = 0; if (!context->nextCh(strCh, tmpOffset, direction)) return false; if (!isSet(fOptions, SINGLE_LINE)) { if (direction > 0 && RegxUtil::isEOLChar(strCh)) return false; if (direction <= 0 && !RegxUtil::isEOLChar(strCh) ) return false; } offset = (direction > 0) ? ++tmpOffset : tmpOffset; return true;}bool RegularExpression::matchRange(Context* const context, const Op* const op, int& offset, const short direction, const bool ignoreCase){ int tmpOffset = direction > 0 ? 
offset : offset - 1; if (tmpOffset >= context->fLimit || tmpOffset < 0) return false; XMLInt32 strCh = 0; if (!context->nextCh(strCh, tmpOffset, direction)) return false; RangeToken* tok = (RangeToken *) op->getToken(); bool match = false; if (ignoreCase) { //REVISIT we should match ignoring case, but for now //we will do a normal match //tok = tok->getCaseInsensitiveToken(); //if (!token->match(strCh)) { // if (strCh > 0x10000) // return -1; // Do case insensitive matching - uppercase match // or lowercase match //} match = tok->match(strCh); } else match = tok->match(strCh); if (!match) return false; offset = (direction > 0) ? ++tmpOffset : tmpOffset; return true;}bool RegularExpression::matchAnchor(Context* const context, const XMLInt32 ch, const int offset){ switch ((XMLCh) ch) { case chLatin_A: if (offset != context->fStart) return false; break; case chLatin_B: if (context->fLength == 0) break; { int after = getWordType(context->fString, context->fStart, context->fLimit, offset); if (after == WT_IGNORE || after == getPreviousWordType(context->fString, context->fStart, context->fLimit, offset)) break; } return false; case chLatin_b: if (context->fLength == 0) return false; { int after = getWordType(context->fString, context->fStart, context->fLimit, offset); if (after == WT_IGNORE || after == getPreviousWordType(context->fString, context->fStart , context->fLimit, offset)) return false; } break; case chLatin_Z: case chDollarSign: if ( (XMLCh) ch == chDollarSign && isSet(fOptions, MULTIPLE_LINE)) { if (!(offset == context->fLimit || (offset < context->fLimit && RegxUtil::isEOLChar(context->fString[offset])))) return false; } else { if (!(offset == context->fLimit || (offset+1 == context->fLimit && RegxUtil::isEOLChar(context->fString[offset])) || (offset+2 == context->fLimit && context->fString[offset] == chCR && context->fString[offset+1] == chLF))) return false; } break; case chLatin_z: if (offset != context->fLimit) return false; break; case chAt: case 
chCaret: if ( (XMLCh) ch == chCaret && !isSet(fOptions, MULTIPLE_LINE)) { if (offset != context->fStart) return false; } else { if (!(offset == context->fStart || (offset > context->fStart && RegxUtil::isEOLChar(context->fString[offset-1])))) return false; } break; case chOpenAngle: if (context->fLength == 0 || offset == context->fLimit) return false;
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -