?? regularexpression.cpp
字號:
if (getWordType(context->fString, context->fStart, context->fLimit, offset) != WT_LETTER || getPreviousWordType(context->fString, context->fStart, context->fLimit, offset) != WT_OTHER) return false; break; case chCloseAngle: if (context->fLength == 0 || offset == context->fStart) return false; if (getWordType(context->fString, context->fStart, context->fLimit, offset) != WT_OTHER || getPreviousWordType(context->fString, context->fStart, context->fLimit, offset) != WT_LETTER) return false; break; } return true;}bool RegularExpression::matchBackReference(Context* const context, const XMLInt32 refNo, int& offset, const short direction, const bool ignoreCase){ if (refNo <=0 || refNo >= fNoGroups) ThrowXMLwithMemMgr(IllegalArgumentException, XMLExcepts::Regex_BadRefNo, fMemoryManager); if (context->fMatch->getStartPos(refNo) < 0 || context->fMatch->getEndPos(refNo) < 0) return false; int start = context->fMatch->getStartPos(refNo); int length = context->fMatch->getEndPos(refNo) - start; int tmpOffset = (direction > 0) ? offset : offset - length; if (context->fLimit - tmpOffset < length) return false; bool match = ignoreCase ? XMLString::regionIMatches(context->fString,tmpOffset, context->fString,start,length) : XMLString::regionMatches(context->fString, tmpOffset, context->fString, start,length); if (!match) return false; offset = (direction > 0) ? offset + length : offset - length; return true;}bool RegularExpression::matchString(Context* const context, const XMLCh* const literal, int& offset, const short direction, const bool ignoreCase){ int length = XMLString::stringLen(literal); int tmpOffset = (direction > 0) ? offset : offset - length; if (context->fLimit - tmpOffset < length) return false; bool match = ignoreCase ? XMLString::regionIMatches(context->fString, tmpOffset, literal, 0, length) : XMLString::regionMatches(context->fString, tmpOffset, literal, 0, length); if (match) { offset = direction > 0 ? offset + length : offset - length; } return match;}int RegularExpression::matchCapture(Context* const context, const Op* const op, int offset, const short direction){ // No check is made for nullness of fMatch as the function is only called if // fMatch is not null. XMLInt32 index = op->getData(); int save = (index > 0) ? context->fMatch->getStartPos(index) : context->fMatch->getEndPos(-index); if (index > 0) { context->fMatch->setStartPos(index, offset); int ret = match(context, op->getNextOp(), offset, direction); if (ret < 0) context->fMatch->setStartPos(index, save); return ret; } context->fMatch->setEndPos(-index, offset); int ret = match(context, op->getNextOp(), offset, direction); if (ret < 0) context->fMatch->setEndPos(-index, save); return ret;}bool RegularExpression::matchCondition(Context* const context, const Op* const op, int offset, const short direction){ int refNo = op->getRefNo(); if ( refNo > 0) return (context->fMatch->getStartPos(refNo) >= 0 && context->fMatch->getEndPos(refNo) >= 0); return (0 <= match(context, op->getConditionFlow(), offset, direction));}int RegularExpression::parseOptions(const XMLCh* const options){ if (options == 0) return 0; int opts = 0; int length = XMLString::stringLen(options); for (int i=0; i < length; i++) { int v = getOptionValue(options[i]); if (v == 0) ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Regex_UnknownOption, options, fMemoryManager); opts |= v; } return opts;}void RegularExpression::compile(const Token* const token) { if (fOperations != 0) return; fNoClosures = 0; fOperations = compile(token, 0, false);}Op* RegularExpression::compile(const Token* const token, Op* const next, const bool reverse) { Op* ret = 0; const unsigned short tokenType = token->getTokenType(); switch(tokenType) { case Token::T_DOT: case Token::T_CHAR: case Token::T_ANCHOR: case Token::T_RANGE: case Token::T_NRANGE: case Token::T_STRING: case Token::T_BACKREFERENCE: case Token::T_EMPTY: ret = compileSingle(token, next, tokenType); break; case Token::T_CONCAT: ret = compileConcat(token, next, reverse); break; case Token::T_UNION: ret = compileUnion(token, next, reverse); break; case Token::T_CLOSURE: case Token::T_NONGREEDYCLOSURE: ret = compileClosure(token, next, reverse, tokenType); break; case Token::T_PAREN: ret = compileParenthesis(token, next, reverse); break; case Token::T_LOOKAHEAD: case Token::T_NEGATIVELOOKAHEAD: ret = compileLook(token, next, false, tokenType); break; case Token::T_LOOKBEHIND: case Token::T_NEGATIVELOOKBEHIND: ret = compileLook(token, next, true, tokenType); break; case Token::T_INDEPENDENT: case Token::T_MODIFIERGROUP: ret = compileLook(token, next, reverse, tokenType); break; case Token::T_CONDITION: ret = compileCondition(token, next, reverse); break; default: ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_UnknownTokenType, fMemoryManager); break; // this line to be deleted } return ret;}/* * Helper for Replace. This method prepares the replacement string by substituting * in actual values for parenthesized sub expressions. * * An error will be thrown if: * 1) repString references an undefined subExpression * 2) there is an unescaped chDollar which is not followed by a digit * */const XMLCh* RegularExpression::subInExp(const XMLCh* const repString, const XMLCh* const origString, const Match* subEx){ int numSubExp = subEx->getNoGroups() - 1; if (numSubExp == 0) return XMLString::replicate(repString, fMemoryManager); bool notEscaped = true; XMLBuffer newString(1023, fMemoryManager); XMLCh indexStr[2]; //holds the string rep of a indexStr[1] = chNull; int index = -1; for (const XMLCh* ptr = repString; *ptr != chNull; ptr++){ if ((*ptr == chDollarSign) && notEscaped) { ptr++; //check that after the $ is a digit if (!XMLString::isDigit(*ptr)){ //invalid replace string - $ must be followed by a digit ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_InvalidRepPattern, fMemoryManager); } indexStr[0] = *ptr; //get the digit index = XMLString::parseInt(indexStr, fMemoryManager); //convert it to an int //now check that the index is legal if (index > numSubExp){ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_InvalidRepPattern, fMemoryManager); } int start = subEx->getStartPos(index); int end = subEx->getEndPos(index); //now copy the substring into the new string for (int i=start; i<end; i++){ newString.append(origString[i]); } } else { //if you have a slash and then a character that's not a $ or /, //then it's an invalid replace string if (!notEscaped && (*ptr != chDollarSign && *ptr != chBackSlash)){ ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_InvalidRepPattern, fMemoryManager); } if (*ptr == chBackSlash){ notEscaped = false; continue; }else notEscaped = true; newString.append(*ptr); } } return XMLString::replicate(newString.getRawBuffer(), fMemoryManager); }/* * Prepares for matching. This method is called just before starting matching */void RegularExpression::prepare() { XMLMutexLock lockInit(&fMutex); compile(fTokenTree); fMinLength = fTokenTree->getMinLength(); fFirstChar = 0; if (!isSet(fOptions, PROHIBIT_HEAD_CHARACTER_OPTIMIZATION) && !isSet(fOptions, XMLSCHEMA_MODE)) { RangeToken* rangeTok = fTokenFactory->createRange(); int result = fTokenTree->analyzeFirstCharacter(rangeTok, fOptions, fTokenFactory); if (result == Token::FC_TERMINAL) { rangeTok->compactRanges(); fFirstChar = rangeTok; } } if (fOperations != 0 && fOperations->getNextOp() == 0 && (fOperations->getOpType() == Op::O_STRING || fOperations->getOpType() == Op::O_CHAR) ) { fFixedStringOnly = true; if (fOperations->getOpType() == Op::O_STRING) { fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = XMLString::replicate(fOperations->getLiteral(), fMemoryManager); } else{ XMLInt32 ch = fOperations->getData(); if ( ch >= 0x10000) { // add as constant fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = RegxUtil::decomposeToSurrogates(ch, fMemoryManager); } else { XMLCh* dummyStr = (XMLCh*) fMemoryManager->allocate(2 * sizeof(XMLCh));//new XMLCh[2]; dummyStr[0] = (XMLCh) fOperations->getData(); dummyStr[1] = chNull; fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = dummyStr; } } fBMPattern = new (fMemoryManager) BMPattern(fFixedString, 256, isSet(fOptions, IGNORE_CASE), fMemoryManager); } else if (!isSet(fOptions, XMLSCHEMA_MODE) && !isSet(fOptions, PROHIBIT_FIXED_STRING_OPTIMIZATION)) { int fixedOpts = 0; Token* tok = fTokenTree->findFixedString(fOptions, fixedOpts); fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = (tok == 0) ? 0 : XMLString::replicate(tok->getString(), fMemoryManager); if (fFixedString != 0 && XMLString::stringLen(fFixedString) < 2) { fMemoryManager->deallocate(fFixedString);//delete [] fFixedString; fFixedString = 0; } if (fFixedString != 0) { fBMPattern = new (fMemoryManager) BMPattern(fFixedString, 256, isSet(fixedOpts, IGNORE_CASE)); } }}unsigned short RegularExpression::getCharType(const XMLCh ch) { if (!isSet(fOptions, UNICODE_WORD_BOUNDARY)) { if (isSet(fOptions, USE_UNICODE_CATEGORY)) { if (fWordRange == 0) { fWordRange = fTokenFactory->getRange(fgUniIsWord); if (fWordRange == 0) ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Regex_RangeTokenGetError, fgUniIsWord, fMemoryManager); } return fWordRange->match(ch) ? WT_LETTER : WT_OTHER; } return RegxUtil::isWordChar(ch); } switch (XMLUniCharacter::getType(ch)) { case XMLUniCharacter::UPPERCASE_LETTER: case XMLUniCharacter::LOWERCASE_LETTER: case XMLUniCharacter::TITLECASE_LETTER: case XMLUniCharacter::MODIFIER_LETTER: case XMLUniCharacter::OTHER_LETTER: case XMLUniCharacter::LETTER_NUMBER: case XMLUniCharacter::DECIMAL_DIGIT_NUMBER: case XMLUniCharacter::OTHER_NUMBER: case XMLUniCharacter::COMBINING_SPACING_MARK: return WT_LETTER; case XMLUniCharacter::FORMAT: case XMLUniCharacter::NON_SPACING_MARK: case XMLUniCharacter::ENCLOSING_MARK: return WT_IGNORE; case XMLUniCharacter::CONTROL: switch (ch) { case chHTab: case chLF: case chVTab: case chFF: case chCR: return WT_OTHER; default: return WT_IGNORE; } } return WT_OTHER;}XERCES_CPP_NAMESPACE_END/** * End of file RegularExpression.cpp */
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -