?? regularexpression.cpp
字號:
/* * Copyright 2001-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *//* * $Log: RegularExpression.cpp,v $ * Revision 1.23 2004/09/08 13:56:47 peiyongz * Apache License Version 2.0 * * Revision 1.22 2004/01/13 20:05:00 peiyongz * revert code back to previous version * * Revision 1.19 2003/12/24 15:24:15 cargilld * More updates to memory management so that the static memory manager. * * Revision 1.18 2003/12/17 05:16:59 neilg * ensure all uses of ArrayJanitor use a memory manager * * Revision 1.17 2003/12/17 00:18:37 cargilld * Update to memory management so that the static memory manager (one used to call Initialize) is only for static data. * * Revision 1.16 2003/12/16 12:25:48 cargilld * Change a conditional expression to an if-else to avoid a compiler problem. * * Revision 1.15 2003/10/01 16:32:40 neilg * improve handling of out of memory conditions, bug #23415. Thanks to David Cargill. * * Revision 1.14 2003/08/14 02:57:27 knoaman * Code refactoring to improve performance of validation. * * Revision 1.13 2003/05/25 21:42:41 knoaman * Allocate/Deallocate Context::xxx only when necessary. * * Revision 1.12 2003/05/18 14:02:06 knoaman * Memory manager implementation: pass per instance manager. * * Revision 1.11 2003/05/16 21:37:00 knoaman * Memory manager implementation: Modify constructors to pass in the memory manager. * * Revision 1.10 2003/05/16 06:01:57 knoaman * Partial implementation of the configurable memory manager. * * Revision 1.9 2003/05/16 00:03:10 knoaman * Partial implementation of the configurable memory manager. * * Revision 1.8 2003/05/15 18:42:54 knoaman * Partial implementation of the configurable memory manager. * * Revision 1.7 2003/05/12 10:08:22 gareth * The correct file this time. * * Revision 1.5 2002/12/18 13:01:02 gareth * New functionality - tokenize and replace. Fixed REVISIT for case insensitive match. Patch by Jennifer Schachter. * * Revision 1.4 2002/11/04 15:17:00 tng * C++ Namespace Support. * * Revision 1.3 2002/10/15 18:56:02 knoaman * [Bug 13604] while loop never terminates. * * Revision 1.2 2002/03/18 19:29:53 knoaman * Change constant names to eliminate possible conflict with user defined ones. * * Revision 1.1.1.1 2002/02/01 22:22:30 peiyongz * sane_include * * Revision 1.6 2002/01/02 20:09:11 knoaman * Fix for regular expression patterns that begin with ".". * * Revision 1.5 2001/10/09 12:20:25 tng * Leak fix: Need to delete fMatch if adopted. * * Revision 1.4 2001/05/11 21:50:58 knoaman * Schema updates and fixes. * * Revision 1.3 2001/05/11 13:26:46 tng * Copyright update. * * Revision 1.2 2001/05/03 18:17:42 knoaman * Some design changes: * o Changed the TokenFactory from a single static instance, to a * normal class. Each RegularExpression object will have its own * instance of TokenFactory, and that instance will be passed to * other classes that need to use a TokenFactory to create Token * objects (with the exception of RangeTokenMap). * o Added a new class RangeTokenMap to map a the different ranges * in a given category to a specific RangeFactory object. In the old * design RangeFactory had dual functionality (act as a Map, and as * a factory for creating RangeToken(s)). The RangeTokenMap will * have its own copy of the TokenFactory. There will be only one * instance of the RangeTokenMap class, and that instance will be * lazily deleted when XPlatformUtils::Terminate is called. * * Revision 1.1 2001/03/02 19:22:52 knoaman * Schema: Regular expression handling part I * */// ---------------------------------------------------------------------------// Includes// ---------------------------------------------------------------------------#include <xercesc/util/regx/RegularExpression.hpp>#include <xercesc/util/PlatformUtils.hpp>#include <xercesc/util/regx/RegxUtil.hpp>#include <xercesc/util/regx/Match.hpp>#include <xercesc/util/regx/RangeToken.hpp>#include <xercesc/util/regx/RegxDefs.hpp>#include <xercesc/util/regx/XMLUniCharacter.hpp>#include <xercesc/util/regx/ParserForXMLSchema.hpp>#include <xercesc/util/Janitor.hpp>#include <xercesc/util/ParseException.hpp>#include <xercesc/util/IllegalArgumentException.hpp>#include <xercesc/framework/XMLBuffer.hpp>#include <xercesc/util/OutOfMemoryException.hpp>XERCES_CPP_NAMESPACE_BEGIN// ---------------------------------------------------------------------------// Static member data initialization// ---------------------------------------------------------------------------const unsigned int RegularExpression::MARK_PARENS = 1;const unsigned int RegularExpression::IGNORE_CASE = 2;const unsigned int RegularExpression::SINGLE_LINE = 4;const unsigned int RegularExpression::MULTIPLE_LINE = 8;const unsigned int RegularExpression::EXTENDED_COMMENT = 16;const unsigned int RegularExpression::USE_UNICODE_CATEGORY = 32;const unsigned int RegularExpression::UNICODE_WORD_BOUNDARY = 64;const unsigned int RegularExpression::PROHIBIT_HEAD_CHARACTER_OPTIMIZATION = 128;const unsigned int RegularExpression::PROHIBIT_FIXED_STRING_OPTIMIZATION = 256;const unsigned int RegularExpression::XMLSCHEMA_MODE = 512;const unsigned int RegularExpression::SPECIAL_COMMA = 1024;const unsigned short RegularExpression::WT_IGNORE = 0;const unsigned short RegularExpression::WT_LETTER = 1;const unsigned short RegularExpression::WT_OTHER = 2;RangeToken* RegularExpression::fWordRange = 0;// ---------------------------------------------------------------------------// RegularExpression::Context: Constructors and Destructor// ---------------------------------------------------------------------------RegularExpression::Context::Context(MemoryManager* const manager) : fAdoptMatch(false) , fStart(0) , fLimit(0) , fLength(0) , fSize(0) , fStringMaxLen(0) , fOffsets(0) , fMatch(0) , fString(0) , fMemoryManager(manager){}RegularExpression::Context::~Context(){ if (fOffsets) fMemoryManager->deallocate(fOffsets);//delete [] fOffsets; fMemoryManager->deallocate(fString);//delete [] fString; if (fAdoptMatch) delete fMatch;}// ---------------------------------------------------------------------------// RegularExpression::Context: Public methods// ---------------------------------------------------------------------------void RegularExpression::Context::reset(const XMLCh* const string , const int stringLen , const int start , const int limit , const int noClosures){ if (stringLen > fStringMaxLen || !fString) { fStringMaxLen = stringLen; if (fString) fMemoryManager->deallocate(fString); fString = XMLString::replicate(string, fMemoryManager); } else { memcpy(fString, string, (stringLen + 1) * sizeof(XMLCh)); } fStart = start; fLimit = limit; fLength = fLimit - fStart; if (fAdoptMatch) delete fMatch; fMatch = 0; if (fSize != noClosures) { if (fOffsets) fMemoryManager->deallocate(fOffsets);//delete [] fOffsets; fOffsets = (int*) fMemoryManager->allocate(noClosures * sizeof(int));//new int[noClosures]; } fSize = noClosures; for (int i = 0; i< fSize; i++) fOffsets[i] = -1;}bool RegularExpression::Context::nextCh(XMLInt32& ch, int& offset, const short direction){ ch = fString[offset]; if (RegxUtil::isHighSurrogate(ch)) { if ((offset + 1 < fLimit) && (direction > 0) && RegxUtil::isLowSurrogate(fString[offset+1])) { ch = RegxUtil::composeFromSurrogate(ch, fString[++offset]); } else return false; } else if (RegxUtil::isLowSurrogate(ch)) { if ((offset - 1 >= 0) && (direction <= 0) && RegxUtil::isHighSurrogate(fString[offset-1])) { ch = RegxUtil::composeFromSurrogate(fString[--offset], ch); } else return false; } return true;}// ---------------------------------------------------------------------------// RegularExpression: Constructors and Destructors// ---------------------------------------------------------------------------RegularExpression::RegularExpression(const char* const pattern, MemoryManager* const manager) :fHasBackReferences(false), fFixedStringOnly(false), fNoGroups(0), fMinLength(0), fNoClosures(0), fOptions(0), fBMPattern(0), fPattern(0), fFixedString(0), fOperations(0), fTokenTree(0), fFirstChar(0), fOpFactory(manager), fTokenFactory(0), fMemoryManager(manager){ try { XMLCh* tmpBuf = XMLString::transcode(pattern, fMemoryManager); ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager); setPattern(tmpBuf); } catch(const OutOfMemoryException&) { throw; } catch (...) { cleanUp(); throw; }}RegularExpression::RegularExpression(const char* const pattern, const char* const options, MemoryManager* const manager) :fHasBackReferences(false), fFixedStringOnly(false), fNoGroups(0), fMinLength(0), fNoClosures(0), fOptions(0), fBMPattern(0), fPattern(0), fFixedString(0), fOperations(0), fTokenTree(0), fFirstChar(0), fOpFactory(manager), fTokenFactory(0), fMemoryManager(manager){ try { XMLCh* tmpBuf = XMLString::transcode(pattern, fMemoryManager); ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager); XMLCh* tmpOptions = XMLString::transcode(options, fMemoryManager); ArrayJanitor<XMLCh> janOps(tmpOptions, fMemoryManager); setPattern(tmpBuf, tmpOptions); } catch(const OutOfMemoryException&) { throw; } catch (...) { cleanUp(); throw; }}RegularExpression::RegularExpression(const XMLCh* const pattern, MemoryManager* const manager) :fHasBackReferences(false), fFixedStringOnly(false), fNoGroups(0), fMinLength(0), fNoClosures(0), fOptions(0), fBMPattern(0), fPattern(0), fFixedString(0), fOperations(0), fTokenTree(0), fFirstChar(0), fOpFactory(manager), fTokenFactory(0), fMemoryManager(manager){ try { setPattern(pattern); } catch(const OutOfMemoryException&) { throw; } catch (...) { cleanUp(); throw; }}RegularExpression::RegularExpression(const XMLCh* const pattern, const XMLCh* const options, MemoryManager* const manager) :fHasBackReferences(false), fFixedStringOnly(false), fNoGroups(0), fMinLength(0), fNoClosures(0), fOptions(0), fBMPattern(0), fPattern(0), fFixedString(0), fOperations(0), fTokenTree(0), fFirstChar(0), fOpFactory(manager), fTokenFactory(0), fMemoryManager(manager){ try { setPattern(pattern, options); } catch(const OutOfMemoryException&) { throw; } catch (...) { cleanUp(); throw; }}RegularExpression::~RegularExpression() { cleanUp();}// ---------------------------------------------------------------------------// RegularExpression: Setter methods// ---------------------------------------------------------------------------void RegularExpression::setPattern(const XMLCh* const pattern, const XMLCh* const options) { fTokenFactory = new (fMemoryManager) TokenFactory(fMemoryManager); fOptions = parseOptions(options); fPattern = XMLString::replicate(pattern, fMemoryManager); // the following construct causes an error in an Intel 7.1 32 bit compiler for // red hat linux 7.2 // (when an exception is thrown the wrong object is deleted) //RegxParser* regxParser = isSet(fOptions, XMLSCHEMA_MODE) // ? new (fMemoryManager) ParserForXMLSchema(fMemoryManager) // : new (fMemoryManager) RegxParser(fMemoryManager); RegxParser* regxParser; if (isSet(fOptions, XMLSCHEMA_MODE)) { regxParser = new (fMemoryManager) ParserForXMLSchema(fMemoryManager); } else { regxParser = new (fMemoryManager) RegxParser(fMemoryManager); } if (regxParser) {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -