?? pcre_exec.c
字號:
/************************************************** Perl-Compatible Regular Expressions **************************************************//* PCRE is a library of functions to support regular expressions whose syntaxand semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Copyright (c) 1997-2007 University of Cambridge-----------------------------------------------------------------------------Redistribution and use in source and binary forms, with or withoutmodification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Cambridge nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THEIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSEARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BELIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, ORCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OFSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESSINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER INCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THEPOSSIBILITY OF SUCH DAMAGE.-----------------------------------------------------------------------------*//* This module contains pcre_exec(), the externally visible function that doespattern matching using an NFA algorithm, trying to mimic Perl as closely aspossible. There are also some static supporting functions. */#ifdef HAVE_CONFIG_H#include "config.h"#endif#define NLBLOCK md /* Block containing newline information */#define PSSTART start_subject /* Field containing processed string start */#define PSEND end_subject /* Field containing processed string end */#include "pcre_internal.h"/* Undefine some potentially clashing cpp symbols */#undef min#undef max/* Flag bits for the match() function */#define match_condassert 0x01 /* Called to check a condition assertion */#define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group *//* Non-error returns from the match() function. Error returns are externallydefined PCRE_ERROR_xxx codes, which are all negative. */#define MATCH_MATCH 1#define MATCH_NOMATCH 0/* Special internal returns from the match() function. Make them sufficientlynegative to avoid the external error codes. */#define MATCH_COMMIT (-999)#define MATCH_PRUNE (-998)#define MATCH_SKIP (-997)#define MATCH_THEN (-996)/* Maximum number of ints of offset to save on the stack for recursive calls.If the offset vector is bigger, malloc is used. This should be a multiple of 3,because the offset vector is always a multiple of 3 long. 
*/#define REC_STACK_SAVE_MAX 30/* Min and max values for the common repeats; for the maxima, 0 => infinity */static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };#ifdef DEBUG/************************************************** Debugging function to print chars **************************************************//* Print a sequence of chars in printable format, stopping at the end of thesubject if the requested.Arguments: p points to characters length number to print is_subject TRUE if printing from within md->start_subject md pointer to matching data block, if is_subject is TRUEReturns: nothing*/static voidpchars(const uschar *p, int length, BOOL is_subject, match_data *md){unsigned int c;if (is_subject && length > md->end_subject - p) length = md->end_subject - p;while (length-- > 0) if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);}#endif/************************************************** Match a back-reference **************************************************//* If a back reference hasn't been set, the length that is passed is greaterthan the number of characters left in the string, so the match fails.Arguments: offset index into the offset vector eptr points into the subject length length to be matched md points to match data block ims the ims flagsReturns: TRUE if matched*/static BOOLmatch_ref(int offset, register USPTR eptr, int length, match_data *md, unsigned long int ims){USPTR p = md->start_subject + md->offset_vector[offset];#ifdef DEBUGif (eptr >= md->end_subject) printf("matching subject <null>");else { printf("matching subject "); pchars(eptr, length, TRUE, md); }printf(" against backref ");pchars(p, length, FALSE, md);printf("\n");#endif/* Always fail if not enough characters left */if (length > md->end_subject - eptr) return FALSE;/* Separate the caselesss case for speed */if ((ims & PCRE_CASELESS) != 0) { while (length-- > 0) if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; 
}else { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }return TRUE;}/******************************************************************************************************************************************************* RECURSION IN THE match() FUNCTIONThe match() function is highly recursive, though not every recursive callincreases the recursive depth. Nevertheless, some regular expressions can causeit to recurse to a great depth. I was writing for Unix, so I just let it callitself recursively. This uses the stack for saving everything that has to besaved for a recursive call. On Unix, the stack can be large, and this worksfine.It turns out that on some non-Unix-like systems there are problems withprograms that use a lot of stack. (This despite the fact that every last chiphas oodles of memory these days, and techniques for extending the stack havebeen known for decades.) So....There is a fudge, triggered by defining NO_RECURSE, which avoids recursivecalls by keeping local variables that need to be preserved in blocks of memoryobtained from malloc() instead instead of on the stack. Macros are used toachieve this so that the actual code doesn't look very different to what italways used to.The original heap-recursive code used longjmp(). However, it seems that thiscan be very slow on some operating systems. Following a suggestion from StanSwitzer, the use of longjmp() has been abolished, at the cost of having toprovide a unique number for each call to RMATCH. There is no way of generatinga sequence of numbers at compile time in C. I have given them names, to makethem stand out more clearly.Crude tests on x86 Linux show a small speedup of around 5-8%. However, onFreeBSD, avoiding longjmp() more than halves the time taken to run the standardtests. 
Furthermore, not using longjmp() means that local dynamic variablesdon't have indeterminate values; this has meant that the frame size can bereduced because the result can be "passed back" by straight setting of thevariable instead of being passed in the frame.*******************************************************************************************************************************************************//* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURNbelow must be updated in sync. */enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, RM51, RM52, RM53, RM54 };/* These versions of the macros use the stack, as normal. There are debuggingversions and production versions. Note that the "rw" argument of RMATCH isn'tactuall used in this definition. */#ifndef NO_RECURSE#define REGISTER register#ifdef DEBUG#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \ { \ printf("match() called in line %d\n", __LINE__); \ rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \ printf("to line %d\n", __LINE__); \ }#define RRETURN(ra) \ { \ printf("match() returned %d from line %d ", ra, __LINE__); \ return ra; \ }#else#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \ rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)#define RRETURN(ra) return ra#endif#else/* These versions of the macros manage a private stack on the heap. Note thatthe "rd" argument of RMATCH isn't actually used in this definition. It's the mdargument of match(), which never changes. 
*/#define REGISTER#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\ {\ heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\ frame->Xwhere = rw; \ newframe->Xeptr = ra;\ newframe->Xecode = rb;\ newframe->Xmstart = mstart;\ newframe->Xoffset_top = rc;\ newframe->Xims = re;\ newframe->Xeptrb = rf;\ newframe->Xflags = rg;\ newframe->Xrdepth = frame->Xrdepth + 1;\ newframe->Xprevframe = frame;\ frame = newframe;\ DPRINTF(("restarting from line %d\n", __LINE__));\ goto HEAP_RECURSE;\ L_##rw:\ DPRINTF(("jumped back to line %d\n", __LINE__));\ }#define RRETURN(ra)\ {\ heapframe *newframe = frame;\ frame = newframe->Xprevframe;\ (pcre_stack_free)(newframe);\ if (frame != NULL)\ {\ rrc = ra;\ goto HEAP_RETURN;\ }\ return ra;\ }/* Structure for remembering the local variables in a private frame */typedef struct heapframe { struct heapframe *Xprevframe; /* Function arguments that may change */ const uschar *Xeptr; const uschar *Xecode; const uschar *Xmstart; int Xoffset_top; long int Xims; eptrblock *Xeptrb; int Xflags; unsigned int Xrdepth; /* Function local variables */ const uschar *Xcallpat; const uschar *Xcharptr; const uschar *Xdata; const uschar *Xnext; const uschar *Xpp; const uschar *Xprev; const uschar *Xsaved_eptr; recursion_info Xnew_recursive; BOOL Xcur_is_word; BOOL Xcondition; BOOL Xprev_is_word; unsigned long int Xoriginal_ims;#ifdef SUPPORT_UCP int Xprop_type; int Xprop_value; int Xprop_fail_result; int Xprop_category; int Xprop_chartype; int Xprop_script; int Xoclength; uschar Xocchars[8];#endif int Xctype; unsigned int Xfc; int Xfi; int Xlength; int Xmax; int Xmin; int Xnumber; int Xoffset; int Xop; int Xsave_capture_last; int Xsave_offset1, Xsave_offset2, Xsave_offset3; int Xstacksave[REC_STACK_SAVE_MAX]; eptrblock Xnewptrb; /* Where to jump back to */ int Xwhere;} 
heapframe;#endif/******************************************************************************************************************************************************//************************************************** Match from current position **************************************************//* This function is called recursively in many circumstances. Whenever itreturns a negative (error) response, the outer incarnation must also return thesame response.Performance note: It might be tempting to extract commonly used fields from themd structure (e.g. utf8, end_subject) into individual variables to improveperformance. Tests using gcc on a SPARC disproved this; in the first case, itmade performance worse.Arguments: eptr pointer to current character in subject ecode pointer to current position in compiled code mstart pointer to the current match start position (can be modified by encountering \K) offset_top current top pointer md pointer to "static" info for the match ims current /i, /m, and /s options eptrb pointer to chain of blocks containing eptr at start of brackets - for testing for empty matches flags can contain match_condassert - this is an assertion condition match_cbegroup - this is the start of an unlimited repeat group that can match an empty string rdepth the recursion depthReturns: MATCH_MATCH if matched ) these values are >= 0 MATCH_NOMATCH if failed to match ) a negative PCRE_ERROR_xxx value if aborted by an error condition (e.g. stopped by repeated call or recursion limit)*/static intmatch(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart, int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, int flags, unsigned int rdepth){/* These variables do not need to be preserved over recursion in this function,so they can be ordinary variables in all cases. Mark some of them with"register" because they are used a lot in loops. 
*/register int rrc; /* Returns from recursive calls */register int i; /* Used for loops not involving calls to RMATCH() */
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -