?? pcre_exec.c
字號:
register unsigned int c; /* Character values not kept over RMATCH() calls */register BOOL utf8; /* Local copy of UTF-8 flag for speed */BOOL minimize, possessive; /* Quantifier options *//* When recursion is not being used, all "local" variables that have to bepreserved over calls to RMATCH() are part of a "frame" which is obtained fromheap storage. Set up the top-level frame here; others are obtained from theheap whenever RMATCH() does a "recursion". See the macro definitions above. */#ifdef NO_RECURSEheapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));frame->Xprevframe = NULL; /* Marks the top level *//* Copy in the original argument variables */frame->Xeptr = eptr;frame->Xecode = ecode;frame->Xmstart = mstart;frame->Xoffset_top = offset_top;frame->Xims = ims;frame->Xeptrb = eptrb;frame->Xflags = flags;frame->Xrdepth = rdepth;/* This is where control jumps back to to effect "recursion" */HEAP_RECURSE:/* Macros make the argument variables come from the current frame */#define eptr frame->Xeptr#define ecode frame->Xecode#define mstart frame->Xmstart#define offset_top frame->Xoffset_top#define ims frame->Xims#define eptrb frame->Xeptrb#define flags frame->Xflags#define rdepth frame->Xrdepth/* Ditto for the local variables */#ifdef SUPPORT_UTF8#define charptr frame->Xcharptr#endif#define callpat frame->Xcallpat#define data frame->Xdata#define next frame->Xnext#define pp frame->Xpp#define prev frame->Xprev#define saved_eptr frame->Xsaved_eptr#define new_recursive frame->Xnew_recursive#define cur_is_word frame->Xcur_is_word#define condition frame->Xcondition#define prev_is_word frame->Xprev_is_word#define original_ims frame->Xoriginal_ims#ifdef SUPPORT_UCP#define prop_type frame->Xprop_type#define prop_value frame->Xprop_value#define prop_fail_result frame->Xprop_fail_result#define prop_category frame->Xprop_category#define prop_chartype frame->Xprop_chartype#define prop_script frame->Xprop_script#define oclength frame->Xoclength#define occhars 
frame->Xocchars#endif#define ctype frame->Xctype#define fc frame->Xfc#define fi frame->Xfi#define length frame->Xlength#define max frame->Xmax#define min frame->Xmin#define number frame->Xnumber#define offset frame->Xoffset#define op frame->Xop#define save_capture_last frame->Xsave_capture_last#define save_offset1 frame->Xsave_offset1#define save_offset2 frame->Xsave_offset2#define save_offset3 frame->Xsave_offset3#define stacksave frame->Xstacksave#define newptrb frame->Xnewptrb/* When recursion is being used, local variables are allocated on the stack andget preserved during recursion in the normal way. In this environment, fi andi, and fc and c, can be the same variables. */#else /* NO_RECURSE not defined */#define fi i#define fc c#ifdef SUPPORT_UTF8 /* Many of these variables are used only */const uschar *charptr; /* in small blocks of the code. My normal */#endif /* style of coding would have declared */const uschar *callpat; /* them within each of those blocks. */const uschar *data; /* However, in order to accommodate the */const uschar *next; /* version of this code that uses an */USPTR pp; /* external "stack" implemented on the */const uschar *prev; /* heap, it is easier to declare them all */USPTR saved_eptr; /* here, so the declarations can be cut */ /* out in a block. The only declarations */recursion_info new_recursive; /* within blocks below are for variables */ /* that do not have to be preserved over */BOOL cur_is_word; /* a recursive call to RMATCH(). 
*/BOOL condition;BOOL prev_is_word;unsigned long int original_ims;#ifdef SUPPORT_UCPint prop_type;int prop_value;int prop_fail_result;int prop_category;int prop_chartype;int prop_script;int oclength;uschar occhars[8];#endifint ctype;int length;int max;int min;int number;int offset;int op;int save_capture_last;int save_offset1, save_offset2, save_offset3;int stacksave[REC_STACK_SAVE_MAX];eptrblock newptrb;#endif /* NO_RECURSE *//* These statements are here to stop the compiler complaining about unitializedvariables. */#ifdef SUPPORT_UCPprop_value = 0;prop_fail_result = 0;#endif/* This label is used for tail recursion, which is used in a few cases evenwhen NO_RECURSE is not defined, in order to reduce the amount of stack that isused. Thanks to Ian Taylor for noticing this possibility and sending theoriginal patch. */TAIL_RECURSE:/* OK, now we can get on with the real code of the function. Recursive callsare specified by the macro RMATCH and RRETURN is used to return. WhenNO_RECURSE is *not* defined, these just turn into a recursive call to match()and a "return", respectively (possibly with some debugging if DEBUG isdefined). However, RMATCH isn't like a function call because it's quite acomplicated macro. It has to be used in one particular way. This shouldn't,however, impact performance when true recursion is being used. */#ifdef SUPPORT_UTF8utf8 = md->utf8; /* Local copy of the flag */#elseutf8 = FALSE;#endif/* First check that we haven't called match() too many times, or that wehaven't exceeded the recursive call limit. */if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);original_ims = ims; /* Save for resetting on ')' *//* At the start of a group with an unlimited repeat that may match an emptystring, the match_cbegroup flag is set. 
When this is the case, add the currentsubject pointer to the chain of such remembered pointers, to be checked when wehit the closing ket, in order to break infinite loops that match no characters.When match() is called in other circumstances, don't add to the chain. Thematch_cbegroup flag must NOT be used with tail recursion, because the memoryblock that is used is on the stack, so a new one may be required for eachmatch(). */if ((flags & match_cbegroup) != 0) { newptrb.epb_saved_eptr = eptr; newptrb.epb_prev = eptrb; eptrb = &newptrb; }/* Now start processing the opcodes. */for (;;) { minimize = possessive = FALSE; op = *ecode; /* For partial matching, remember if we ever hit the end of the subject after matching at least one subject character. */ if (md->partial && eptr >= md->end_subject && eptr > mstart) md->hitend = TRUE; switch(op) { case OP_FAIL: RRETURN(MATCH_NOMATCH); case OP_PRUNE: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, eptrb, flags, RM51); if (rrc != MATCH_NOMATCH) RRETURN(rrc); RRETURN(MATCH_PRUNE); case OP_COMMIT: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, eptrb, flags, RM52); if (rrc != MATCH_NOMATCH) RRETURN(rrc); RRETURN(MATCH_COMMIT); case OP_SKIP: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, eptrb, flags, RM53); if (rrc != MATCH_NOMATCH) RRETURN(rrc); md->start_match_ptr = eptr; /* Pass back current position */ RRETURN(MATCH_SKIP); case OP_THEN: RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, eptrb, flags, RM54); if (rrc != MATCH_NOMATCH) RRETURN(rrc); RRETURN(MATCH_THEN); /* Handle a capturing bracket. If there is space in the offset vector, save the current subject position in the working slot at the top of the vector. We mustn't change the current values of the data slot, because they may be set from a previous iteration of this group, and be referred to by a reference inside the group. 
If the bracket fails to match, we need to restore this value and also the values of the final offsets, in case they were set by a previous iteration of the same bracket. If there isn't enough space in the offset vector, treat this as if it were a non-capturing bracket. Don't worry about setting the flag for the error case here; that is handled in the code for KET. */ case OP_CBRA: case OP_SCBRA: number = GET2(ecode, 1+LINK_SIZE); offset = number << 1;#ifdef DEBUG printf("start bracket %d\n", number); printf("subject="); pchars(eptr, 16, TRUE, md); printf("\n");#endif if (offset < md->offset_max) { save_offset1 = md->offset_vector[offset]; save_offset2 = md->offset_vector[offset+1]; save_offset3 = md->offset_vector[md->offset_end - number]; save_capture_last = md->capture_last; DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); md->offset_vector[md->offset_end - number] = eptr - md->start_subject; flags = (op == OP_SCBRA)? match_cbegroup : 0; do { RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, eptrb, flags, RM1); if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); md->capture_last = save_capture_last; ecode += GET(ecode, 1); } while (*ecode == OP_ALT); DPRINTF(("bracket %d failed\n", number)); md->offset_vector[offset] = save_offset1; md->offset_vector[offset+1] = save_offset2; md->offset_vector[md->offset_end - number] = save_offset3; RRETURN(MATCH_NOMATCH); } /* FALL THROUGH ... Insufficient room for saving captured contents. Treat as a non-capturing bracket. */ /* VVVVVVVVVVVVVVVVVVVVVVVVV */ /* VVVVVVVVVVVVVVVVVVVVVVVVV */ DPRINTF(("insufficient capture room: treat as non-capturing\n")); /* VVVVVVVVVVVVVVVVVVVVVVVVV */ /* VVVVVVVVVVVVVVVVVVVVVVVVV */ /* Non-capturing bracket. Loop for all the alternatives. When we get to the final alternative within the brackets, we would return the result of a recursive call to match() whatever happened. 
We can reduce stack usage by turning this into a tail recursion, except in the case when match_cbegroup is set.*/ case OP_BRA: case OP_SBRA: DPRINTF(("start non-capturing bracket\n")); flags = (op >= OP_SBRA)? match_cbegroup : 0; for (;;) { if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */ { if (flags == 0) /* Not a possibly empty group */ { ecode += _pcre_OP_lengths[*ecode]; DPRINTF(("bracket 0 tail recursion\n")); goto TAIL_RECURSE; } /* Possibly empty group; can't use tail recursion. */ RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, eptrb, flags, RM48); RRETURN(rrc); } /* For non-final alternatives, continue the loop for a NOMATCH result; otherwise return. */ RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, eptrb, flags, RM2); if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode, 1); } /* Control never reaches here. */ /* Conditional group: compilation checked that there are no more than two branches. If the condition is false, skipping the first branch takes us past the end if there is only one branch, but that's OK because that is exactly what going to the ket would do. As there is only one branch to be obeyed, we can use tail recursion to avoid using another stack frame. */ case OP_COND: case OP_SCOND: if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */ { offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ condition = md->recursive != NULL && (offset == RREF_ANY || offset == md->recursive->group_num); ecode += condition? 3 : GET(ecode, 1); } else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */ { offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */ condition = offset < offset_top && md->offset_vector[offset] >= 0; ecode += condition? 3 : GET(ecode, 1); } else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */ { condition = FALSE; ecode += GET(ecode, 1); } /* The condition is an assertion. 
Call match() to evaluate it - setting the final argument match_condassert causes it to stop at the end of an assertion. */ else { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, match_condassert, RM3); if (rrc == MATCH_MATCH) { condition = TRUE; ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); while (*ecode == OP_ALT) ecode += GET(ecode, 1); } else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) { RRETURN(rrc); /* Need braces because of following else */ } else { condition = FALSE; ecode += GET(ecode, 1); } } /* We are now at the branch that is to be obeyed. As there is only one, we can use tail recursion to avoid using another stack frame, except when match_cbegroup is required for an unlimited repeat of a possibly empty group. If the second alternative doesn't exist, we can just plough on. */ if (condition || *ecode == OP_ALT) { ecode += 1 + LINK_SIZE; if (op == OP_SCOND) /* Possibly empty group */ { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49); RRETURN(rrc); }
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號(hào)
Ctrl + =
減小字號(hào)
Ctrl + -