?? pcre_compile.c
字號:
so the syntax is guaranteed to be correct, but we need to check the values.Arguments: p pointer to first char after '{' minp pointer to int for min maxp pointer to int for max returned as -1 if no max errorcodeptr points to error code variableReturns: pointer to '}' on success; current ptr on error, with errorcodeptr set non-zero*/static const uschar *read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr){int min = 0;int max = -1;/* Read the minimum value and do a paranoid check: a negative value indicatesan integer overflow. */while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';if (min < 0 || min > 65535) { *errorcodeptr = ERR5; return p; }/* Read the maximum value if there is one, and again do a paranoid on its size.Also, max must not be less than min. */if (*p == '}') max = min; else { if (*(++p) != '}') { max = 0; while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; if (max < 0 || max > 65535) { *errorcodeptr = ERR5; return p; } if (max < min) { *errorcodeptr = ERR4; return p; } } }/* Fill in the required variables, and pass back the pointer to the terminating'}'. */*minp = min;*maxp = max;return p;}/************************************************** Find forward referenced subpattern **************************************************//* This function scans along a pattern's text looking for capturingsubpatterns, and counting them. If it finds a named pattern that matches thename it is given, it returns its number. Alternatively, if the name is NULL, itreturns when it reaches a given numbered subpattern. This is used for forwardreferences to subpatterns. We know that if (?P< is encountered, the name willbe terminated by '>' because that is checked in the first pass.Arguments: ptr current position in the pattern count current count of capturing parens so far encountered name name to seek, or NULL if seeking a numbered subpattern lorn name length, or subpattern number if name is NULL xmode TRUE if we are in /x modeReturns: the number of the named subpattern, or -1 if not found*/static intfind_parens(const uschar *ptr, int count, const uschar *name, int lorn, BOOL xmode){const uschar *thisname;for (; *ptr != 0; ptr++) { int term; /* Skip over backslashed characters and also entire \Q...\E */ if (*ptr == '\\') { if (*(++ptr) == 0) return -1; if (*ptr == 'Q') for (;;) { while (*(++ptr) != 0 && *ptr != '\\'); if (*ptr == 0) return -1; if (*(++ptr) == 'E') break; } continue; } /* Skip over character classes */ if (*ptr == '[') { while (*(++ptr) != ']') { if (*ptr == 0) return -1; if (*ptr == '\\') { if (*(++ptr) == 0) return -1; if (*ptr == 'Q') for (;;) { while (*(++ptr) != 0 && *ptr != '\\'); if (*ptr == 0) return -1; if (*(++ptr) == 'E') break; } continue; } } continue; } /* Skip comments in /x mode */ if (xmode && *ptr == '#') { while (*(++ptr) != 0 && *ptr != '\n'); if (*ptr == 0) return -1; continue; } /* An opening parens must now be a real metacharacter */ if (*ptr != '(') continue; if (ptr[1] != '?' && ptr[1] != '*') { count++; if (name == NULL && count == lorn) return count; continue; } ptr += 2; if (*ptr == 'P') ptr++; /* Allow optional P */ /* We have to disambiguate (?<! and (?<= from (?<name> */ if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') && *ptr != '\'') continue; count++; if (name == NULL && count == lorn) return count; term = *ptr++; if (term == '<') term = '>'; thisname = ptr; while (*ptr != term) ptr++; if (name != NULL && lorn == ptr - thisname && strncmp((const char *)name, (const char *)thisname, lorn) == 0) return count; }return -1;}/************************************************** Find first significant op code **************************************************//* This is called by several functions that scan a compiled expression lookingfor a fixed first character, or an anchoring op code etc. It skips over thingsthat do not influence this. For some calls, a change of option is important.For some calls, it makes sense to skip negative forward and all backwardassertions, and also the \b assertion; for others it does not.Arguments: code pointer to the start of the group options pointer to external options optbit the option bit whose changing is significant, or zero if none are skipassert TRUE if certain assertions are to be skippedReturns: pointer to the first significant opcode*/static const uschar*first_significant_code(const uschar *code, int *options, int optbit, BOOL skipassert){for (;;) { switch ((int)*code) { case OP_OPT: if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit)) *options = (int)code[1]; code += 2; break; case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: if (!skipassert) return code; do code += GET(code, 1); while (*code == OP_ALT); code += _pcre_OP_lengths[*code]; break; case OP_WORD_BOUNDARY: case OP_NOT_WORD_BOUNDARY: if (!skipassert) return code; /* Fall through */ case OP_CALLOUT: case OP_CREF: case OP_RREF: case OP_DEF: code += _pcre_OP_lengths[*code]; break; default: return code; } }/* Control never reaches here */}/************************************************** Find the fixed length of a pattern **************************************************//* Scan a pattern and compute the fixed length of subject that will match it,if the length is fixed. This is needed for dealing with backward assertions.In UTF8 mode, the result is in characters rather than bytes.Arguments: code points to the start of the pattern (the bracket) options the compiling optionsReturns: the fixed length, or -1 if there is no fixed length, or -2 if \C was encountered*/static intfind_fixedlength(uschar *code, int options){int length = -1;register int branchlength = 0;register uschar *cc = code + 1 + LINK_SIZE;/* Scan along the opcodes for this branch. If we get to the end of thebranch, check the length against that of the other branches. */for (;;) { int d; register int op = *cc; switch (op) { case OP_CBRA: case OP_BRA: case OP_ONCE: case OP_COND: d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options); if (d < 0) return d; branchlength += d; do cc += GET(cc, 1); while (*cc == OP_ALT); cc += 1 + LINK_SIZE; break; /* Reached end of a branch; if it's a ket it is the end of a nested call. If it's ALT it is an alternation in a nested call. If it is END it's the end of the outer call. All can be handled by the same code. */ case OP_ALT: case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_END: if (length < 0) length = branchlength; else if (length != branchlength) return -1; if (*cc != OP_ALT) return length; cc += 1 + LINK_SIZE; branchlength = 0; break; /* Skip over assertive subpatterns */ case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: do cc += GET(cc, 1); while (*cc == OP_ALT); /* Fall through */ /* Skip over things that don't match chars */ case OP_REVERSE: case OP_CREF: case OP_RREF: case OP_DEF: case OP_OPT: case OP_CALLOUT: case OP_SOD: case OP_SOM: case OP_EOD: case OP_EODN: case OP_CIRC: case OP_DOLL: case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: cc += _pcre_OP_lengths[*cc]; break; /* Handle literal characters */ case OP_CHAR: case OP_CHARNC: case OP_NOT: branchlength++; cc += 2;#ifdef SUPPORT_UTF8 if ((options & PCRE_UTF8) != 0) { while ((*cc & 0xc0) == 0x80) cc++; }#endif break; /* Handle exact repetitions. The count is already in characters, but we need to skip over a multibyte character in UTF8 mode. */ case OP_EXACT: branchlength += GET2(cc,1); cc += 4;#ifdef SUPPORT_UTF8 if ((options & PCRE_UTF8) != 0) { while((*cc & 0x80) == 0x80) cc++; }#endif break; case OP_TYPEEXACT: branchlength += GET2(cc,1); if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2; cc += 4; break; /* Handle single-char matchers */ case OP_PROP: case OP_NOTPROP: cc += 2; /* Fall through */ case OP_NOT_DIGIT: case OP_DIGIT: case OP_NOT_WHITESPACE: case OP_WHITESPACE: case OP_NOT_WORDCHAR: case OP_WORDCHAR: case OP_ANY: branchlength++; cc++; break; /* The single-byte matcher isn't allowed */ case OP_ANYBYTE: return -2; /* Check a class for variable quantification */#ifdef SUPPORT_UTF8 case OP_XCLASS: cc += GET(cc, 1) - 33; /* Fall through */ case OP_CLASS: case OP_NCLASS: cc += 33; switch (*cc) { case OP_CRSTAR: case OP_CRMINSTAR: case OP_CRQUERY: case OP_CRMINQUERY: return -1; case OP_CRRANGE: case OP_CRMINRANGE: if (GET2(cc,1) != GET2(cc,3)) return -1; branchlength += GET2(cc,1); cc += 5; break; default: branchlength++; } break; /* Anything else is variable length */ default: return -1; } }/* Control never gets here */}/************************************************** Scan compiled regex for numbered bracket **************************************************//* This little function scans through a compiled pattern until it finds acapturing bracket with the given number.Arguments: code points to start of expression utf8 TRUE in UTF-8 mode number the required bracket number
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -