?? pcre_internal.h
字號:
#define ESC_n '\n'
#endif
/* Default value for ESC_r (carriage return). A definition made before this
point takes precedence and is left untouched. */
#if !defined(ESC_r)
#define ESC_r '\r'
#endif
/* Default value for the tab escape. The name is ESC_tee rather than ESC_t
because ESC_t cannot officially be used: it is a POSIX reserved identifier
(presumably because of all the others like size_t). A definition made before
this point takes precedence. */
#if !defined(ESC_tee)
#define ESC_tee '\t'
#endif
/* Codes for the different types of Unicode property. Each code says how the
property value that accompanies it is to be interpreted. The values are
consecutive small integers starting at zero.
NOTE(review): presumably one of these codes is the first of the two bytes that
follow OP_PROP/OP_NOTPROP and XCL_PROP/XCL_NOTPROP - confirm against the
compiling and matching code. */
#define PT_ANY 0 /* Any property - matches all chars */
#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */
#define PT_GC 2 /* General characteristic (e.g. L) */
#define PT_PC 3 /* Particular characteristic (e.g. Lu) */
#define PT_SC 4 /* Script (e.g. Han) */
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain UTF-8 characters with values greater than 255.

XCL_NOT and XCL_MAP are distinct single-bit flags, so both can be set in the
same flags value. XCL_END to XCL_NOTPROP are item type codes: each one says
what kind of data follows it, and XCL_END terminates the list of items.
NOTE(review): the flag values and the item codes overlap numerically (for
example XCL_MAP and XCL_RANGE are both 2), so they are presumably never stored
in the same position - confirm against the code that builds OP_XCLASS. */
#define XCL_NOT 0x01 /* Flag: this is a negative class */
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
#define XCL_END 0 /* Marks end of individual items */
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
#define XCL_NOTPROP 4 /* Unicode inverted property (2-byte property code follows) */
/* Escaped items that are not just an encoding of a particular data value such
as \n. The following constraints apply to this list:

  . Every value must be non-zero, because check_escape() returns the negation
    of the value.

  . Up to ESC_z, the items must appear in the same order as in the opcode
    definitions below. ESC_dum1 is a dummy that keeps the numbering in step
    with OP_ANY, which corresponds to "." rather than to an escape sequence.

  . ESC_REF must be the final item, as subsequent values are used for
    backreferences (\1, \2, \3, etc).

  . There are two tests in the code for an escape greater than ESC_b and less
    than ESC_Z to detect the types that may be repeated. These are the types
    that consume characters. If any new escapes are put in between that don't
    consume a character, that code will have to change.

The comment on each item gives its value and, up to ESC_z, the opcode that has
the same value. */
enum {
  ESC_A = 1,   /*  1  OP_SOD */
  ESC_G,       /*  2  OP_SOM */
  ESC_K,       /*  3  OP_SET_SOM */
  ESC_B,       /*  4  OP_NOT_WORD_BOUNDARY */
  ESC_b,       /*  5  OP_WORD_BOUNDARY */
  ESC_D,       /*  6  OP_NOT_DIGIT */
  ESC_d,       /*  7  OP_DIGIT */
  ESC_S,       /*  8  OP_NOT_WHITESPACE */
  ESC_s,       /*  9  OP_WHITESPACE */
  ESC_W,       /* 10  OP_NOT_WORDCHAR */
  ESC_w,       /* 11  OP_WORDCHAR */
  ESC_dum1,    /* 12  OP_ANY (dummy, no escape sequence) */
  ESC_C,       /* 13  OP_ANYBYTE */
  ESC_P,       /* 14  OP_NOTPROP */
  ESC_p,       /* 15  OP_PROP */
  ESC_R,       /* 16  OP_ANYNL */
  ESC_H,       /* 17  OP_NOT_HSPACE */
  ESC_h,       /* 18  OP_HSPACE */
  ESC_V,       /* 19  OP_NOT_VSPACE */
  ESC_v,       /* 20  OP_VSPACE */
  ESC_X,       /* 21  OP_EXTUNI */
  ESC_Z,       /* 22  OP_EODN */
  ESC_z,       /* 23  OP_EOD - last item tied to the opcode order */
  ESC_E,       /* 24 */
  ESC_Q,       /* 25 */
  ESC_k,       /* 26 */
  ESC_REF      /* 27  must stay last: larger values are backreferences */
};
/* Opcode table: the codes for the operations in a compiled regex. Starting
from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in order to
the list of escapes immediately above (ESC_A to ESC_z). The number in each
comment is the value of the opcode.
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
that follow (OP_NAME_LIST and OP_LENGTHS) must also be updated to match. There
is also a table called "coptable" in pcre_dfa_exec.c that must be updated. */
enum {
OP_END, /* 0 End of pattern */
/* Values corresponding to backslashed metacharacters */
OP_SOD, /* 1 Start of data: \A */
OP_SOM, /* 2 Start of match (subject + offset): \G */
OP_SET_SOM, /* 3 Set start of match (\K) */
OP_NOT_WORD_BOUNDARY, /* 4 \B */
OP_WORD_BOUNDARY, /* 5 \b */
OP_NOT_DIGIT, /* 6 \D */
OP_DIGIT, /* 7 \d */
OP_NOT_WHITESPACE, /* 8 \S */
OP_WHITESPACE, /* 9 \s */
OP_NOT_WORDCHAR, /* 10 \W */
OP_WORDCHAR, /* 11 \w */
OP_ANY, /* 12 Match any character */
OP_ANYBYTE, /* 13 Match any byte (\C); different to OP_ANY for UTF-8 */
OP_NOTPROP, /* 14 \P (not Unicode property) */
OP_PROP, /* 15 \p (Unicode property) */
OP_ANYNL, /* 16 \R (any newline sequence) */
OP_NOT_HSPACE, /* 17 \H (not horizontal whitespace) */
OP_HSPACE, /* 18 \h (horizontal whitespace) */
OP_NOT_VSPACE, /* 19 \V (not vertical whitespace) */
OP_VSPACE, /* 20 \v (vertical whitespace) */
OP_EXTUNI, /* 21 \X (extended Unicode sequence) */
OP_EODN, /* 22 End of data or \n at end of data: \Z. */
OP_EOD, /* 23 End of data: \z */
/* End of the opcodes that are tied to the order of the escape list */
OP_OPT, /* 24 Set runtime options */
OP_CIRC, /* 25 Start of line - varies with multiline switch */
OP_DOLL, /* 26 End of line - varies with multiline switch */
OP_CHAR, /* 27 Match one character, casefully */
OP_CHARNC, /* 28 Match one character, caselessly */
OP_NOT, /* 29 Match one character, not the following one */
OP_STAR, /* 30 The maximizing and minimizing versions of */
OP_MINSTAR, /* 31 these six opcodes must come in pairs, with */
OP_PLUS, /* 32 the minimizing one second. */
OP_MINPLUS, /* 33 This first set applies to single characters.*/
OP_QUERY, /* 34 */
OP_MINQUERY, /* 35 */
OP_UPTO, /* 36 From 0 to n matches */
OP_MINUPTO, /* 37 */
OP_EXACT, /* 38 Exactly n matches */
OP_POSSTAR, /* 39 Possessified star */
OP_POSPLUS, /* 40 Possessified plus */
OP_POSQUERY, /* 41 Possessified query */
OP_POSUPTO, /* 42 Possessified upto */
OP_NOTSTAR, /* 43 The maximizing and minimizing versions of */
OP_NOTMINSTAR, /* 44 these six opcodes must come in pairs, with */
OP_NOTPLUS, /* 45 the minimizing one second. They must be in */
OP_NOTMINPLUS, /* 46 exactly the same order as those above. */
OP_NOTQUERY, /* 47 This set applies to "not" single characters. */
OP_NOTMINQUERY, /* 48 */
OP_NOTUPTO, /* 49 From 0 to n matches */
OP_NOTMINUPTO, /* 50 */
OP_NOTEXACT, /* 51 Exactly n matches */
OP_NOTPOSSTAR, /* 52 Possessified versions */
OP_NOTPOSPLUS, /* 53 */
OP_NOTPOSQUERY, /* 54 */
OP_NOTPOSUPTO, /* 55 */
OP_TYPESTAR, /* 56 The maximizing and minimizing versions of */
OP_TYPEMINSTAR, /* 57 these six opcodes must come in pairs, with */
OP_TYPEPLUS, /* 58 the minimizing one second. These codes must */
OP_TYPEMINPLUS, /* 59 be in exactly the same order as those above. */
OP_TYPEQUERY, /* 60 This set applies to character types such as \d */
OP_TYPEMINQUERY, /* 61 */
OP_TYPEUPTO, /* 62 From 0 to n matches */
OP_TYPEMINUPTO, /* 63 */
OP_TYPEEXACT, /* 64 Exactly n matches */
OP_TYPEPOSSTAR, /* 65 Possessified versions */
OP_TYPEPOSPLUS, /* 66 */
OP_TYPEPOSQUERY, /* 67 */
OP_TYPEPOSUPTO, /* 68 */
OP_CRSTAR, /* 69 The maximizing and minimizing versions of */
OP_CRMINSTAR, /* 70 all these opcodes must come in pairs, with */
OP_CRPLUS, /* 71 the minimizing one second. These codes must */
OP_CRMINPLUS, /* 72 be in exactly the same order as those above. */
OP_CRQUERY, /* 73 These are for character classes and back refs */
OP_CRMINQUERY, /* 74 */
OP_CRRANGE, /* 75 These are different to the three sets above. */
OP_CRMINRANGE, /* 76 */
OP_CLASS, /* 77 Match a character class, chars < 256 only */
OP_NCLASS, /* 78 Same, but the bitmap was created from a negative
class - the difference is relevant only when a UTF-8
character > 255 is encountered. */
OP_XCLASS, /* 79 Extended class for handling UTF-8 chars within the
class. This does both positive and negative. */
OP_REF, /* 80 Match a back reference */
OP_RECURSE, /* 81 Match a numbered subpattern (possibly recursive) */
OP_CALLOUT, /* 82 Call out to external function if provided */
OP_ALT, /* 83 Start of alternation */
OP_KET, /* 84 End of group that doesn't have an unbounded repeat */
OP_KETRMAX, /* 85 These two must remain together and in this */
OP_KETRMIN, /* 86 order. They are for groups that repeat for ever. */
/* The assertions must come before BRA, CBRA, ONCE, and COND.*/
OP_ASSERT, /* 87 Positive lookahead */
OP_ASSERT_NOT, /* 88 Negative lookahead */
OP_ASSERTBACK, /* 89 Positive lookbehind */
OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
OP_REVERSE, /* 91 Move pointer back - used in lookbehind assertions */
/* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
as there's a test for >= ONCE for a subpattern that isn't an assertion. */
OP_ONCE, /* 92 Atomic group */
OP_BRA, /* 93 Start of non-capturing bracket */
OP_CBRA, /* 94 Start of capturing bracket */
OP_COND, /* 95 Conditional group */
/* These three must follow the previous three (BRA, CBRA, COND), in the same
order. There's a check for >= SBRA to distinguish the two sets. */
OP_SBRA, /* 96 Start of non-capturing bracket, check empty */
OP_SCBRA, /* 97 Start of capturing bracket, check empty */
OP_SCOND, /* 98 Conditional group, check empty */
OP_CREF, /* 99 Used to hold a capture number as condition */
OP_RREF, /* 100 Used to hold a recursion number as condition */
OP_DEF, /* 101 The DEFINE condition */
OP_BRAZERO, /* 102 These two must remain together and in this */
OP_BRAMINZERO, /* 103 order. */
/* These are backtracking control verbs */
OP_PRUNE, /* 104 */
OP_SKIP, /* 105 */
OP_THEN, /* 106 */
OP_COMMIT, /* 107 */
/* These are forced failure and success verbs */
OP_FAIL, /* 108 */
OP_ACCEPT /* 109 - the last opcode; no trailing comma */
};
/* Textual names for all the opcodes, as a comma-separated list of string
literals in opcode order (one string per opcode, 110 in total). The names are
used only for debugging, and the macro is referenced only in pcre_printint.c.
The numbers in the comments below are the opcode values that each group of
names covers. */
#define OP_NAME_LIST \
  /* 0 OP_END */ \
  "End", \
  /* 1-11 OP_SOD to OP_WORDCHAR */ \
  "\\A", "\\G", "\\K", "\\B", "\\b", \
  "\\D", "\\d", "\\S", "\\s", "\\W", "\\w", \
  /* 12-15 OP_ANY to OP_PROP */ \
  "Any", "Anybyte", "notprop", "prop", \
  /* 16-23 OP_ANYNL to OP_EOD */ \
  "\\R", "\\H", "\\h", "\\V", "\\v", \
  "extuni", "\\Z", "\\z", \
  /* 24-29 OP_OPT to OP_NOT */ \
  "Opt", "^", "$", \
  "char", "charnc", "not", \
  /* 30-42 OP_STAR to OP_POSUPTO */ \
  "*", "*?", "+", "+?", "?", "??", \
  "{", "{", "{", \
  "*+","++", "?+", "{", \
  /* 43-55 OP_NOTSTAR to OP_NOTPOSUPTO */ \
  "*", "*?", "+", "+?", "?", "??", \
  "{", "{", "{", \
  "*+","++", "?+", "{", \
  /* 56-68 OP_TYPESTAR to OP_TYPEPOSUPTO */ \
  "*", "*?", "+", "+?", "?", "??", \
  "{", "{", "{", \
  "*+","++", "?+", "{", \
  /* 69-76 OP_CRSTAR to OP_CRMINRANGE */ \
  "*", "*?", "+", "+?", "?", "??", \
  "{", "{", \
  /* 77-82 OP_CLASS to OP_CALLOUT */ \
  "class", "nclass", "xclass", \
  "Ref", "Recurse", "Callout", \
  /* 83-86 OP_ALT to OP_KETRMIN */ \
  "Alt", "Ket", "KetRmax", "KetRmin", \
  /* 87-91 OP_ASSERT to OP_REVERSE */ \
  "Assert", "Assert not", "AssertB", "AssertB not", "Reverse", \
  /* 92-98 OP_ONCE to OP_SCOND */ \
  "Once", "Bra", "CBra", "Cond", \
  "SBra", "SCBra", "SCond", \
  /* 99-103 OP_CREF to OP_BRAMINZERO */ \
  "Cond ref", "Cond rec", "Cond def", \
  "Brazero", "Braminzero", \
  /* 104-109 OP_PRUNE to OP_ACCEPT */ \
  "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT"
/* This macro defines the length of fixed length operations in the compiled
regex. The lengths are used when searching for specific things, and also in the
debugging printing of a compiled regex. We use a macro so that it can be
defined close to the definitions of the opcodes themselves.
As things have been extended, some of these are no longer fixed lengths, but are
minima instead. For example, the length of a single-character repeat may vary
in UTF-8 mode. The code that uses this table must know about such things. */
#define OP_LENGTHS \
1, /* End */ \
1, 1, 1, 1, 1, /* \A, \G, \K, \B, \b */ \
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ \
1, 1, /* Any, Anybyte */ \
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
2, /* Char - the minimum length */ \
2, /* Charnc - the minimum length */ \
2, /* not */ \
/* Positive single-char repeats ** These are */ \
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \
2, 2, 2, 4, /* *+, ++, ?+, upto+ */ \
/* Negative single-char repeats - only for chars < 256 */ \
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
4, 4, 4, /* NOT upto, minupto, exact */ \
2, 2, 2, 4, /* Possessive *, +, ?, upto */ \
/* Positive type repeats */ \
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
4, 4, 4, /* Type upto, minupto, exact */ \
2, 2, 2, 4, /* Possessive *+, ++, ?+, upto+ */ \
/* Character class & ref repeats */ \
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
5, 5, /* CRRANGE, CRMINRANGE */ \
33, /* CLASS */ \
33, /* NCLASS */ \
0, /* XCLASS - variable length */ \
3, /* REF */ \
1+LINK_SIZE, /* RECURSE */ \
2+2*LINK_SIZE, /* CALLOUT */ \
1+LINK_SIZE, /* Alt */ \
1+LINK_SIZE, /* Ket */ \
1+LINK_SIZE, /* KetRmax */ \
1+LINK_SIZE, /* KetRmin */ \
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -