?? rx.h
字號:
* subexpression measurement or backreferencing. */

/* One side-effect parameter record: a side-effect selector plus two integer
 * operands.  NOTE(review): `enum re_side_effects' is declared outside this
 * view; presumably op1/op2 carry the {lo,hi} bounds mentioned at the
 * `se_params' member of struct re_pattern_buffer -- confirm.
 */
struct re_se_params
{
  enum re_side_effects se;
  int op1;
  int op2;
};

/* Type used to hold the set of RE_* syntax bits. */
typedef unsigned reg_syntax_t;

/* A compiled pattern together with the flags that control how it is
 * matched: syntax bits, anchor behaviour, register allocation policy,
 * translate table, and fastmap.
 */
struct re_pattern_buffer
{
  struct rx rx;
  reg_syntax_t syntax;          /* See below for syntax bit definitions. */

  unsigned int no_sub:1;        /* If set, don't return register offsets. */
  unsigned int not_bol:1;       /* If set, the anchors ('^' and '$') don't */
  unsigned int not_eol:1;       /*     match at the ends of the string.  */
  unsigned int newline_anchor:1;/* If true, an anchor at a newline matches. */
  unsigned int least_subs:1;    /* If set, and returning registers, return
                                 * as few values as possible.  Only
                                 * backreferenced groups and group 0 (the
                                 * whole match) will be returned.
                                 */

  /* If true, this says that the matcher should keep registers on its
   * backtracking stack.  For many patterns, we can easily determine that
   * this isn't necessary.
   */
  unsigned int match_regs_on_stack:1;
  unsigned int search_regs_on_stack:1;

  /* is_anchored and begbuf_only are filled in by rx_compile. */
  unsigned int is_anchored:1;   /* Anchored by ^? */
  unsigned int begbuf_only:1;   /* Anchored to char position 0? */

  /* If REGS_UNALLOCATED, allocate space in the `regs' structure
   * for `max (RE_NREGS, re_nsub + 1)' groups.
   * If REGS_REALLOCATE, reallocate space if necessary.
   * If REGS_FIXED, use what's there.
   */
#define REGS_UNALLOCATED 0
#define REGS_REALLOCATE 1
#define REGS_FIXED 2
  unsigned int regs_allocated:2;

  /* Either a translate table to apply to all characters before
   * comparing them, or zero for no translation.  The translation
   * is applied to a pattern when it is compiled and to a string
   * when it is matched.
   */
  unsigned char * translate;

  /* If this is a valid pointer, it tells rx not to store the extents of
   * certain subexpressions (those corresponding to non-zero entries).
   * Passing 0x1 is the same as passing an array of all ones.  Passing 0x0
   * is the same as passing an array of all zeros.
   * The array should contain as many entries as there are subexps in the
   * regexp.
   */
  char * syntax_parens;

  /* Number of subexpressions found by the compiler. */
  size_t re_nsub;

  void * buffer;                /* Malloced memory for the nfa. */
  unsigned long allocated;      /* Size of that memory. */

  /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
   * the fastmap, if there is one, to skip over impossible
   * starting points for matches.
   */
  char *fastmap;

  unsigned int fastmap_accurate:1; /* These three are internal. */
  unsigned int can_match_empty:1;
  struct rx_nfa_state * start;  /* The nfa starting state. */

  /* This is the list of iterator bounds for {lo,hi} constructs.
   * The memory pointed to is part of the rx->buffer.
   */
  struct re_se_params *se_params;

  /* This is a bitset representation of the fastmap.
   * This is a true fastmap that already takes the translate
   * table into account.
   */
  rx_Bitset fastset;
};

/* Type for byte offsets within the string.  POSIX mandates this. */
typedef int regoff_t;

/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.
   It is a structure of arrays: `start' and `end' are parallel arrays
   and `num_regs' is the register count.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};

/* POSIX name for the pattern buffer. */
typedef struct re_pattern_buffer regex_t;

/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start. */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end. */
} regmatch_t;

/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.
*/
#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
   literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
   expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
   regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
   some contexts; otherwise they are ordinary.  Specifically,
   * + ? and intervals are only special when not after the beginning,
   open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
   immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
   interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
   are literals.
   If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then a starting range point collating higher than
   the ending range point, as in [z-a], is invalid.
   If not set, then when the starting range point collates higher than
   the ending range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).
When a regexp is compiled, the syntax used is stored in the pattern buffer, so changing this does not affect already-compiled regexps. */extern reg_syntax_t re_syntax_options;/* Define combinations of the above bits for the standard possibilities. (The [[[ comments delimit what gets put into the Texinfo file, so don't delete them!) */ /* [[[begin syntaxes]]] */#define RE_SYNTAX_EMACS 0#define RE_SYNTAX_AWK \ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ | RE_NO_BK_PARENS | RE_NO_BK_REFS \ | RE_NO_BK_VAR | RE_NO_EMPTY_RANGES \ | RE_UNMATCHED_RIGHT_PAREN_ORD)#define RE_SYNTAX_POSIX_AWK \ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)#define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ | RE_NEWLINE_ALT)#define RE_SYNTAX_EGREP \ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ | RE_NO_BK_VBAR)#define RE_SYNTAX_POSIX_EGREP \ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC/* Syntax bits common to both basic and extended POSIX regex syntax. */#define _RE_SYNTAX_POSIX_COMMON \ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ | RE_INTERVALS | RE_NO_EMPTY_RANGES)#define RE_SYNTAX_POSIX_BASIC \ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this isn't minimal, since other operators, such as \`, aren't disabled. */#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)#define RE_SYNTAX_POSIX_EXTENDED \ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ | RE_UNMATCHED_RIGHT_PAREN_ORD)/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. 
*/
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
  (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
   | RE_NO_BK_PARENS | RE_NO_BK_REFS \
   | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */

/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  The value below is 32767.  */
#ifdef RE_DUP_MAX
#undef RE_DUP_MAX
#endif
#define RE_DUP_MAX ((1 << 15) - 1)


/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then anchors do not match at newline
     characters in the string.
   If not set, then anchors do match at newlines.
   NOTE(review): POSIX regcomp describes REG_NEWLINE the other way round
   (anchors match at newlines when the flag is set); this wording looks
   inverted -- confirm against the implementation before relying on it.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)


/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)

/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
 * `re_match_2' returns information about at least this many registers
 * the first time a `regs' structure is passed.
 *
 * Also, this is the greatest number of backreferenced subexpressions
 * allowed in a pattern being matched without caller-supplied registers.
*/
#ifndef RE_NREGS
#define RE_NREGS 30
#endif

extern int rx_cache_bound;
extern char rx_version_string[];

#ifdef RX_WANT_RX_DEFS

/* These are declarations for the interesting subsystems and lower layers
 * of rx.  Everything which doesn't have a public counterpart in
 * regex.c is declared here.
 */

/* Callback type taking a hash item; passed as `freefn' to
 * rx_free_hash_table below.  Declared without a prototype when the
 * compiler is not standard C.
 */
#ifdef __STDC__
typedef void (*rx_hash_freefn) (struct rx_hash_item * it);
#else /* ndef __STDC__ */
typedef void (*rx_hash_freefn) ();
#endif /* ndef __STDC__ */

#ifdef __STDC__

/* Bitset operations.  Each takes the bitset size as its first argument. */
RX_DECL int rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b);
RX_DECL int rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b);
RX_DECL int rx_bitset_empty (int size, rx_Bitset set);
RX_DECL void rx_bitset_null (int size, rx_Bitset b);
RX_DECL void rx_bitset_universe (int size, rx_Bitset b);
RX_DECL void rx_bitset_complement (int size, rx_Bitset b);
RX_DECL void rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b);
RX_DECL void rx_bitset_union (int size, rx_Bitset a, rx_Bitset b);
RX_DECL void rx_bitset_intersection (int size, rx_Bitset a, rx_Bitset b);
RX_DECL void rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b);
RX_DECL void rx_bitset_revdifference (int size, rx_Bitset a, rx_Bitset b);
RX_DECL void rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b);
RX_DECL unsigned long rx_bitset_hash (int size, rx_Bitset b);

/* Hash table operations. */
RX_DECL struct rx_hash_item * rx_hash_find (struct rx_hash * table, unsigned long hash, void * value, struct rx_hash_rules * rules);
RX_DECL struct rx_hash_item * rx_hash_store (struct rx_hash * table, unsigned long hash, void * value, struct rx_hash_rules * rules);
RX_DECL void rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules);
RX_DECL void rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn, struct rx_hash_rules * rules);

/* Character-set (rx_Bitset) helpers tied to a `struct rx'. */
RX_DECL rx_Bitset rx_cset (struct rx *rx);
RX_DECL rx_Bitset rx_copy_cset (struct rx *rx, rx_Bitset a);
RX_DECL void rx_free_cset (struct rx * rx, rx_Bitset c);

/* Constructors returning `struct rexp_node *'. */
RX_DECL struct rexp_node * rexp_node (struct rx *rx, enum rexp_node_type type);
RX_DECL struct rexp_node * rx_mk_r_cset (struct rx * rx, rx_Bitset b);
RX_DECL struct rexp_node * rx_mk_r_concat (struct rx * rx, struct rexp_node * a, struct rexp_node * b);
RX_DECL struct rexp_node * rx_mk_r_alternate (struct rx * rx, struct rexp_node * a, struct rexp_node * b);
RX_DECL struct rexp_node * rx_mk_r_opt (struct rx * rx,
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -