?? rx.h
字號:
* and we need a way to validate it. * * Fortunately, even if this set is flushed from the cache, it is * not freed. It just goes on the free-list of supersets. * So we can still examine it. * * So to validate a starting set memo, check to see if the * starts_for field still points back to the struct rx in question, * and if the ID matches the rx sequence number. */ struct rx * starts_for; /* This is used to link into a hash bucket so these objects can * be `hash-consed'. */ struct rx_hash_item hash_item;};#define rx_protect_superset(RX,CON) (++(CON)->refs)/* The terminology may be confusing (rename this structure?). * Every character occurs in at most one rx_super_edge per super-state. * But, that structure might have more than one option, indicating a point * of non-determinism. * * In other words, this structure holds a list of superstate edges * sharing a common starting state and character label. The edges * are in the field OPTIONS. All superstate edges sharing the same * starting state and character are in this list. */struct rx_super_edge{ struct rx_super_edge *next; struct rx_inx rx_backtrack_frame; int cset_size; rx_Bitset cset; struct rx_distinct_future *options;};/* A superstate is a set of nfa states (RX_SUPERSET) along * with a transition table. Superstates are built on demand and reclaimed * without warning. To protect a superstate from this ghastly fate, * use LOCK_SUPERSTATE. */struct rx_superstate{ int rx_id; /* c.f. the id field of rx_superset */ int locks; /* protection from reclamation */ /* Within a superstate cache, all the superstates are kept in a big * queue. The tail of the queue is the state most likely to be * reclaimed. The *recyclable fields hold the queue position of * this state. */ struct rx_superstate * next_recyclable; struct rx_superstate * prev_recyclable; /* The supernfa edges that exist in the cache and that have * this state as their destination are kept in this list: */ struct rx_distinct_future * transition_refs; /* The list of nfa states corresponding to this superstate: */ struct rx_superset * contents; /* The list of edges in the cache beginning from this state. */ struct rx_super_edge * edges; /* A tail of the recyclable queue is marked as semifree. A semifree * state has no incoming next_char transitions -- any transition * into a semifree state causes a complex dispatch with the side * effect of rescuing the state from its semifree state. * * An alternative to this might be to make next_char more expensive, * and to move a state to the head of the recyclable queue whenever * it is entered. That way, popular states would never be recycled. * * But unilaterally making next_char more expensive actually loses. * So, incoming transitions are only made expensive for states near * the tail of the recyclable queue. The more cache contention * there is, the more frequently a state will have to prove itself * and be moved back to the front of the queue. If there is less * contention, then popular states just aggregate in the front of * the queue and stay there. */ int is_semifree; /* This keeps track of the size of the transition table for this * state. There is a half-hearted attempt to support variable sized * superstates. */ int trans_size; /* Indexed by characters... */ struct rx_inx transitions[RX_TAIL_ARRAY];};/* A list of distinct futures define the edges that leave from a * given superstate on a given character. c.f. rx_super_edge. */struct rx_distinct_future{ struct rx_distinct_future * next_same_super_edge[2]; struct rx_distinct_future * next_same_dest; struct rx_distinct_future * prev_same_dest; struct rx_superstate * present; /* source state */ struct rx_superstate * future; /* destination state */ struct rx_super_edge * edge; /* The future_frame holds the instruction that should be executed * after all the side effects are done, when it is time to complete * the transition to the next state. * * Normally this is a next_char instruction, but it may be a * cache_miss instruction as well, depending on whether or not * the superstate is in the cache and semifree. * * If this is the only future for a given superstate/char, and * if there are no side effects to be performed, this frame is * not used (directly) at all. Instead, its contents are copied * into the transition table of the starting state of this dist. future. */ struct rx_inx future_frame; struct rx_inx side_effects_frame; struct rx_se_list * effects;};#define rx_lock_superstate(R,S) ((S)->locks++)#define rx_unlock_superstate(R,S) (--(S)->locks)/* This page destined for rx.h */struct rx_blocklist{ struct rx_blocklist * next; int bytes;};struct rx_freelist{ struct rx_freelist * next;};struct rx_cache;#ifdef __STDC__typedef void (*rx_morecore_fn)(struct rx_cache *);#elsetypedef void (*rx_morecore_fn)();#endif/* You use this to control the allocation of superstate data * during matching. Most of it should be initialized to 0. * * A MORECORE function is necessary. It should allocate * a new block of memory or return 0. * A default that uses malloc is called `rx_morecore'. * * The number of SUPERSTATES_ALLOWED indirectly limits how much memory * the system will try to allocate. The default is 128. Batch style * applications that are very regexp intensive should use as high a number * as possible without thrashing. * * The LOCAL_CSET_SIZE is the number of characters in a character set. * It is therefore the number of entries in a superstate transition table. * Generally, it should be 256. If your character set has 16 bits, * it is better to translate your regexps into equivalent 8 bit patterns. */struct rx_cache{ struct rx_hash_rules superset_hash_rules; /* Objects are allocated by incrementing a pointer that * scans across rx_blocklists. */ struct rx_blocklist * memory; struct rx_blocklist * memory_pos; int bytes_left; char * memory_addr; rx_morecore_fn morecore; /* Freelists. */ struct rx_freelist * free_superstates; struct rx_freelist * free_transition_classes; struct rx_freelist * free_discernable_futures; struct rx_freelist * free_supersets; struct rx_freelist * free_hash; /* Two sets of superstates -- those that are semifreed, and those * that are being used. */ struct rx_superstate * lru_superstate; struct rx_superstate * semifree_superstate; struct rx_superset * empty_superset; int superstates; int semifree_superstates; int hits; int misses; int superstates_allowed; int local_cset_size; void ** instruction_table; struct rx_hash superset_table;};/* The lowest-level search function supports arbitrarily fragmented * strings and (optionally) suspendable/resumable searches. * * Callers have to provide a few hooks. */#ifndef __GNUC__#ifdef __STDC__#define __const__ const#else#define __const__#endif#endif/* This holds a matcher position */struct rx_string_position{ __const__ unsigned char * pos; /* The current pos. */ __const__ unsigned char * string; /* The current string burst. */ __const__ unsigned char * end; /* First invalid position >= POS. */ int offset; /* Integer address of the current burst. */ int size; /* Current string's size. */ int search_direction; /* 1 or -1 */ int search_end; /* First position to not try. */};enum rx_get_burst_return{ rx_get_burst_continuation, rx_get_burst_error, rx_get_burst_ok, rx_get_burst_no_more};/* A call to get burst should make POS valid. It might be invalid * if the STRING field doesn't point to a burst that actually * contains POS. * * GET_BURST should take a clue from SEARCH_DIRECTION (1 or -1) as to * whether or not to pad to the left. Padding to the right is always * appropriate, but need not go past the point indicated by STOP. * * If a continuation is returned, then the reentering call to * a search function will retry the get_burst. */#ifdef __STDC__typedef enum rx_get_burst_return (*rx_get_burst_fn) (struct rx_string_position * pos, void * app_closure, int stop); #elsetypedef enum rx_get_burst_return (*rx_get_burst_fn) ();#endifenum rx_back_check_return{ rx_back_check_continuation, rx_back_check_error, rx_back_check_pass, rx_back_check_fail};/* Back_check should advance the position it is passed * over rparen - lparen characters and return pass iff * the characters starting at POS match those indexed * by [LPAREN..RPAREN]. * * If a continuation is returned, then the reentering call to * a search function will retry the back_check. */#ifdef __STDC__typedef enum rx_back_check_return (*rx_back_check_fn) (struct rx_string_position * pos, int lparen, int rparen, unsigned char * translate, void * app_closure, int stop); #elsetypedef enum rx_back_check_return (*rx_back_check_fn) ();#endif/* A call to fetch_char should return the character at POS or POS + 1. * Returning continuations here isn't supported. OFFSET is either 0 or 1 * and indicates which characters is desired. */#ifdef __STDC__typedef int (*rx_fetch_char_fn) (struct rx_string_position * pos, int offset, void * app_closure, int stop);#elsetypedef int (*rx_fetch_char_fn) ();#endifenum rx_search_return{ rx_search_continuation = -4, rx_search_error = -3, rx_search_soft_fail = -2, /* failed by running out of string */ rx_search_fail = -1 /* failed only by reaching failure states */ /* return values >= 0 indicate the position of a successful match */};/* regex.h * * The remaining declarations replace regex.h. *//* This is an array of error messages corresponding to the error codes. */extern __const__ char *re_error_msg[];/* If any error codes are removed, changed, or added, update the `re_error_msg' table in regex.c. */typedef enum{ REG_NOERROR = 0, /* Success. */ REG_NOMATCH, /* Didn't find a match (for regexec). */ /* POSIX regcomp return error codes. (In the order listed in the standard.) */ REG_BADPAT, /* Invalid pattern. */ REG_ECOLLATE, /* Not implemented. */ REG_ECTYPE, /* Invalid character class name. */ REG_EESCAPE, /* Trailing backslash. */ REG_ESUBREG, /* Invalid back reference. */ REG_EBRACK, /* Unmatched left bracket. */ REG_EPAREN, /* Parenthesis imbalance. */ REG_EBRACE, /* Unmatched \{. */ REG_BADBR, /* Invalid contents of \{\}. */ REG_ERANGE, /* Invalid range end. */ REG_ESPACE, /* Ran out of memory. */ REG_BADRPT, /* No preceding re for repetition op. */ /* Error codes we've added. */ REG_EEND, /* Premature end. */ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */} reg_errcode_t;/* The regex.c support, as a client of rx, defines a set of possible * side effects that can be added to the edge lables of nfa edges. * Here is the list of sidef effects in use. */enum re_side_effects{#define RX_WANT_SE_DEFS 1#undef RX_DEF_SE#undef RX_DEF_CPLX_SE#define RX_DEF_SE(IDEM, NAME, VALUE) NAME VALUE,#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) NAME VALUE,#include "rx.h"#undef RX_DEF_SE#undef RX_DEF_CPLX_SE#undef RX_WANT_SE_DEFS re_floogle_flap = 65533};/* These hold paramaters for the kinds of side effects that are possible * in the supported pattern languages. These include things like the * numeric bounds of {} operators and the index of paren registers for
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -