?? rx.h
字號:
#if !defined(RXH) || defined(RX_WANT_SE_DEFS)#define RXH/* Copyright (C) 1992, 1993 Free Software Foundation, Inc.This file is part of the librx library.Librx is free software; you can redistribute it and/or modify it underthe terms of the GNU Library General Public License as published bythe Free Software Foundation; either version 2, or (at your option)any later version.Librx is distributed in the hope that it will be useful, but WITHOUTANY WARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public Licensefor more details.You should have received a copy of the GNU Library General PublicLicense along with this software; see the file COPYING.LIB. If not,write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA02139, USA. *//* t. lord Wed Sep 23 18:20:57 1992 */#ifndef RX_WANT_SE_DEFS/* This page: Bitsets */#ifndef RX_subsettypedef unsigned int RX_subset;#define RX_subset_bits (32)#define RX_subset_mask (RX_subset_bits - 1)#endiftypedef RX_subset * rx_Bitset;#ifdef __STDC__typedef void (*rx_bitset_iterator) (void *, int member_index);#elsetypedef void (*rx_bitset_iterator) ();#endif#define rx_bitset_subset(N) ((N) / RX_subset_bits)#define rx_bitset_subset_val(B,N) ((B)[rx_bitset_subset(N)])#define RX_bitset_access(B,N,OP) \ ((B)[rx_bitset_subset(N)] OP rx_subset_singletons[(N) & RX_subset_mask])#define RX_bitset_member(B,N) RX_bitset_access(B, N, &)#define RX_bitset_enjoin(B,N) RX_bitset_access(B, N, |=)#define RX_bitset_remove(B,N) RX_bitset_access(B, N, &= ~)#define RX_bitset_toggle(B,N) RX_bitset_access(B, N, ^= )#define rx_bitset_numb_subsets(N) (((N) + RX_subset_bits - 1) / RX_subset_bits)#define rx_sizeof_bitset(N) (rx_bitset_numb_subsets(N) * sizeof(RX_subset))/* This page: Splay trees. */#ifdef __STDC__typedef int (*rx_sp_comparer) (void * a, void * b);#elsetypedef int (*rx_sp_comparer) ();#endifstruct rx_sp_node { void * key; void * data; struct rx_sp_node * kids[2];};#ifdef __STDC__typedef void (*rx_sp_key_data_freer) (struct rx_sp_node *);#elsetypedef void (*rx_sp_key_data_freer) ();#endif/* giant inflatable hash trees */struct rx_hash_item{ struct rx_hash_item * next_same_hash; struct rx_hash * table; unsigned long hash; void * data; void * binding;};struct rx_hash{ struct rx_hash * parent; int refs; struct rx_hash * children[13]; struct rx_hash_item * buckets [13]; int bucket_size [13];};struct rx_hash_rules;#ifdef __STDC__/* should return like == */typedef int (*rx_hash_eq)(void *, void *);typedef struct rx_hash * (*rx_alloc_hash)(struct rx_hash_rules *);typedef void (*rx_free_hash)(struct rx_hash *, struct rx_hash_rules *);typedef struct rx_hash_item * (*rx_alloc_hash_item)(struct rx_hash_rules *, void *);typedef void (*rx_free_hash_item)(struct rx_hash_item *, struct rx_hash_rules *);#elsetypedef int (*rx_hash_eq)();typedef struct rx_hash * (*rx_alloc_hash)();typedef void (*rx_free_hash)();typedef struct rx_hash_item * (*rx_alloc_hash_item)();typedef void (*rx_free_hash_item)();#endifstruct rx_hash_rules{ rx_hash_eq eq; rx_alloc_hash hash_alloc; rx_free_hash free_hash; rx_alloc_hash_item hash_item_alloc; rx_free_hash_item free_hash_item;};/* Forward declarations */struct rx_cache;struct rx_superset;struct rx;struct rx_se_list;/* * GLOSSARY * * regexp * regular expression * expression * pattern - a `regular' expression. The expression * need not be formally regular -- it can contain * constructs that don't correspond to purely regular * expressions. * * buffer * string - the string (or strings) being searched or matched. * * pattern buffer - a structure of type `struct re_pattern_buffer' * This in turn contains a `struct rx', which holds the * NFA compiled from a pattern, as well as some of the state * of a matcher using the pattern. * * NFA - nondeterministic finite automata. Some people * use this term to a member of the class of * regular automata (those corresponding to a regular * language). However, in this code, the meaning is * more general. The automata used by Rx are comperable * in power to what are usually called `push down automata'. * * Two NFA are built by rx for every pattern. One is built * by the compiler. The other is built from the first, on * the fly, by the matcher. The latter is called the `superstate * NFA' because its states correspond to sets of states from * the first NFA. (Joe Keane gets credit for the name * `superstate NFA'). * * NFA edges * epsilon edges * side-effect edges - The NFA compiled from a pattern can have three * kinds of edges. Epsilon edges can be taken freely anytime * their source state is reached. Character set edges can be * taken when their source state is reached and when the next * character in the buffer is a member of the set. Side effect * edges imply a transition that can only be taken after the * indicated side effect has been successfully accomplished. * Some examples of side effects are: * * Storing the current match position to record the * location of a parentesized subexpression. * * Advancing the matcher over N characters if they * match the N characters previously matched by a * parentesized subexpression. * * Both of those kinds of edges occur in the NFA generated * by the pattern: \(.\)\1 * * Epsilon and side effect edges are similar. Unfortunately, * some of the code uses the name `epsilon edge' to mean * both epsilon and side effect edges. For example, the * function has_non_idempotent_epsilon_path computes the existance * of a non-trivial path containing only a mix of epsilon and * side effect edges. In that case `nonidempotent epsilon' is being * used to mean `side effect'. *//* LOW LEVEL PATTERN BUFFERS *//* Suppose that from some NFA state, more than one path through * side-effect edges is possible. In what order should the paths * be tried? A function of type rx_se_list_order answers that * question. It compares two lists of side effects, and says * which list comes first. */ #ifdef __STDC__typedef int (*rx_se_list_order) (struct rx *, struct rx_se_list *, struct rx_se_list *);#elsetypedef int (*rx_se_list_order) ();#endif/* Struct RX holds a compiled regular expression - that is, an nfa * ready to be converted on demand to a more efficient superstate nfa. * This is for the low level interface. The high-level interfaces enclose * this in a `struct re_pattern_buffer'. */struct rx{ /* The compiler assigns a unique id to every pattern. * Like sequence numbers in X, there is a subtle bug here * if you use Rx in a system that runs for a long time. * But, because of the way the caches work out, it is almost * impossible to trigger the Rx version of this bug. * * The id is used to validate superstates found in a cache * of superstates. It isn't sufficient to let a superstate * point back to the rx for which it was compiled -- the caller * may be re-using a `struct rx' in which case the superstate * is not really valid. So instead, superstates are validated * by checking the sequence number of the pattern for which * they were built. */ int rx_id; /* This is memory mgt. state for superstates. This may be * shared by more than one struct rx. */ struct rx_cache * cache; /* Every regex defines the size of its own character set. * A superstate has an array of this size, with each element * a `struct rx_inx'. So, don't make this number too large. * In particular, don't make it 2^16. */ int local_cset_size; /* After the NFA is built, it is copied into a contiguous region * of memory (mostly for compatability with GNU regex). * Here is that region, and it's size: */ void * buffer; unsigned long allocated; /* Clients of RX can ask for some extra storage in the space pointed * to by BUFFER. The field RESERVED is an input parameter to the * compiler. After compilation, this much space will be available * at (buffer + allocated - reserved) */ unsigned long reserved; /* --------- The remaining fields are for internal use only. --------- */ /* --------- But! they must be initialized to 0. --------- */ /* NODEC is the number of nodes in the NFA with non-epsilon * transitions. */ int nodec; /* EPSNODEC is the number of nodes with only epsilon transitions. */ int epsnodec; /* The sum (NODEC + EPSNODEC) is the total number of states in the * compiled NFA. */ /* Lists of side effects as stored in the NFA are `hash consed'..meaning * that lists with the same elements are ==. During compilation, * this table facilitates hash-consing. */ struct rx_hash se_list_memo; /* Lists of NFA states are also hashed. */ struct rx_hash set_list_memo; /* The compiler and matcher must build a number of instruction frames. * The format of these frames is fixed (c.f. struct rx_inx). The values * of the instructions is not fixed. * * An enumerated type (enum rx_opcode) defines the set of instructions * that the compiler or matcher might generate. When filling an instruction * frame, the INX field is found by indexing this instruction table * with an opcode: */ void ** instruction_table; /* The list of all states in an NFA. * During compilation, the NEXT field of NFA states links this list. * After compilation, all the states are compacted into an array, * ordered by state id numbers. At that time, this points to the base * of that array. */ struct rx_nfa_state *nfa_states; /* Every nfa begins with one distinguished starting state: */ struct rx_nfa_state *start; /* This orders the search through super-nfa paths. * See the comment near the typedef of rx_se_list_order. */ rx_se_list_order se_list_cmp; struct rx_superset * start_set;};/* SYNTAX TREES *//* Compilation is in stages. * * In the first stage, a pattern specified by a string is * translated into a syntax tree. Later stages will convert * the syntax tree into an NFA optimized for conversion to a * superstate-NFA. * * This page is about syntax trees. */enum rexp_node_type{ r_cset, /* Match from a character set. `a' or `[a-z]'*/ r_concat, /* Concat two subexpressions. `ab' */ r_alternate, /* Choose one of two subexpressions. `a\|b' */ r_opt, /* Optional subexpression. `a?' */ r_star, /* Repeated subexpression. `a*' */ /* A 2phase-star is a variation on a repeated subexpression. * In this case, there are two subexpressions. The first, if matched, * begins a repitition (otherwise, the whole expression is matches the * empth string). * * After matching the first subexpression, a 2phase star either finishes, * or matches the second subexpression. If the second subexpression is * matched, then the whole construct repeats. * * 2phase stars are used in two circumstances. First, they * are used as part of the implementation of POSIX intervals (counted * repititions). Second, they are used to implement proper star * semantics when the repeated subexpression contains paths of * only side effects. See rx_compile for more information. */ r_2phase_star, /* c.f. "typedef void * rx_side_effect" */ r_side_effect, /* This is an extension type: It is for transient use in source->source * transformations (implemented over syntax trees). */ r_data};/* A side effect is a matcher-specific action associated with * transitions in the NFA. The details of side effects are up * to the matcher. To the compiler and superstate constructors * side effects are opaque: */typedef void * rx_side_effect;/* Nodes in a syntax tree are of this type: */
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -