?? stop.c
字號:
/********************************* stop.c ******************************** Purpose: Stop list filter finite state machine generator and driver. Provenence: Written by and unit tested by C. Fox, 1990. Changed by C. Fox, July 1991. - added ANSI C declarations - branch tested to 95% coverage Notes: This module implements a fast finite state machine generator, and a driver, for implementing stop list filters. The strlist module is a simple string array data type implementation.**/#include <stdio.h>#include <ctype.h>#include <string.h>#include <malloc.h>#include "stop.h"#include "strlist.h"/******************************************************************************/#define FALSE 0#define TRUE 1#define EOS '\0' /************** Character Classification ***************/ /* Tokenizing requires that ASCII be broken into character */ /* classes distinguished for tokenizing. Delimiter chars */ /* separate tokens. Digits and letters make up the body */ /* of search terms. */typedef enum { DELIM_CH, /* whitespace, punctuation, etc. */ DIGIT_CH, /* the digits */ LETTER_CH, /* upper and lower case */ } CharClassType;static CharClassType char_class[128] = { /* ^@ */ DELIM_CH, /* ^A */ DELIM_CH, /* ^B */ DELIM_CH, /* ^C */ DELIM_CH, /* ^D */ DELIM_CH, /* ^E */ DELIM_CH, /* ^F */ DELIM_CH, /* ^G */ DELIM_CH, /* ^H */ DELIM_CH, /* ^I */ DELIM_CH, /* ^J */ DELIM_CH, /* ^K */ DELIM_CH, /* ^L */ DELIM_CH, /* ^M */ DELIM_CH, /* ^N */ DELIM_CH, /* ^O */ DELIM_CH, /* ^P */ DELIM_CH, /* ^Q */ DELIM_CH, /* ^R */ DELIM_CH, /* ^S */ DELIM_CH, /* ^T */ DELIM_CH, /* ^U */ DELIM_CH, /* ^V */ DELIM_CH, /* ^W */ DELIM_CH, /* ^X */ DELIM_CH, /* ^Y */ DELIM_CH, /* ^Z */ DELIM_CH, /* ^[ */ DELIM_CH, /* ^\ */ DELIM_CH, /* ^] */ DELIM_CH, /* ^^ */ DELIM_CH, /* ^_ */ DELIM_CH, /* */ DELIM_CH, /* ! */ DELIM_CH, /* " */ DELIM_CH, /* # */ DELIM_CH, /* $ */ DELIM_CH, /* % */ DELIM_CH, /* & */ DELIM_CH, /* ' */ DELIM_CH, /* ( */ DELIM_CH, /* ) */ DELIM_CH, /* * */ DELIM_CH, /* + */ DELIM_CH, /* , */ DELIM_CH, /* - */ DELIM_CH, /* . */ DELIM_CH, /* / */ DELIM_CH, /* 0 */ DIGIT_CH, /* 1 */ DIGIT_CH, /* 2 */ DIGIT_CH, /* 3 */ DIGIT_CH, /* 4 */ DIGIT_CH, /* 5 */ DIGIT_CH, /* 6 */ DIGIT_CH, /* 7 */ DIGIT_CH, /* 8 */ DIGIT_CH, /* 9 */ DIGIT_CH, /* : */ DELIM_CH, /* ; */ DELIM_CH, /* < */ DELIM_CH, /* = */ DELIM_CH, /* > */ DELIM_CH, /* ? */ DELIM_CH, /* @ */ DELIM_CH, /* A */ LETTER_CH, /* B */ LETTER_CH, /* C */ LETTER_CH, /* D */ LETTER_CH, /* E */ LETTER_CH, /* F */ LETTER_CH, /* G */ LETTER_CH, /* H */ LETTER_CH, /* I */ LETTER_CH, /* J */ LETTER_CH, /* K */ LETTER_CH, /* L */ LETTER_CH, /* M */ LETTER_CH, /* N */ LETTER_CH, /* O */ LETTER_CH, /* P */ LETTER_CH, /* Q */ LETTER_CH, /* R */ LETTER_CH, /* S */ LETTER_CH, /* T */ LETTER_CH, /* U */ LETTER_CH, /* V */ LETTER_CH, /* W */ LETTER_CH, /* X */ LETTER_CH, /* Y */ LETTER_CH, /* Z */ LETTER_CH, /* [ */ DELIM_CH, /* \ */ DELIM_CH, /* ] */ DELIM_CH, /* ^ */ DELIM_CH, /* _ */ DELIM_CH, /* ` */ DELIM_CH, /* a */ LETTER_CH, /* b */ LETTER_CH, /* c */ LETTER_CH, /* d */ LETTER_CH, /* e */ LETTER_CH, /* f */ LETTER_CH, /* g */ LETTER_CH, /* h */ LETTER_CH, /* i */ LETTER_CH, /* j */ LETTER_CH, /* k */ LETTER_CH, /* l */ LETTER_CH, /* m */ LETTER_CH, /* n */ LETTER_CH, /* o */ LETTER_CH, /* p */ LETTER_CH, /* q */ LETTER_CH, /* r */ LETTER_CH, /* s */ LETTER_CH, /* t */ LETTER_CH, /* u */ LETTER_CH, /* v */ LETTER_CH, /* w */ LETTER_CH, /* x */ LETTER_CH, /* y */ LETTER_CH, /* z */ LETTER_CH, /* { */ DELIM_CH, /* | */ DELIM_CH, /* } */ DELIM_CH, /* ~ */ DELIM_CH, /* ^? */ DELIM_CH, }; /************** Character Case Conversion **************/ /* Term text must be accumulated in a single case. This */ /* array is used to convert letter case but otherwise */ /* preserve characters. */static char convert_case[128] = { /* ^@ */ 0, /* ^A */ 0, /* ^B */ 0, /* ^C */ 0, /* ^D */ 0, /* ^E */ 0, /* ^F */ 0, /* ^G */ 0, /* ^H */ 0, /* ^I */ 0, /* ^J */ 0, /* ^K */ 0, /* ^L */ 0, /* ^M */ 0, /* ^N */ 0, /* ^O */ 0, /* ^P */ 0, /* ^Q */ 0, /* ^R */ 0, /* ^S */ 0, /* ^T */ 0, /* ^U */ 0, /* ^V */ 0, /* ^W */ 0, /* ^X */ 0, /* ^Y */ 0, /* ^Z */ 0, /* ^[ */ 0, /* ^\ */ 0, /* ^] */ 0, /* ^^ */ 0, /* ^_ */ 0, /* */ ' ', /* ! */ '!', /* " */ '"', /* # */ '#', /* $ */ '$', /* % */ '%', /* & */ '&', /* ' */ '\'', /* ( */ '(', /* ) */ ')', /* * */ '*', /* + */ '+', /* , */ ',', /* - */ '-', /* . */ '.', /* / */ '/', /* 0 */ '0', /* 1 */ '1', /* 2 */ '2', /* 3 */ '3', /* 4 */ '4', /* 5 */ '5', /* 6 */ '6', /* 7 */ '7', /* 8 */ '8', /* 9 */ '9', /* : */ ':', /* ; */ ';', /* < */ '<', /* = */ '=', /* > */ '>', /* ? */ '?', /* @ */ '@', /* A */ 'a', /* B */ 'b', /* C */ 'c', /* D */ 'd', /* E */ 'e', /* F */ 'f', /* G */ 'g', /* H */ 'h', /* I */ 'i', /* J */ 'j', /* K */ 'k', /* L */ 'l', /* M */ 'm', /* N */ 'n', /* O */ 'o', /* P */ 'p', /* Q */ 'q', /* R */ 'r', /* S */ 's', /* T */ 't', /* U */ 'u', /* V */ 'v', /* W */ 'w', /* X */ 'x', /* Y */ 'y', /* Z */ 'z', /* [ */ '[', /* \ */ 92, /* ] */ ']', /* ^ */ '^', /* _ */ '_', /* ` */ '`', /* a */ 'a', /* b */ 'b', /* c */ 'c', /* d */ 'd', /* e */ 'e', /* f */ 'f', /* g */ 'g', /* h */ 'h', /* i */ 'i', /* j */ 'j', /* k */ 'k', /* l */ 'l', /* m */ 'm', /* n */ 'n', /* o */ 'o', /* p */ 'p', /* q */ 'q', /* r */ 'r', /* s */ 's', /* t */ 't', /* u */ 'u', /* v */ 'v', /* w */ 'w', /* x */ 'x', /* y */ 'y', /* z */ 'z', /* { */ '{', /* | */ '|', /* } */ '}', /* ~ */ '~', /* ^? */ 0, };#define DEAD_STATE -1 /* used to block a DFA */#define TABLE_INCREMENT 256 /* used to grow tables */ /************************* Hashing **************************/ /* Sets of suffixes labeling states during the DFA construction */ /* are hashed to speed searching. The hashing function uses an */ /* entire integer variable range as its hash table size; in an */ /* effort to get a good spread through this range, hash values */ /* start big, and are incremented by a lot with every new word */ /* in the list. The collision rate is low using this method */#define HASH_START 5775863#define HASH_INCREMENT 38873647 /************** State Label Binary Search Tree **************/ /* During DFA construction, all states must be searched by */ /* their labels to make sure that the minimum number of states */ /* are used. This operation is sped up by hashing the labels */ /* to a signature value, then storing the signatures and labels */ /* in a binary search tree. The tree is destroyed once the DFA */ /* is fully constructed. */typedef struct TreeNode { StrList label; /* state label used as search key */ unsigned signature; /* hashed label to speed searching */ int state; /* whose label is representd by node */ struct TreeNode *left; /* left binary search subtree */ struct TreeNode *right; /* right binary search subtree */ } SearchTreeNode, *SearchTree; /********************* DFA State Table **********************/ /* The state table is an array of structures holding a state */ /* label, a count of the arcs out of the state, a pointer into */ /* the arc table for these arcs, and a final state flag. The */ /* label field is used only during machine construction. */typedef struct { StrList label; /* for this state - used during build */ int num_arcs; /* for this state in the arc table */ int arc_offset; /* for finding arcs in the arc table */ short is_final; /* TRUE iff this is a final state */ } StateTableEntry, *StateTable; /********************** DFA Arc Table ***********************/ /* The arc table lists all transitions for all states in a DFA */ /* in compacted form. Each state's transitions are offset from */ /* the start of the table, then listed in arc label order. */ /* Transitions are found by a linear search of the sub-section */ /* of the table for a given state. */typedef struct { char label; /* character label on an out-arrow */ int target; /* the target state for the out-arrow */ } ArcTableEntry, *ArcTable; /********************** DFA Structure ***********************/ /* A DFA is represented as a pointer to a structure holding the */ /* machine's state and transition tables, and bookkeepping */ /* counters. The tables are arrays whose space is malloc'd, */ /* then realloc'd if more space is required. Once a machine is */ /* constructed, the table space is realloc'd one last time to */ /* fit the needs of the machine exactly. */typedef struct _DfaStruct { int num_states; /* in the DFA (and state table) */ int max_states; /* now allocated in the state table */ int num_arcs; /* in the arc table for this machine */ int max_arcs; /* now allocated in the arc table */ StateTable state_table; /* the compacted DFA state table */ ArcTable arc_table; /* the compacted DFA transition table */ SearchTree tree; /* storing state labels used in build */ } DFAStruct;/******************************************************************************//************************* Function Declarations **************************/#ifdef __STDC__static char *GetMemory( char *ptr, int num_bytes );static void DestroyTree( SearchTree tree );static int GetState( DFA machine, StrList label, unsigned signature );static void AddArc( DFA machine, int state, char arc_label, StrList state_label, unsigned state_signature );extern DFA BuildDFA( StrList words );extern char *GetTerm( FILE *stream, DFA machine, int size, char *output );#elsestatic char *GetMemory();static void DestroyTree();static int GetState();static void AddArc();extern DFA BuildDFA();extern char *GetTerm();#endif/******************************************************************************//************************ Private Function Definitions ********************//*FN*************************************************************************** GetMemory( ptr, num_bytes ) Returns: char * -- new/expanded block of memory Purpose: Rationalize memory allocation and handle errors Plan: Part 1: Allocate memory with supplied allocation functions Part 2: Handle any errors Part 3: Return the allocated block of memory Notes: None.**/static char *GetMemory( ptr, num_bytes ) char *ptr; /* in: expanded block; NULL if nonesuch */ int num_bytes; /* in: number of bytes to allocate */ { char *memory; /* temporary for holding results */ /* Part 1: Allocate memory with supplied allocation functions */ if ( NULL == ptr ) memory = malloc( (unsigned)num_bytes ); else memory = realloc( ptr, (unsigned)num_bytes ); /* Part 2: Handle any errors */ if ( NULL == memory ) { (void)fprintf( stderr, "malloc failure--aborting\n" ); exit(1); } /* Part 3: Return the allocated block of memory */ return( memory ); } /* GetMemory *//*FN*************************************************************************** DestroyTree( tree ) Returns: void Purpose: Destroy a binary search tree created during machine construction Plan: Part 1: Return right away of there is no tree Part 2: Deallocate the subtrees Part 3: Deallocate the root Notes: None.**/static voidDestroyTree( tree ) SearchTree tree; /* in: search tree destroyed */ { /* Part 1: Return right away of there is no tree */ if ( NULL == tree ) return; /* Part 2: Deallocate the subtrees */ if ( NULL != tree->left ) DestroyTree( tree->left ); if ( NULL != tree->right ) DestroyTree( tree->right ); /* Part 3: Deallocate the root */ tree->left = tree->right = NULL; (void)free( (char *)tree ); } /* DestroyTree *//*FN*************************************************************************** GetState( machine, label, signature ) Returns: int -- state with the given label Purpose: Search a machine and return the state with a given state label Plan: Part 1: Search the tree for the requested state
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -