?? stop.c
字號:
Part 2: If not found, add the label to the tree Part 3: Return the state number Notes: This machine always returns a state with the given label because if the machine does not have a state with the given label, then one is created.**/static intGetState( machine, label, signature ) DFA machine; /* in: DFA whose state labels are searched */ StrList label; /* in: state label searched for */ unsigned signature; /* in: signature of the label requested */ { SearchTree *ptr; /* pointer to a search tree link field */ SearchTree new_node; /* for a newly added search tree node */ /* Part 1: Search the tree for the requested state */ ptr = &(machine->tree); while ( (NULL != *ptr) && ( (signature != (*ptr)->signature) || !StrListEqual((StrList)label,(*ptr)->label)) ) ptr = (signature <= (*ptr)->signature) ? &(*ptr)->left : &(*ptr)->right; /* Part 2: If not found, add the label to the tree */ if ( NULL == *ptr ) { /* create a new node and fill in its fields */ new_node = (SearchTree)GetMemory( NULL, sizeof(SearchTreeNode) ); new_node->signature = signature; new_node->label = (StrList)label; new_node->state = machine->num_states; new_node->left = new_node->right = NULL; /* allocate more states if needed, set up the new state */ if ( machine->num_states == machine->max_states ) { machine->max_states += TABLE_INCREMENT; machine->state_table = (StateTable)GetMemory( (char *)machine->state_table, machine->max_states*sizeof(StateTableEntry)); } machine->state_table[machine->num_states].label = (StrList)label; machine->num_states++; /* hook the new node into the binary search tree */ *ptr = new_node; } else StrListDestroy( (StrList)label ); /* Part 3: Return the state number */ return( (*ptr)->state ); } /* GetState *//*FN*************************************************************************** AddArc( machine, state, arc_label, state_label, state_signature ) Returns: void Purpose: Add an arc between two states in a DFA Plan: Part 1: Search for the target state among existing states Part 2: Make sure the arc table is big enough Part 3: Add the new arc Notes: None.**/static voidAddArc( machine, state, arc_label, state_label, state_signature ) DFA machine; /* in/out: machine with an arc added */ int state; /* in: with an out arc added */ char arc_label; /* in: label on the new arc */ StrList state_label; /* in: label on the target state */ unsigned state_signature; /* in: label hash signature to speed searching */ { register int target; /* destination state for the new arc */ /* Part 1: Search for the target state among existing states */ StrListSort( state_label ); target = GetState( machine, state_label, state_signature ); /* Part 2: Make sure the arc table is big enough */ if ( machine->num_arcs == machine->max_arcs ) { machine->max_arcs += TABLE_INCREMENT; machine->arc_table = (ArcTable)GetMemory( (char *)machine->arc_table, machine->max_arcs * sizeof(ArcTableEntry) ); } /* Part 3: Add the new arc */ machine->arc_table[machine->num_arcs].label = arc_label; machine->arc_table[machine->num_arcs].target = target; machine->num_arcs++; machine->state_table[state].num_arcs++; } /* AddArc *//*FN*************************************************************************** BuildDFA( words ) Returns: DFA -- newly created finite state machine Purpose: Build a DFA to recognize a list of words Plan: Part 1: Allocate space and initialize variables Part 2: Make and label the DFA start state Part 3: Main loop - build the state and arc tables Part 4: Deallocate the binary search tree and the state labels Part 5: Reallocate the tables to squish them down Part 6: Return the newly constructed DFA Notes: None.**/DFABuildDFA( words ) StrList words; /* in: that the machine is built to recognize */ { DFA machine; /* local for easier access to machine */ register int state; /* current state's state number */ char arc_label; /* for the current arc when adding arcs */ char *string; /* element in a set of state labels */ char ch; /* the first character in a new string */ StrList current_label; /* set of strings labeling a state */ StrList target_label; /* labeling the arc target state */ unsigned target_signature; /* hashed label for binary search tree */ register int i; /* for looping through strings */ /* Part 1: Allocate space and initialize variables */ machine = (DFA)GetMemory( NULL, sizeof(DFAStruct) ); machine->max_states = TABLE_INCREMENT; machine->state_table = (StateTable)GetMemory(NULL, machine->max_states*sizeof(StateTableEntry)); machine->num_states = 0; machine->max_arcs = TABLE_INCREMENT; machine->arc_table = (ArcTable)GetMemory( NULL, machine->max_arcs * sizeof(ArcTableEntry) ); machine->num_arcs = 0; machine->tree = NULL; /* Part 2: Make and label the DFA start state */ StrListUnique( words ); /* sort and unique the list */ machine->state_table[0].label = words; machine->num_states = 1; /* Part 3: Main loop - build the state and arc tables */ for ( state = 0; state < machine->num_states; state++ ) { /* The current state has nothing but a label, so */ /* the first order of business is to set up some */ /* of its other major fields */ machine->state_table[state].is_final = FALSE; machine->state_table[state].arc_offset = machine->num_arcs; machine->state_table[state].num_arcs = 0; /* Add arcs to the arc table for the current state */ /* based on the state's derived set. Also set the */ /* state's final flag if the empty string is found */ /* in the suffix list */ current_label = machine->state_table[state].label; target_label = StrListCreate(); target_signature = HASH_START; arc_label = EOS; for ( i = 0; i < StrListSize(current_label); i++ ) { /* get the next string in the label and lop it */ string = StrListPeek( current_label, i ); ch = *string++; /* the empty string means mark this state as final */ if ( EOS == ch ) { machine->state_table[state].is_final = TRUE; continue; } /* make sure we have a legitimate arc_label */ if ( EOS == arc_label ) arc_label = ch; /* if the first character is new, then we must */ /* add an arc for the previous first character */ if ( ch != arc_label ) { AddArc(machine, state, arc_label, target_label, target_signature); target_label = StrListCreate(); target_signature = HASH_START; arc_label = ch; } /* add the current suffix to the target state label */ StrListAppend( target_label, string ); target_signature += (*string + 1) * HASH_INCREMENT; while ( *string ) target_signature += *string++; } /* On loop exit we have not added an arc for the */ /* last bunch of suffixes, so we must do so, as */ /* long as the last set of suffixes is not empty */ /* (which happens when the current state label */ /* is the singleton set of the empty string). */ if ( 0 < StrListSize(target_label) ) AddArc( machine, state, arc_label, target_label, target_signature ); } /* Part 4: Deallocate the binary search tree and the state labels */ DestroyTree( machine->tree ); machine->tree = NULL; for ( i = 0; i < machine->num_states; i++ ) { StrListDestroy( machine->state_table[i].label ); machine->state_table[i].label = NULL; } /* Part 5: Reallocate the tables to squish them down */ machine->state_table = (StateTable)GetMemory( (char *)machine->state_table, machine->num_states * sizeof(StateTableEntry) ); machine->arc_table = (ArcTable)GetMemory( (char *)machine->arc_table, machine->num_arcs * sizeof(ArcTableEntry) ); /* Part 6: Return the newly constructed DFA */ return( machine ); } /* BuildDFA *//*FN*************************************************************************** GetTerm( stream, machine, size, output ) Returns: char * -- NULL if stream is exhausted, otherwise output buffer Purpose: Get the next token from an input stream, filtering stop words Plan: Part 1: Return NULL immediately if there is no input Part 2: Initialize the local variables Part 3: Main Loop: Put an unfiltered word into the output buffer Part 4: Return the output buffer Notes: This routine runs the DFA provided as the machine parameter, and collects the text of any term in the output buffer. If a stop word is recognized in this process, it is skipped. Care is also taken to be sure not to overrun the output buffer.**/char *GetTerm( stream, machine, size, output ) FILE *stream; /* in: source of input characters */ DFA machine; /* in: finite state machine driving process */ int size; /* in: bytes in the output buffer */ char *output; /* in/out: where the next token in placed */ { char *outptr; /* for scanning through the output buffer */ int ch; /* current character during input scan */ register int state; /* current state during DFA execution */ /* Part 1: Return NULL immediately if there is no input */ if ( EOF == (ch = getc(stream)) ) return( NULL ); /* Part 2: Initialize the local variables */ outptr = output; /* Part 3: Main Loop: Put an unfiltered word into the output buffer */ do { /* scan past any leading delimiters */ while ( (EOF != ch ) && ((DELIM_CH == char_class[ch]) || (DIGIT_CH == char_class[ch])) ) ch = getc( stream ); /* start the machine in its start state */ state = 0; /* copy input to output until reaching a delimiter, and also */ /* run the DFA on the input to watch for filtered words */ while ( (EOF != ch) && (DELIM_CH != char_class[ch]) ) { if ( outptr == (output+size-1) ) { outptr = output; state = 0; } *outptr++ = convert_case[ch]; if ( DEAD_STATE != state ) { register int i; /* for scanning through arc labels */ int arc_start; /* where the arc label list starts */ int arc_end; /* where the arc label list ends */ arc_start = machine->state_table[state].arc_offset; arc_end = arc_start + machine->state_table[state].num_arcs; for ( i = arc_start; i < arc_end; i++ ) if ( convert_case[ch] == machine->arc_table[i].label ) { state = machine->arc_table[i].target; break; } if ( i == arc_end ) state = DEAD_STATE; } ch = getc( stream ); } /* start from scratch if a stop word is recognized */ if ( (DEAD_STATE != state) && machine->state_table[state].is_final ) outptr = output; /* terminate the output buffer */ *outptr = EOS; } while ( (EOF != ch) && !*output ); /* Part 4: Return the output buffer */ return( output ); } /* GetTerm */
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -