?? bnfa_search.c
字號:
if( !t ) { return 0; } while(t && (t->key < BNFA_MAX_ALPHABET_SIZE ) ) { full[ t->key ] = t->next_state; tcnt++; t = t->next; } return tcnt; }}/** Add pattern characters to the initial upper case trie* unless Exact has been specified, in which case all patterns* are assumed to be case specific.*/static int _bnfa_add_pattern_states (bnfa_struct_t * bnfa, bnfa_pattern_t * p) { int state, next, n; unsigned char * pattern; bnfa_match_node_t * pmn; n = p->n; pattern = p->casepatrn; state = 0; /* * Match up pattern with existing states */ for (; n > 0; pattern++, n--) { if( bnfa->bnfaCaseMode == BNFA_CASE ) next = _bnfa_list_get_next_state(bnfa,state,*pattern); else next = _bnfa_list_get_next_state(bnfa,state,xlatcase[*pattern]); if( next == BNFA_FAIL_STATE || next == 0 ) { break; } state = next; } /* * Add new states for the rest of the pattern bytes, 1 state per byte, uppercase */ for (; n > 0; pattern++, n--) { bnfa->bnfaNumStates++; if( bnfa->bnfaCaseMode == BNFA_CASE ) { if( _bnfa_list_put_next_state(bnfa,state,*pattern,bnfa->bnfaNumStates) < 0 ) return -1; } else { if( _bnfa_list_put_next_state(bnfa,state,xlatcase[*pattern],bnfa->bnfaNumStates) < 0 ) return -1; } state = bnfa->bnfaNumStates; if ( bnfa->bnfaNumStates >= bnfa->bnfaMaxStates ) { return -1; } } /* Add a pattern to the list of patterns terminated at this state */ pmn = (bnfa_match_node_t*)BNFA_MALLOC(sizeof(bnfa_match_node_t),bnfa->matchlist_memory); if( !pmn ) { return -1; } pmn->data = p; pmn->next = bnfa->bnfaMatchList[state]; bnfa->bnfaMatchList[state] = pmn; return 0;}/** Build a non-deterministic finite automata using Aho-Corasick construction* The keyword trie must already be built via _bnfa_add_pattern_states()*/ static int _bnfa_build_nfa (bnfa_struct_t * bnfa) { int r, s, i; QUEUE q, *queue = &q; bnfa_state_t * FailState = bnfa->bnfaFailState; bnfa_match_node_t ** MatchList = bnfa->bnfaMatchList; bnfa_match_node_t * mlist; bnfa_match_node_t * px; /* Init a Queue */ queue_init (queue); /* Add the state 0 transitions 1st, * the states at depth 1, fail to state 0 */ for (i = 0; i < bnfa->bnfaAlphabetSize; i++) { /* note that state zero deos not fail, * it just returns 0..nstates-1 */ s = _bnfa_list_get_next_state(bnfa,0,i); if( s ) /* don't bother adding state zero */ { if( queue_add (queue, s) ) { return -1; } FailState[s] = 0; } } /* Build the fail state successive layer of transitions */ while (queue_count (queue) > 0) { r = queue_remove (queue); /* Find Final States for any Failure */ for(i = 0; i<bnfa->bnfaAlphabetSize; i++) { int fs, next; s = _bnfa_list_get_next_state(bnfa,r,i); if( s == BNFA_FAIL_STATE ) continue; if( queue_add (queue, s) ) { return -1; } fs = FailState[r]; /* * Locate the next valid state for 'i' starting at fs */ while( (next=_bnfa_list_get_next_state(bnfa,fs,i)) == BNFA_FAIL_STATE ) { fs = FailState[fs]; } /* * Update 's' state failure state to point to the next valid state */ FailState[s] = next; /* * Copy 'next'states MatchList into 's' states MatchList, * we just create a new list nodes, the patterns are not copied. */ for( mlist = MatchList[next];mlist;mlist = mlist->next) { /* Dup the node, don't copy the data */ px = (bnfa_match_node_t*)BNFA_MALLOC(sizeof(bnfa_match_node_t),bnfa->matchlist_memory); if( !px ) { return 0; } px->data = mlist->data; px->next = MatchList[s]; /* insert at head */ MatchList[s] = px; } } } /* Clean up the queue */ queue_free (queue); return 0;}#ifdef ALLOW_NFA_FULL/** Conver state machine to full format*/static int _bnfa_conv_list_to_full(bnfa_struct_t * bnfa) { int k; bnfa_state_t * p; bnfa_state_t ** NextState = bnfa->bnfaNextState; for(k=0;k<bnfa->bnfaNumStates;k++) { p = BNFA_MALLOC(sizeof(bnfa_state_t)*bnfa->bnfaAlphabetSize,bnfa->nextstate_memory); if(!p) { return -1; } _bnfa_list_conv_row_to_full( bnfa, (bnfa_state_t)k, p ); NextState[k] = p; /* now we have a full format row vector */ } return 0;}#endif/** Convert state machine to csparse format** Merges state/transition/failure arrays into one.** For each state we use a state-word followed by the transition list for the state* sw(state 0 )...tl(state 0) sw(state 1)...tl(state1) sw(state2)...tl(state2) ....* * The transition and failure states are replaced with the start index of transition state,* this eliminates the NextState[] lookup....** The compaction of multiple arays into a single array reduces the total number of* states that can be handled since the max index is 2^24-1, whereas without compaction* we had 2^24-1 states. */static int _bnfa_conv_list_to_csparse_array(bnfa_struct_t * bnfa) { int m, k, i, nc; bnfa_state_t state; bnfa_state_t * FailState = (bnfa_state_t *)bnfa->bnfaFailState; bnfa_state_t * ps; /* transition list */ bnfa_state_t * pi; /* state indexes into ps */ bnfa_state_t ps_index=0; unsigned nps; bnfa_state_t full[BNFA_MAX_ALPHABET_SIZE]; /* count total state transitions, account for state and control words */ nps = 0; for(k=0;k<bnfa->bnfaNumStates;k++) { nps++; /* state word */ nps++; /* control word */ /* count transitions */ nc = 0; _bnfa_list_conv_row_to_full(bnfa, (bnfa_state_t)k, full ); for( i=0; i<bnfa->bnfaAlphabetSize; i++ ) { state = full[i] & BNFA_SPARSE_MAX_STATE; if( state != 0 ) { nc++; } } /* add in transition count */ if( (k == 0 && bnfa->bnfaForceFullZeroState) || nc > BNFA_SPARSE_MAX_ROW_TRANSITIONS ) { nps += BNFA_MAX_ALPHABET_SIZE; } else { for( i=0; i<bnfa->bnfaAlphabetSize; i++ ) { state = full[i] & BNFA_SPARSE_MAX_STATE; if( state != 0 ) { nps++; } } } } /* check if we have too many states + transitions */ if( nps > BNFA_SPARSE_MAX_STATE ) { /* Fatal */ return -1; } /* Alloc The Transition List - we need an array of bnfa_state_t items of size 'nps' */ ps = BNFA_MALLOC( nps*sizeof(bnfa_state_t),bnfa->nextstate_memory); if( !ps ) { /* Fatal */ return -1; } bnfa->bnfaTransList = ps; /* State Index list for pi - we need an array of bnfa_state_t items of size 'NumStates' */ pi = BNFA_MALLOC( bnfa->bnfaNumStates*sizeof(bnfa_state_t),bnfa->nextstate_memory); if( !pi ) { /* Fatal */ return -1; } /* Build the Transition List Array */ for(k=0;k<bnfa->bnfaNumStates;k++) { pi[k] = ps_index; /* save index of start of state 'k' */ ps[ ps_index ] = k; /* save the state were in as the 1st word */ ps_index++; /* skip past state word */ /* conver state 'k' to full format */ _bnfa_list_conv_row_to_full(bnfa, (bnfa_state_t)k, full ); /* count transitions */ nc = 0; for( i=0; i<bnfa->bnfaAlphabetSize; i++ ) { state = full[i] & BNFA_SPARSE_MAX_STATE; if( state != 0 ) { nc++; } } /* add a full state or a sparse state */ if( (k == 0 && bnfa->bnfaForceFullZeroState) || nc > BNFA_SPARSE_MAX_ROW_TRANSITIONS ) { /* set the control word */ ps[ps_index] = BNFA_SPARSE_FULL_BIT; ps[ps_index] |= FailState[k] & BNFA_SPARSE_MAX_STATE; if( bnfa->bnfaMatchList[k] ) { ps[ps_index] |= BNFA_SPARSE_MATCH_BIT; } ps_index++; /* copy the transitions */ _bnfa_list_conv_row_to_full(bnfa, (bnfa_state_t)k, &ps[ps_index] ); ps_index += BNFA_MAX_ALPHABET_SIZE; /* add in 256 transitions */ } else { /* set the control word */ ps[ps_index] = nc<<BNFA_SPARSE_COUNT_SHIFT ; ps[ps_index] |= FailState[k]&BNFA_SPARSE_MAX_STATE; if( bnfa->bnfaMatchList[k] ) { ps[ps_index] |= BNFA_SPARSE_MATCH_BIT; } ps_index++; /* add in the transitions */ for( m=0, i=0; i<bnfa->bnfaAlphabetSize && m<nc; i++ ) { state = full[i] & BNFA_SPARSE_MAX_STATE; if( state != 0 ) { ps[ps_index++] = (i<<BNFA_SPARSE_VALUE_SHIFT) | state; m++; } } } } /* sanity check we have not overflowed our buffer */ if( ps_index > nps ) { /* Fatal */ return -1; } /* Replace Transition states with Transition Indices. This allows us to skip using NextState[] to locate the next state This limits us to <16M transitions due to 24 bit state sizes, and the fact we have now converted next-state fields to next-index fields in this array, and we have merged the next-state and state arrays. */ ps_index=0; for(k=0; k< bnfa->bnfaNumStates; k++ ) { if( pi[k] >= nps ) { /* Fatal */ return -1; } //ps_index = pi[k]; /* get index of next state */ ps_index++; /* skip state id */ /* Full Format */ if( ps[ps_index] & BNFA_SPARSE_FULL_BIT ) { /* Do the fail-state */ ps[ps_index] = ( ps[ps_index] & 0xff000000 ) | ( pi[ ps[ps_index] & BNFA_SPARSE_MAX_STATE ] ) ; ps_index++; /* Do the transition-states */ for(i=0;i<BNFA_MAX_ALPHABET_SIZE;i++) { ps[ps_index] = ( ps[ps_index] & 0xff000000 ) | ( pi[ ps[ps_index] & BNFA_SPARSE_MAX_STATE ] ) ; ps_index++; } } /* Sparse Format */ else { nc = (ps[ps_index] & BNFA_SPARSE_COUNT_BITS)>>BNFA_SPARSE_COUNT_SHIFT; /* Do the cw = [cb | fail-state] */ ps[ps_index] = ( ps[ps_index] & 0xff000000 ) | ( pi[ ps[ps_index] & BNFA_SPARSE_MAX_STATE ] ); ps_index++; /* Do the transition-states */ for(i=0;i<nc;i++) { ps[ps_index] = ( ps[ps_index] & 0xff000000 ) | ( pi[ ps[ps_index] & BNFA_SPARSE_MAX_STATE ] ); ps_index++; } } /* check for buffer overflow again */ if( ps_index > nps ) { /* Fatal */ return -1; } } BNFA_FREE(pi,bnfa->bnfaNumStates*sizeof(bnfa_state_t),bnfa->nextstate_memory); return 0;}/** Print the state machine - rather verbose*/void bnfaPrint(bnfa_struct_t * bnfa) { int k; bnfa_match_node_t ** MatchList = bnfa->bnfaMatchList; bnfa_match_node_t * mlist; int ps_index=0; bnfa_state_t * ps=0; if( !bnfa ) return; if( !bnfa->bnfaNumStates ) return; if( bnfa->bnfaFormat ==BNFA_SPARSE ) { printf("Print NFA-SPARSE state machine : %d active states\n", bnfa->bnfaNumStates); ps = bnfa->bnfaTransList; if( !ps ) return; }#ifdef ALLOW_NFA_FULL else if( bnfa->bnfaFormat ==BNFA_FULL ) { printf("Print NFA-FULL state machine : %d active states\n", bnfa->bnfaNumStates); }#endif for(k=0;k<bnfa->bnfaNumStates;k++) { printf(" state %-4d fmt=%d ",k,bnfa->bnfaFormat); if( bnfa->bnfaFormat == BNFA_SPARSE ) { unsigned i,cw,fs,nt,fb,mb; ps_index++; /* skip state number */ cw = ps[ps_index]; /* control word */ fb = (cw & BNFA_SPARSE_FULL_BIT)>>BNFA_SPARSE_VALUE_SHIFT; /* full storage bit */ mb = (cw & BNFA_SPARSE_MATCH_BIT)>>BNFA_SPARSE_VALUE_SHIFT; /* matching state bit */ nt = (cw & BNFA_SPARSE_COUNT_BITS)>>BNFA_SPARSE_VALUE_SHIFT;/* number of transitions 0-63 */ fs = (cw & BNFA_SPARSE_MAX_STATE)>>BNFA_SPARSE_VALUE_SHIFT; /* fail state */ ps_index++; /* skip control word */ printf("mb=%3u fb=%3u fs=%-4u ",mb,fb,fs); if( fb ) { printf(" nt=%-3d : ",bnfa->bnfaAlphabetSize); for( i=0; i<(unsigned)bnfa->bnfaAlphabetSize; i++, ps_index++ ) { if( ps[ps_index] == 0 ) continue; if( isprint(i) ) printf("%3c->%-6d\t",i,ps[ps_index]); else printf("%3d->%-6d\t",i,ps[ps_index]); } } else { printf(" nt=%-3d : ",nt); for( i=0; i<nt; i++, ps_index++ ) { if( isprint(ps[ps_index]>>BNFA_SPARSE_VALUE_SHIFT) ) printf("%3c->%-6d\t",ps[ps_index]>>BNFA_SPARSE_VALUE_SHIFT,ps[ps_index] & BNFA_SPARSE_MAX_STATE); else printf("%3d->%-6d\t",ps[ps_index]>>BNFA_SPARSE_VALUE_SHIFT,ps[ps_index] & BNFA_SPARSE_MAX_STATE); } } }#ifdef ALLOW_NFA_FULL else if( bnfa->bnfaFormat == BNFA_FULL ) { int i; bnfa_state_t state; bnfa_state_t * p; bnfa_state_t ** NextState;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -