?? ngram_decode.c
字號:
/**
 * @file   ngram_decode.c
 *
 * @brief  N-gram based word prediction for the 2nd pass.
 *
 * These functions return next word candidates in the 2nd recognition
 * pass of Julius, i.e. N-gram based stack decoding.
 *
 * Given a partial sentence hypothesis, the beginning frame of the
 * hypothesis is first estimated based on the word trellis.  Then the
 * words in the word trellis around the estimated frame are extracted,
 * and they are returned with their N-gram probabilities.
 *
 * In Julius, ngram_firstwords(), ngram_nextwords() and ngram_acceptable()
 * are called from the main search function wchmm_fbs().  In Julian, the
 * corresponding functions in dfa_decode.c are used instead.
 *
 * @author Akinobu Lee
 * @date   Fri Jul  8 14:57:51 2005
 *
 * $Revision: 1.3 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/julius.h>

/**
 * qsort callback function to sort next word candidates by their word ID.
 *
 * @param a [in] element 1
 * @param b [in] element 2
 *
 * @return 1 if the word id of @a a is greater than that of @a b,
 * -1 if it is smaller, 0 if both are equal.
 */
static int
compare_nw(NEXTWORD **a, NEXTWORD **b)
{
  if ((*a)->id > (*b)->id) return 1;
  if ((*a)->id < (*b)->id) return -1;
  return 0;
}

/**
 * Find a word in a list of next word candidates by binary search.
 *
 * The first @a num entries of @a nw must be sorted by ascending word id
 * (see compare_nw()), otherwise the result is meaningless.
 *
 * @param nw [in] list of next word candidates
 * @param w [in] word id to search for
 * @param num [in] length of @a nw
 *
 * @return the pointer to the NEXTWORD data if found, or NULL if not found
 * (including the case where @a num is zero or negative).
 */
static NEXTWORD *
search_nw(NEXTWORD **nw, WORD_ID w, int num)
{
  int left, right, mid;
  NEXTWORD *tmp;

  /* nothing to search; also keeps nw[0] from being touched on bad input */
  if (num <= 0) return NULL;

  left = 0;
  right = num - 1;
  /* lower-bound search: narrow down to the first entry whose id >= w */
  while (left < right) {
    /* written this way so that the sum cannot overflow an int */
    mid = left + (right - left) / 2;
    if ((nw[mid])->id < w) {
      left = mid + 1;
    } else {
      right = mid;
    }
  }
  tmp = nw[left];
  if (tmp->id == w) {
    return tmp;
  } else {
    return NULL;
  }
}

/**
 * Compute backward N-gram score from forward N-gram.
 *
 * The value is the sum of ngram_prob() over the prefixes of @a w of length
 * 1 .. min(n, wlen), minus the sum of ngram_prob() over the prefixes of
 * @a w + 1 of length 1 .. min(n, wlen) - 1, where n is @a ngram->n.
 *
 * @param ngram [in] N-gram data structure
 * @param w [in] word sequence (N-gram entry ids)
 * @param wlen [in] length of @a w
 *
 * @return the backward probability of the word w[0].
 */
static LOGPROB
ngram_forw2back(NGRAM_INFO *ngram, WORD_ID *w, int wlen)
{
  int i;
  LOGPROB p1, p2;

  /* score of the context alone: w[1], w[2], ... */
  p1 = 0.0;
  for (i = 1; i < ngram->n; i++) {
    if (i >= wlen) break;
    p1 += ngram_prob(ngram, i, &(w[1]));
  }
  /* score of the context with w[0] put in front of it */
  p2 = 0.0;
  for (i = 0; i < ngram->n; i++) {
    if (i >= wlen) break;
    p2 += ngram_prob(ngram, i + 1, w);
  }
  return (p2 - p1);
}

/**
 * @brief Extract next word candidates from word trellis.
 *
 * This function extracts the list of trellis words whose word end
 * has survived in the word trellis at the specified frame.
 * The language scores of them are then computed and the words are
 * appended to the current next word candidates data.
 *
 * A trellis word is skipped when its word id is already found among the
 * first @a oldnum entries of @a nw; those entries must be sorted by word
 * id because they are looked up with search_nw().
 *
 * No bounds check is done on @a nw here: the caller has to supply enough
 * pre-allocated NEXTWORD entries from index @a oldnum on.
 *
 * @param r [in] recognition process instance
 * @param nw [i/o] list of next word candidates (new words will be
 * appended at @a oldnum)
 * @param oldnum [in] number of words already stored in @a nw
 * @param hypo [in] the source sentence hypothesis
 * @param t [in] specified frame
 *
 * @return the total number of words currently stored in the @a nw.
 */
static int
pick_backtrellis_words(RecogProcess *r, NEXTWORD **nw, int oldnum, NODE *hypo, short t)
{
  int i;
  WORD_ID w;
  LOGPROB rawscore;
#ifdef WPAIR
  int w_old = WORD_INVALID;
#endif
  int num;
  WORD_ID cnword[MAX_N];	///< Context words (non-transparent), stored from index 1; index 0 is for the candidate
  WORD_ID cnwordrev[MAX_N];	///< Same context in reversed order, used when the N-gram is DIR_RL
  int cnnum = 0;		///< Num of found non-transparent context words (at most ngram->n - 1)
  int last_trans = 0;		///< Num of skipped transparent words
  BACKTRELLIS *bt;
  WORD_INFO *winfo;
  NGRAM_INFO *ngram;
  LOGPROB lm_weight2, lm_penalty2, lm_penalty_trans;

  num = oldnum;
  bt = r->backtrellis;
  winfo = r->lm->winfo;
  ngram = r->lm->ngram;
  lm_weight2 = r->config->lmp.lm_weight2;
  lm_penalty2 = r->config->lmp.lm_penalty2;
  lm_penalty_trans = r->config->lmp.lm_penalty_trans;

  /* set word contexts to cnword[] from 1 considering transparent words */
  if (ngram) {
    cnnum = 0;
    last_trans = 0;
    /* walk the hypothesis from its last stored word backwards */
    for (i = hypo->seqnum - 1; i >= 0; i--) {
      if (! winfo->is_transparent[hypo->seq[i]]) {
	cnword[cnnum+1] = hypo->seq[i];
	cnnum++;
	if (cnnum >= ngram->n - 1) break;
      } else {
	last_trans++;
      }
    }
    if (ngram->dir == DIR_RL) {
      /* reversed copy; slot [cnnum] is left free for the candidate word */
      for (i = 0; i < cnnum; i++) {
	cnwordrev[cnnum-1-i] = cnword[i+1];
      }
    }
    /* use ngram id */
    if (ngram->dir == DIR_RL) {
      for (i = 0; i < cnnum; i++) cnwordrev[i] = winfo->wton[cnwordrev[i]];
    } else {
      for (i = 0; i < cnnum; i++) cnword[i+1] = winfo->wton[cnword[i+1]];
    }
  }

  /* lookup survived words in backtrellis on time frame 't' */
  for (i = 0; i < bt->num[t]; i++) {
    w = (bt->rw[t][i])->wid;
#ifdef WORD_GRAPH
    /* only words on the word graphs are expanded */
    if (!(bt->rw[t][i])->within_wordgraph) continue;
#endif /* not WORD_GRAPH */
#ifdef WPAIR
    /* some word have same word ID with different previous word, so
       only one will be opened (best word will be selected later
       by next_word() */
    if (w == w_old) continue;	/* backtrellis is sorted by word ID */
    else w_old = w;
#endif /* WPAIR */
    /* skip if already exist */
    if (search_nw(nw, w, oldnum) != NULL) continue;

    /* start from a defined value: without an N-gram the score below would
       otherwise be read uninitialized (by the user function and/or the
       lscore computation) */
    rawscore = 0.0;

    /* compute LM probability of the word */
    if (ngram) {
      /* compute N-gram probability */
      if (ngram->dir == DIR_RL) {
	/* just compute N-gram prob of the word candidate */
	cnwordrev[cnnum] = winfo->wton[w];
	rawscore = ngram_prob(ngram, cnnum + 1, cnwordrev);
      } else {
	cnword[0] = winfo->wton[w];
	rawscore = ngram_forw2back(ngram, cnword, cnnum + 1);
      }
#ifdef CLASS_NGRAM
      rawscore += winfo->cprob[w];
#endif
    }
    if (r->lmvar == LM_NGRAM_USER) {
      /* call user-defined function */
      /* be careful that the word context is ordered in backward direction */
      rawscore = (*(r->lm->lmfunc.lmprob))(winfo, hypo->seq, hypo->seqnum, w, rawscore);
    }

    nw[num]->tre = bt->rw[t][i];
    nw[num]->id = w;
    nw[num]->lscore = rawscore * lm_weight2 + lm_penalty2;
    if (winfo->is_transparent[w]) {
      /*nw[num]->lscore -= (LOGPROB)last_trans * TRANS_RENZOKU_PENALTY;*/
      /* extra penalty when a transparent word follows another one;
	 an empty hypothesis has no previous word to look at */
      if (hypo->seqnum > 0
	  && winfo->is_transparent[hypo->seq[hypo->seqnum-1]]) {
	nw[num]->lscore += lm_penalty_trans;
      }
    }

    /* j_printf("%d: %s added\n", num, winfo->wname[nw[num]->id]); */
    num++;
  }

  return num;
}

/**
 * @brief Determine next word candidates from the word trellis.
 *
 * NOTE(review): the source text is cut off at this point.  The rest of
 * this documentation and the function it belongs to are not available
 * here.  The parameter tags that were still readable are: r [in],
 * nw [out], hypo [in], tm [in] and t_end [in].
 */
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -