?? word_align.c
字號:
/**
 * @file   word_align.c
 *
 * <JA>
 * @brief  単語・音素・状態単位のアラインメント
 *
 * ここでは，認識結果に対する入力音声のアラインメントを出力するための
 * 関数が定義されています.
 *
 * Julius/Julian では，認識結果においてその単語や音素，あるいはHMMの状態が
 * それぞれ入力音声のどの区間にマッチしたのかを知ることができます.
 * より正確なアラインメントを求めるために，Julius/Julian では認識中の
 * 近似を含む情報は用いずに，認識が終わった後に得られた認識結果の単語列に
 * 対して，あらためて forced alignment を実行しています.
 * </JA>
 *
 * <EN>
 * @brief  Forced alignment by word / phoneme / state unit.
 *
 * This file defines functions for performing forced alignment of
 * recognized words.  The forced alignment is implemented in Julius/Julian
 * to get the best matching segmentation of the recognized word sequence
 * upon input speech.  Word-level, phoneme-level and HMM state-level
 * alignment can be obtained.
 *
 * Julius/Julian performs the forced alignment as a post-processing step of
 * the recognition process.  Recomputation of the Viterbi path on the
 * recognized word sequence toward input speech will be done after the
 * recognition to get better alignment.
 *
 * </EN>
 *
 * @author Akinobu Lee
 * @date   Sat Sep 24 16:09:46 2005
 *
 * $Revision: 1.5 $
 *
 */
/*
 * Copyright (c) 1991-2007 Kawahara Lab., Kyoto University
 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology
 * Copyright (c) 2005-2007 Julius project team, Nagoya Institute of Technology
 * All rights reserved
 */

#include <julius/julius.h>

/**
 * <JA>
 * 与えられた単語列からHMMを連結して文全体のHMMを構築する.
 *
 * @param wseq [in] 単語列
 * @param num [in] @a wseq の数
 * @param has_sp_ret [out] ショートポーズを後続に挿入しうるユニットの情報
 * @param num_ret [out] 構築されたHMMに含まれる音素HMMの数
 * @param end_ret [out] アラインメントの区切りとなる状態番号の列
 * @param per_what [in] 単語・音素・状態のどの単位でアラインメントを取るかを指定
 * @param r [in] 認識処理インスタンス
 *
 * @return あらたに割り付けられた文全体をあらわすHMMモデル列へのポインタを返す.
 * </JA>
 * <EN>
 * Make the whole sentence HMM from given word sequence by connecting
 * each phoneme HMM.
* * @param wseq [in] word sequence to align * @param num [in] number of @a wseq * @param has_sp_ret [out] unit information of whether it can be followed by a short-pause * @param num_ret [out] number of HMM contained in the generated sentence HMM * @param end_ret [out] sequence of state location as alignment unit * @param per_what [in] specify the alignment unit (word / phoneme / state) * @param r [in] recognition process instance * * @return newly malloced HMM sequences. * </EN> */static HMM_Logical **make_phseq(WORD_ID *wseq, short num, boolean **has_sp_ret, int *num_ret, int **end_ret, int per_what, RecogProcess *r){ HMM_Logical **ph; /* phoneme sequence */ boolean *has_sp; int k; int phnum; /* num of above */ WORD_ID tmpw, w; int i, j, pn, st, endn; HMM_Logical *tmpp, *ret; WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; boolean enable_iwsp; /* for multipath */ winfo = r->lm->winfo; hmminfo = r->am->hmminfo; if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp; /* make ph[] from wseq[] */ /* 1. calc total phone num and malloc */ phnum = 0; for (w=0;w<num;w++) phnum += winfo->wlen[wseq[w]]; ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * phnum); if (hmminfo->multipath && enable_iwsp) { has_sp = (boolean *)mymalloc(sizeof(boolean) * phnum); } else { has_sp = NULL; } /* 2. make phoneme sequence */ st = 0; if (hmminfo->multipath) st++; pn = 0; endn = 0; for (w=0;w<num;w++) { tmpw = wseq[w]; for (i=0;i<winfo->wlen[tmpw];i++) { tmpp = winfo->wseq[tmpw][i]; /* handle cross-word context dependency */ if (r->ccd_flag) { if (w > 0 && i == 0) { /* word head */ if ((ret = get_left_context_HMM(tmpp, ph[pn-1]->name, hmminfo)) != NULL) { tmpp = ret; } /* if triphone not found, fallback to bi/mono-phone */ /* use pseudo phone when no bi-phone found in alignment... 
*/ } if (w < num-1 && i == winfo->wlen[tmpw] - 1) { /* word tail */ if ((ret = get_right_context_HMM(tmpp, winfo->wseq[wseq[w+1]][0]->name, hmminfo)) != NULL) { tmpp = ret; } } } ph[pn] = tmpp; if (hmminfo->multipath && enable_iwsp) { if (i == winfo->wlen[tmpw] - 1) { has_sp[pn] = TRUE; } else { has_sp[pn] = FALSE; } } if (per_what == PER_STATE) { for (j=0;j<hmm_logical_state_num(tmpp)-2;j++) { (*end_ret)[endn++] = st + j; } if (hmminfo->multipath && enable_iwsp && has_sp[pn]) { for (k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) { (*end_ret)[endn++] = st + j + k; } } } st += hmm_logical_state_num(tmpp) - 2; if (hmminfo->multipath && enable_iwsp && has_sp[pn]) { st += hmm_logical_state_num(hmminfo->sp) - 2; } if (per_what == PER_PHONEME) (*end_ret)[endn++] = st - 1; pn++; } if (per_what == PER_WORD) (*end_ret)[endn++] = st - 1; } *num_ret = phnum; *has_sp_ret = has_sp; return ph;}/** * <JA> * 矢鏈攣のHMMを菇蜜し·Viterbiアラインメントを悸乖し·馮蔡を叫蝸する. * * @param words [in] 矢簿棱をあらわす帽胳誤 * @param wnum [in] @a words の墓さ * @param param [in] 掐蝸潑魔パラメ〖タ誤 * @param per_what [in] 帽胳ˇ不燎ˇ覺輪のどの帽疤でアラインメントを艱るかを回年 * @param align [out] アラインメント馮蔡を呈羌するSentence菇隴攣 * @param r [i/o] 千急借妄インスタンス * </JA> * <EN> * Build sentence HMM, call viterbi_segment() and output result. 
* * @param words [in] word sequence of the sentence * @param wnum [in] number of words in @a words * @param param [in] input parameter vector * @param per_what [in] specify the alignment unit (word / phoneme / state) * @param s [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * </EN> */static voiddo_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, SentenceAlign *align, RecogProcess *r){ HMM_Logical **phones; /* phoneme sequence */ boolean *has_sp; /* whether phone can follow short pause */ int k; int phonenum; /* num of above */ HMM *shmm; /* sentence HMM */ int *end_state; /* state number of word ends */ int *end_frame; /* segmented last frame of words */ LOGPROB *end_score; /* normalized score of each words */ LOGPROB allscore; /* total score of this word sequence */ WORD_ID w; int i, rlen; int end_num = 0; int *id_seq, *phloc = NULL, *stloc = NULL; int j,n,p; WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; boolean enable_iwsp; /* for multipath */ winfo = r->lm->winfo; hmminfo = r->am->hmminfo; if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp; /* initialize result storage buffer */ switch(per_what) { case PER_WORD: jlog("ALIGN: === word alignment begin ===\n"); end_num = wnum; phloc = (int *)mymalloc(sizeof(int)*wnum); i = 0; for(w=0;w<wnum;w++) { phloc[w] = i; i += winfo->wlen[words[w]]; } break; case PER_PHONEME: jlog("ALIGN: === phoneme alignment begin ===\n"); end_num = 0; for(w=0;w<wnum;w++) end_num += winfo->wlen[words[w]]; break; case PER_STATE: jlog("ALIGN: === state alignment begin ===\n"); end_num = 0; for(w=0;w<wnum;w++) { for (i=0;i<winfo->wlen[words[w]]; i++) { end_num += hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2; } if (hmminfo->multipath && enable_iwsp) { end_num += hmm_logical_state_num(hmminfo->sp) - 2; } } phloc = (int *)mymalloc(sizeof(int)*end_num); stloc = (int *)mymalloc(sizeof(int)*end_num); { n = 0; p = 0; for(w=0;w<wnum;w++) { 
for(i=0;i<winfo->wlen[words[w]]; i++) { for(j=0; j<hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2; j++) { phloc[n] = p; stloc[n] = j + 1; n++; } if (hmminfo->multipath && enable_iwsp && i == winfo->wlen[words[w]] - 1) { for(k=0;k<hmm_logical_state_num(hmminfo->sp)-2;k++) { phloc[n] = p; stloc[n] = j + 1 + k + end_num; n++; } } p++; } } } break; } end_state = (int *)mymalloc(sizeof(int) * end_num); /* make phoneme sequence word sequence */ phones = make_phseq(words, wnum, &has_sp, &phonenum, &end_state, per_what, r); /* build the sentence HMMs */ shmm = new_make_word_hmm(hmminfo, phones, phonenum, has_sp); if (shmm == NULL) { j_internal_error("Error: failed to make word hmm for alignment\n"); } /* call viterbi segmentation function */ allscore = viterbi_segment(shmm, param, r->wchmm->hmmwrk, hmminfo->multipath, end_state, end_num, &id_seq, &end_frame, &end_score, &rlen); /* store result to s */ align->num = rlen; align->unittype = per_what; align->begin_frame = (int *)mymalloc(sizeof(int) * rlen); align->end_frame = (int *)mymalloc(sizeof(int) * rlen); align->avgscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen); for(i=0;i<rlen;i++) { align->begin_frame[i] = (i == 0) ? 0 : end_frame[i-1] + 1; align->end_frame[i] = end_frame[i]; align->avgscore[i] = end_score[i]; } switch(per_what) { case PER_WORD:
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -