?? lmodel.c
字號:
/* ----------------------------------------------------------- *//* *//* ___ *//* |_| | |_/ SPEECH *//* | | | | \ RECOGNITION *//* ========= SOFTWARE */ /* *//* *//* ----------------------------------------------------------- *//* developed at: *//* *//* Speech Vision and Robotics group *//* Cambridge University Engineering Department *//* http://svr-www.eng.cam.ac.uk/ *//* *//* main authors: Valtcho Valtchev, Steve Young, *//* Julian Odell, Gareth Moore *//* ----------------------------------------------------------- *//* Copyright: *//* *//* 1994-2002 Cambridge University *//* Engineering Department *//* *//* Use of this software is governed by a License Agreement *//* ** See the file License for the Conditions of Use ** *//* ** This banner notice must not be removed ** *//* *//* ----------------------------------------------------------- *//* File: LModel: ARPA style LM handling *//* ----------------------------------------------------------- */char *lmodel_version = "!HVER!LModel: 3.2 [CUED 09/12/02]";char *lmodel_vc_id = "$Id: LModel.c,v 1.1 2002/12/19 16:35:33 ge204 Exp $";#include "HShell.h" /* HMM ToolKit Modules */#include "HMem.h"#include "HMath.h"#include "HWave.h"#include "HLabel.h"#ifdef ULTRA_LM#include "HDict.h"#endif#include "LWMap.h"#include "LUtil.h"#include "LModel.h"#include "HLM.h"#define T_TOP 0001 /* top level tracing */#define T_LOAD 0002 /* loading of LMs */#define T_SAVE 0004 /* saving of LMs */#define T_MAPS 0010 /* word mappings */#define T_PROB 0020 /* n-gram lookup */static int trace = 0;typedef struct _AccessInfo{ int count; /* count for access */ int nboff; /* times computed using the back-off weight */ int nmiss; /* times not available */ int nhits; /* times available */ double prob; /* sum of prob returned */ double prob2; /* sum of prob^2 returned */} accessinfo;static ConfParam *cParm[MAXGLOBS]; /* config parameters */static int nParm = 0;static char *nGramName[LM_NSIZE] = { "NULLGRAM", "UNIGRAM", "BIGRAM", "TRIGRAM", "FOURGRAM", "PENTAGRAM", "HEXAGRAM","SEPTAGRAM","OCTAGRAM", "NONAGRAM","DECAGRAM","11-GRAM", "12-GRAM","13-GRAM","14-GRAM","15-GRAM"};static char *dcTypeName[] = { "Katz", "Absolute", "Linear"};static Boolean defIntID = FALSE; /* Don't use 4-byte IDs */static Boolean htkEsc = FALSE; /* Don't use HTK quoting and escapes */static Boolean natReadOrder = FALSE; /* Preserve natural read byte order */static Boolean natWriteOrder = FALSE; /* Preserve natural write byte order */extern Boolean vaxOrder; /* True if byteswapping needed to preserve SUNSO */#ifdef ULTRA_LMstatic short ultraKey[KEY_LENGTH]; /* Key used to identify ultra LMs */#endif/* EXPORT->InitLModel: initialise module */void InitLModel(void){ int i; Boolean b; Register(lmodel_version,lmodel_vc_id); nParm = GetConfig("LMODEL", TRUE, cParm, MAXGLOBS); if (nParm>0){ if (GetConfInt(cParm,nParm,"TRACE",&i)) trace = i;#ifdef HTK_TRANSCRIBER if (trace&T_PROB) trace=trace^T_PROB;#endif if (GetConfBool(cParm,nParm,"RAWMITFORMAT",&b)) htkEsc = !b; if (GetConfBool(cParm,nParm,"USEINTID",&b)) defIntID = b; if (GetConfBool(cParm,nParm,"NATURALREADORDER",&b)) natReadOrder = b; if (GetConfBool(cParm,nParm,"NATURALWRITEORDER",&b)) natWriteOrder = b; }#ifdef ULTRA_LM COMPOSE_KEY(ultraKey);#endif}/*----------------------- Input scanner ------------------------*/#define MAXSYMLEN 2048/* GetInLine: read a complete line from source */static char *GetInLine(Source *src,char *buf){ int i, c; if ((c = GetCh(src))==EOF) return NULL; i = 0; while (c!='\n' && i<MAXSYMLEN) { buf[i++] = c; c = GetCh(src); } buf[i] = '\0'; return buf;}/* SyncStr: read input until str found */void SyncStr(Source *src, char *str){ char buf[MAXSYMLEN]; do { if (GetInLine(src,buf)==NULL) HError(15450,"SyncStr: EOF searching for %s", str); } while (strcmp(buf,str)!=0);}/*----------------------- Access statistics ------------------------*/void ResetAccessInfo(BackOffLM *lm){ int i; NGramInfo *gi; AccessInfo *ai; for (gi=lm->gInfo+1,i=1; i<=lm->nSize; i++,gi++) { if ((ai=gi->aInfo)==NULL) HError(15490,"ResetAccessInfo: Access info not present"); ai->count = 0; ai->nboff = ai->nmiss = ai->nhits = 0; ai->prob = ai->prob2 = 0.0; }}/* EXPORT->AttachAccessInfo: attach and initialise access info */void AttachAccessInfo(BackOffLM *lm){ int i; NGramInfo *gi; for (gi=lm->gInfo+1,i=1; i<=lm->nSize; i++,gi++) { if (gi->aInfo!=NULL) HError(15490,"AttachAccessInfo: Access info already present"); gi->aInfo = (AccessInfo *) New(lm->heap,sizeof(AccessInfo)); } ResetAccessInfo(lm);}/* ShowStats: print back-off statistics */static void ShowStats(FILE *f, AccessInfo *acs, char *lmstr){ int count; float f1, f2, f3; double a, b, avg, stdev; count = (acs->count>0) ? acs->count : 1; a = acs->prob / (double) count; b = acs->prob2 / (double) count; avg = a; stdev = sqrt(b - a*a); f1 = 100.0 * (float) acs->nhits / (float) count; f2 = 100.0 * (float) acs->nboff / (float) count; f3 = 100.0 * (float) acs->nmiss / (float) count; fprintf(f,"%10s %10d %5.1f%% %5.1f%% %5.1f%% %8.2f %8.2f\n", lmstr, acs->count, f1, f2, f3, avg, stdev);}/* EXPORT -> PrintTotalAccessStats: print access statistics */void PrintTotalAccessStats(FILE *f,BackOffLM *lm){ int i; NGramInfo *gi; static char *lmstr[] = { "nullgram", "unigram", "bigram", "trigram", "fourgram", "pentagram", "hexagram", "septagram", "octagram", "nonagram", "decagram" }; static int max_text = 10; /* size of lmstr[] array */ char tmpstr[10]; fprintf(f,"%10s %10s %6s %6s %6s %8s %8s\n", "Lang model", "requested", "exact", "backed", "n/a", "mean", "stdev"); for (gi=lm->gInfo+2,i=2; i<=lm->nSize; i++, gi++) ShowStats(f,gi->aInfo, i<=max_text?lmstr[i]:(sprintf(tmpstr, "%d", i), tmpstr));}/*----------------------- float compression ----------------------*/#define MIN_PROB -8.0#ifdef LM_COMPACTstatic UShort Prob2Shrt(float f){ if (f < MIN_PROB) return USHRT_MAX; return (f / MIN_PROB * (float) (USHRT_MAX-1));}static float Shrt2Prob(UShort s){ if (s == USHRT_MAX) return LZERO; return ((float) s / (float) (USHRT_MAX-1)) * MIN_PROB;}#endif/*-------------------------- LM access ----------------------------*//* EXPORT-> CmpSE: qsort comparison for short LM entries */int CmpSE(const void *p1, const void *p2){ if (((SMEntry *)p1)->ndx < ((SMEntry *)p2)->ndx) return -1; if (((SMEntry *)p1)->ndx > ((SMEntry *)p2)->ndx) return +1; return 0;}/* EXPORT-> CmpFE: qsort comparison for full LM entries */int CmpFE(const void *p1, const void *p2){ if (((FLEntry *)p1)->ndx < ((FLEntry *)p2)->ndx) return -1; if (((FLEntry *)p1)->ndx > ((FLEntry *)p2)->ndx) return +1; return 0;}/* EXPORT-> FindSE: find SEntry in a sorted list */SMEntry *FindSE(SMEntry *sptr, int lo, int hi, LM_Id key){ int cen; LM_Id cmp; if (sptr==NULL) return NULL; hi--; if ((key < sptr[lo].ndx) || (key > sptr[hi].ndx)) return NULL; do { cen = (lo + hi) / 2; cmp = sptr[cen].ndx; if (key == cmp) return sptr+cen; if (key > cmp) lo = cen+1; else hi = cen-1; } while (lo <= hi); return NULL;}/* EXPORT-> FindFE: find FEntry in a sorted list */FLEntry *FindFE(FLEntry *fptr, int lo, int hi, LM_Id key){ int cen; LM_Id cmp; if (fptr==NULL) return NULL; hi--; if ((key < fptr[lo].ndx) || (key > fptr[hi].ndx)) return NULL; do { cen = (lo + hi) / 2; cmp = fptr[cen].ndx; if (key == cmp) return fptr+cen; if (key > cmp) lo = cen+1; else hi = cen-1; } while (lo <= hi); return NULL;}/* FindSE1: find bigram entry in a sorted list, also return index */static SMEntry *FindSE1(SMEntry *sptr, int lo, int hi, LM_Id key, int *fcen){ int cen; LM_Id cmp; hi--; if ((key < sptr[lo].ndx) || (key > sptr[hi].ndx)) return NULL; do { cen = (lo + hi) / 2; cmp = sptr[cen].ndx; if (key == cmp) { *fcen = cen; return sptr+cen; } if (key > cmp) lo = cen+1; else hi = cen-1; } while (lo <= hi); return NULL;}/* -------------------- Ultra format I/O ---------------------- */#ifdef ULTRA_LMstatic CNEntry *qs_cneBuf; /* global table of read CNEntry *//* FRead: fread spec function for Source src */static size_t FRead(void *ptr, size_t size, size_t nitems, Source *src){ int nr,i; unsigned char *c; nr = fread(ptr,size,nitems,src->f);#ifdef HTK_CRYPT if (src->crypt!=NULL) { for (c=ptr,i=0; i<size*nr; i++,c++) *c = DecryptChar(src->crypt,*c); }#endif src->chcount+=nr*size; return nr;}#ifdef LMPROB_SHORT/* The following compress/decompress LOG10 float to/from short.*/#define PROB_LOG_TO_SHORT(prob) \ ((int) (-prob/0.0002+0.5) > 65534 ? 65535 : (int) (-prob/0.0002+0.5))#define PROB_SHORT_TO_LOG(prob) \ (prob<=65534 ? -prob*0.0002 : LZERO)#define BOWT_LOG_TO_SHORT(bowt) \ ((floor(bowt/0.0002+0.5)>32766)?32767:\ (floor(bowt/0.0002+0.5)<-32767)?-32768:\ (int)(floor(bowt/0.0002+0.5)))#define BOWT_SHORT_TO_LOG(bowt) \ (bowt*0.0002)#else#define PROB_LOG_TO_SHORT(prob) (prob)#define PROB_SHORT_TO_LOG(prob) (prob)#define BOWT_LOG_TO_SHORT(bowt) (bowt)#define BOWT_SHORT_TO_LOG(bowt) (bowt)#endif#define CNE2FE(cndx,fe) { \ CNEntry *cne = cneBuf + cndx; \ fe->nse = cne->nse; \ fe->sea = smeTab[cndx]; \ fe->ndx = cne->word[0]; \ bowt = BOWT_SHORT_TO_LOG(cne->bowt); \ fe->bowt = (ptype==LMP_FLOAT) ? LOG10_TO_FLT(bowt) : bowt*scale; \}#define INIT_CNE(cne) { \ int i; \ cne.nse = 0; \ cne.bowt = 0.0; \ for (i=0; i<NSIZE-1; i++) cne.word[i]=0; \}static int nep_cmp(const void *v1,const void *v2){ CNEntry *n1,*n2; int res,i; res = 0; n1=qs_cneBuf + *((int *)v1); n2=qs_cneBuf + *((int *)v2); for(i=NSIZE-2;i>=0;i--) if (n1->word[i]!=n2->word[i]) { res=(n1->word[i]-n2->word[i]); break; } return(res);}static void LoadUltraNGrams(Source *src, BackOffLM *lm){ float prob,bowt,scale; Boolean newCTX; LMProbType ptype; int context[NSIZE+1]; int i,j,idx,cneCnt,seCnt; SEntry se; int *cneTab; CNEntry *cne,*cneBuf; FLEntry *cfe,*feBuf,*parent; SMEntry *sme,*smeBuf,**smeTab; Boolean mustSwap = (vaxOrder && !natReadOrder); SyncStr(src,"\\N-grams:"); scale = lm->gScale*LN10;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -