?? markpron.cpp
字號:
#include <stdio.h>
#include <string.h>
#include "markpron.h"
#include "util.h"
// Dictionary file format, one entry per line:
//   "<word>\t<syll>"
//   or "<word> <syll>"
// Lines whose first character is '#' are skipped by the loader.

// Table filled by load_word2syll_dict() and queried by word2syll().
StringStringHash g_hash_word2syll;
// Table filled by load_syll2mono_dict() and queried by syll2mono().
StringStringHash g_hash_syll2mono;
// Load a dictionary file into *hash, replacing whatever it held before.
//
// Each line has the form "<word>\t<syll>"; if a line contains no tab it is
// split at its first space instead ("<word> <syll>"). Lines starting with
// '#' and blank lines are skipped. A line with neither separator is reported
// through errMsg() and ignored. The trailing line ending ("\n" or "\r\n") is
// removed from the value before it is stored.
//
// Entries are added with insert(); NOTE(review): for std map-like containers
// this keeps the first entry when a word is repeated -- confirm against the
// definition of StringStringHash.
//
// Lines longer than the buffer are read in pieces by fgets(), so an
// over-long line is treated as several lines.
//
// file_dict: path of the dictionary file.
// hash:      destination table; cleared even if the file cannot be opened.
// Returns 0 on success, -1 if an argument is NULL or the file cannot be opened.
int load_dict_ex(const char *file_dict, StringStringHash *hash)
{
    if( !file_dict || !hash )
        return -1;

    char line[MAX_LINE_LEN+1];

    hash->clear();

    FILE *fp = fopen(file_dict, "r");
    if( !fp )
    {
        errMsg("cannot open file %s", file_dict);
        return -1;
    }

    while( fgets(line, sizeof line, fp) )
    {
        // comment line, or a line holding nothing but its line ending
        if( '#' == line[0] || '\n' == line[0] || '\r' == line[0] )
            continue;

        // a tab separator takes priority; fall back to the first space
        char *p = strchr(line, '\t');
        if( !p )
            p = strchr(line, ' ');
        if( !p )
        {
            errMsg("Cannot find <tab> or <space> between word and syll in dict %s\n line:%s", file_dict, line);
            continue;
        }

        // split in place: line becomes the word, p the value
        *p = 0;
        p++;

        // cut the value at its line ending; safe even when the value is empty
        p[strcspn(p, "\r\n")] = 0;

        hash->insert( pair<string, string>(line, p) );
    }

    fclose(fp);
    return 0;
}
int load_word2syll_dict(const char *file_dict)
{
return load_dict_ex(file_dict, &g_hash_word2syll);
}
int load_syll2mono_dict(const char *file_dict)
{
return load_dict_ex(file_dict, &g_hash_syll2mono);
}
// Load both dictionaries: word -> syllable first, then syllable -> mono-phone.
// The second file is not attempted when the first load reports -1.
// Returns 0 when neither load reported -1, otherwise -1.
int load_dict(const char *file_word2syll, const char *file_syll2mono)
{
    if( -1 == load_word2syll_dict(file_word2syll) )
        return -1;

    if( -1 == load_syll2mono_dict(file_syll2mono) )
        return -1;

    return 0;
}
// Find the pronunciation of `word` by greedy longest-match, scanning from
// left to right.
//
// Starting at the current position the whole remaining text is looked up in
// *hash; while there is no match the candidate is shortened from its end --
// by one byte when its last byte is below 0x80, otherwise by two bytes
// (NOTE(review): presumably to step over double-byte characters -- confirm
// the dictionary encoding). The value of each match is appended to `pron`,
// separated from the previous one by a single space, and a trailing space
// left by a value is removed. When nothing matches at a position, the last
// candidate tried (one or two bytes) is skipped without output.
//
// word: NUL-terminated text to look up; must be non-NULL and non-empty.
// pron: output buffer. Its size is not passed in, so the caller must make it
//       large enough for all appended values; no bounds check is possible.
// hash: dictionary to search.
// Returns -1 (pron untouched) on invalid arguments, otherwise 0 -- also when
// nothing was found, in which case pron is the empty string.
int lookup_dict(const char *word, char *pron, StringStringHash *hash)
{
    if( !word || !word[0] || !hash || !pron )
        return -1;

    pron[0] = 0;

    const size_t total = strlen(word);
    size_t pos = 0;

    while( pos < total )
    {
        size_t len = total - pos;
        size_t step = len;          // length of the last candidate tried

        while( len > 0 )
        {
            step = len;

            // the key is built as a string so a candidate of any length is safe
            iStringStringHash itr = hash->find( string(word + pos, len) );
            if( hash->end() != itr )
            {
                if( pron[0] )
                    strcat(pron, " ");
                strcat(pron, itr->second.c_str());

                const size_t plen = strlen(pron);
                if( plen > 0 && ' ' == pron[plen - 1] )
                    pron[plen - 1] = 0;
                break;
            }

            // inspect the last byte of the candidate itself (offset by pos),
            // as unsigned so the test does not depend on char signedness
            const size_t drop = ( (unsigned char)word[pos + len - 1] < 0x80 ) ? 1 : 2;
            len = ( len > drop ) ? len - drop : 0;
        }

        // step >= 1 here, so the scan always moves forward
        pos += step;
    }

    return 0;
}
// find pronounce of the word
// for left to right order
int word2syll(const char *word, char *pron)
{
return lookup_dict(word, pron, &g_hash_word2syll);
}
// find pronounce of the word
// for left to right order
int syll2mono(const char *word, char *pron)
{
return lookup_dict(word, pron, &g_hash_syll2mono);
}
// Find the syllable pronunciation of `word` and wrap it as "sil <pron> sil"
// (for ASR use).
//
// word: text to look up.
// pron: output buffer; must be large enough for the wrapped result
//       (its size is not passed in, so it cannot be checked here).
// Returns 0 on success. Returns -1, leaving pron untouched, when pron is NULL
// or the lookup fails, so callers never read an unwritten buffer.
int word2syll_sil(const char *word, char *pron)
{
    char p[1024];

    if( !pron || 0 != word2syll(word, p) )
        return -1;

    sprintf(pron, "sil %s sil", p);
    return 0;
}
// Find the mono-phone sequence of `word` in two steps: the word is looked up
// in g_hash_word2syll, and that result is then looked up in g_hash_syll2mono.
// Returns -1 when the first lookup does not return 0; otherwise returns the
// result of the second lookup.
int word2mono(const char *word, char *pron)
{
    char syll_buf[1024];

    const int first = lookup_dict(word, syll_buf, &g_hash_word2syll);
    if( 0 != first )
        return -1;

    return lookup_dict(syll_buf, pron, &g_hash_syll2mono);
}
// Find the mono-phone sequence of `word` and wrap it as "sil <pron> sil".
// Returns 0 on success, -1 (pron untouched) when word2mono() does not
// return 0.
int word2mono_sil(const char *word, char *pron)
{
    char mono_buf[1024];

    if( 0 != word2mono(word, mono_buf) )
        return -1;

    sprintf(pron, "sil %s sil", mono_buf);
    return 0;
}
void unload_dict()
{
g_hash_word2syll.clear();
g_hash_syll2mono.clear();
return;
}
?? 快捷鍵說明
複製代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -