?? transoptions.cpp
字號:
#include "TransOptions.h"

#include <algorithm>
#include <cmath>
#include <iostream>
#include <sstream>
#include <strstream>
// The definitions below use unqualified standard-library names (string, vector, cout, ...).
using namespace std;
// Verbosity flag defined elsewhere; when it is true, load() and getEPhrase()
// print progress and diagnostic output to stdout.
extern bool printmore;
// Builds a TransOptions from a configuration record.
//
// Only copies the feature weights and pruning limits out of `para` into the
// corresponding members; no file is opened and no table is built here.
//
// para: configuration record that supplies the weights and limits.
TransOptions::TransOptions(Para para)
{
    const Para& cfg = para;

    // Pruning limits.
    TTABLELIMIT = cfg.ttable_limit;   // cap on entries kept per source phrase in load()
    PHRASELIMIT = cfg.phrase_limit;   // phrase length limit

    // Weights applied to the language-model score and the per-word penalty
    // in wordsProb().
    WLM = cfg.weight_l;
    WEIGHTW = cfg.word_penalty;

    // Weights applied to the five phrase-table scores in load().
    WPROBEF = cfg.weight_ef;
    WLEXEF = cfg.weight_lexef;
    WPROBFE = cfg.weight_fe;
    WLEXFE = cfg.weight_lexfe;
    WPENALTY = cfg.penalty;
}
bool TransOptions::load(string fileName, LanguageModel *lm, Vocab *evocab, Vocab *cvocab)
{
ENVOCAB =evocab;
if (!ENVOCAB) {
cout << "english vocab is NULL!" << endl;
return 0;
}
CNVOCAB = cvocab;
if (!CNVOCAB) {
cout << "foreign vocab is NULL!" << endl;
return 0;
}
LM = lm;
string strTmp;
input.open(fileName.c_str(), std::ios::in);
if (!input)
{
cout << "Open Phrase Table " << fileName << " Error!" << endl;
return 0;
}
if (!LM)
{
cout << "LM is NULL!" << endl;
return 0;
}
// clock_t oldclock, newclock;
// oldclock = clock();
long i = 0;
double MAX;
double MIN;
char zero[2];
sprintf(zero, "%d", 0);
string ZERO(zero);
while (getline(input, strTmp))
{
vector<string> vecTmp;
vector<string> vecWords;
// ePhrase ePhraseTmp;
int firstSymbol = strTmp.find_first_of("|||");
int lastSymbol = strTmp.find_last_of("|||");
if ((firstSymbol == string::npos) || (lastSymbol == string::npos)) {
continue ;
}
string fPhrase(strTmp, 0, firstSymbol - 1);
string ePhrase(strTmp, firstSymbol + 4, lastSymbol - firstSymbol - 7);
vector<int> fPhraseVec;
vector<int> ePhraseVec;
CNVOCAB->getIndices(fPhrase, fPhraseVec);
ENVOCAB->getIndices(ePhrase, ePhraseVec);
//ePhraseTmp.eWords = ePhrase;
string probs(strTmp, lastSymbol + 2, strTmp.length() - lastSymbol - 1);
split(probs, vecTmp);
if (vecTmp.size() == 4) {
vecTmp.push_back(ZERO);
}
double TMcost = WPROBEF * log(atof(vecTmp[0].c_str())) + WLEXEF * log(atof(vecTmp[1].c_str())) + \
WPROBFE* log(atof(vecTmp[2].c_str())) + WLEXFE * log(atof(vecTmp[3].c_str())) + \
WPENALTY * log(atof(vecTmp[4].c_str()));
// split(ePhrase, vecWords);
double LMcost = wordsProb(ePhraseVec);
aboutEPhrase *ePhraseTmp = new aboutEPhrase();
ePhraseTmp->c = TMcost + LMcost;
ePhraseTmp->ephrase = ePhraseVec;
ePhraseTmp->pC = TMcost;
phraseTable::iterator posPT = f2eVocab.find(fPhraseVec);
if (posPT != f2eVocab.end())
{
// f2eVocab[fPhrase].push_back(ePhraseTmp);
int len = posPT->second.size();
if (len < TTABLELIMIT) {
f2eVocab[fPhraseVec].push_back(ePhraseTmp);
if ((ePhraseTmp->c - MAX > avs)) {
MAX = ePhraseTmp->c;
}
if ((ePhraseTmp->c - MIN < avs)) {
MIN = ePhraseTmp->c;
}
}
else {
MIN = (**min_element(posPT->second.begin(), posPT->second.end(), LESS())).c;
if ((ePhraseTmp->c - MIN > avs)) {
double tmp = MIN;
if ((ePhraseTmp->c - MAX > avs)) {
MAX = ePhraseTmp->c;
}
for (int i = 0; i < TTABLELIMIT; i++)
{
double test = (posPT->second)[i]->c;
if (((posPT->second)[i]->c - tmp < avs) && ((posPT->second)[i]->c - tmp > -avs))
{
delete (posPT->second)[i];
(posPT->second)[i] = ePhraseTmp;
break;
}
}
}
}
}
else
{
candiPhrase eAboutTmp;
eAboutTmp.push_back(ePhraseTmp);
f2eVocab.insert(make_pair(fPhraseVec, eAboutTmp));
MAX = ePhraseTmp->c;
MIN = ePhraseTmp->c;
}
// ePhraseTmp = NULL;
vecWords.clear();
vecTmp.clear();
}
input.clear();
input.close();
if(printmore) {
cout << "Loading phrase table finished!!!" << endl;
}
return 1;
}
// Splits `line` on whitespace and appends every token to `strs`.
//
// line: text to tokenise.
// strs: output vector; existing elements are kept, tokens are appended in order.
inline void TransOptions::split(const string& line, vector<string>& strs)
{
    // std::istringstream replaces the deprecated std::istrstream and reads the
    // string's own length instead of relying on a terminating NUL.
    istringstream ist(line);
    string w;
    while (ist >> w) {
        strs.push_back(w);
    }
}
// Sorts `forErase` with the GREATER comparator (stable) and truncates it to
// its first `limit` elements.
//
// forErase: candidate list, sorted and shortened in place.
// limit:    number of leading elements to keep. When it is negative, or not
//           smaller than the current size, the list is only sorted; forming
//           begin() + limit outside [begin(), end()] would be undefined.
//
// NOTE(review): the elements removed here are raw pointers and are not
// deleted by this function - confirm who owns them.
void TransOptions::eraseSet(candiPhrase& forErase, int limit)
{
    stable_sort(forErase.begin(), forErase.end(), GREATER());
    if (limit < 0 || limit >= static_cast<int>(forErase.size())) {
        return;
    }
    forErase.erase(forErase.begin() + limit, forErase.end());
}
int TransOptions::getEPhrase(vector<int> fPhrase, PhraseSnippet& phraseSnippet, PhraseSnippetPosition& phraseSnippetPosition)
{
futureCost.resize(len);
for(int ifc = 0; ifc < len; ifc++)
{
futureCost[ifc].resize(len);
}
for(int initI = 0; initI < len; initI++)
{
for(int initJ = initI; initJ < len; initJ++)
{
futureCost[initI][initJ] = INFINITE;
}
}
//獲取TO以及future cost
for(int i = 0; i <= PHRASELIMIT; i++)
{
for(int j = 0; (j < len) && (j + i < len); j++)
{
vector<int> fPhraseSnippet;
fPosition posTmp;
posTmp.start = j;
posTmp.end = j + i;
if (j == j + i) {
fPhraseSnippet.push_back(fPhrase[j]);
}
else
{
for(int iTmp = j; iTmp <= j + i; iTmp++ )
{
fPhraseSnippet.push_back(fPhrase[iTmp]);
}
}
if (fPhraseSnippet.size() <= 0) {
continue;
}
phraseTable::iterator findT = f2eVocab.find(fPhraseSnippet);
if (findT != f2eVocab.end())
{
int si = (findT->second).size();
phraseSnippet.push_back(&(findT->second));
phraseSnippetPosition.push_back(posTmp);
futureCost[j][j + i] = (**max_element((findT->second).begin(), (findT->second).end(), LESS())).c;
}
else if ((findT == f2eVocab.end()) && (j == j + i))
{
aboutEPhrase *aboute = new aboutEPhrase;
candiPhrase *cp = new candiPhrase;
if (fPhraseSnippet[0] > 0) {
int ids = --Vocab::ID;
string cn = CNVOCAB->getWord(fPhraseSnippet[0]);
CNVOCAB->unkTMP.insert(make_pair(ids, cn));
ENVOCAB->unkTMP.insert(make_pair(ids, cn));
fPhraseSnippet.clear();
fPhraseSnippet.push_back(ids);
}
aboute->ephrase = fPhraseSnippet;
aboute->pC = 0;
aboute->c = wordsProb(fPhraseSnippet);
cp->push_back(aboute);
phraseSnippet.push_back(cp);
phraseSnippetPosition.push_back(posTmp);
futureCost[j][j + i] = (**max_element(cp->begin(), cp->end(), LESS())).c;
}
}
}
reCalculation();
if(printmore) {
cout << "print translation options ... " << endl;
int lenTO = phraseSnippet.size();
for(int iTO = 0; iTO < lenTO; iTO++)
{
candiPhrase::iterator pos;
int lenT = phraseSnippet[iTO]->size();
int fWordsLen = phraseSnippetPosition[iTO].end - phraseSnippetPosition[iTO].start + 1;
vector<int> fWords;
for(int position = phraseSnippetPosition[iTO].start; position <= phraseSnippetPosition[iTO].end; position++)
{
fWords.push_back(fPhrase[position]);
}
cout << "[ " << CNVOCAB->getWords(fWords) << " ]\t" << lenT <<endl;
for(pos = phraseSnippet[iTO]->begin(); pos != phraseSnippet[iTO]->end(); ++pos)
{
cout << "\t" << ENVOCAB->getWords((*pos)->ephrase) << ", " << (*pos)->pC << ", " << (*pos)->c << endl;
}
fWords.clear();
}
cout << "print future cost" << endl;
int lena = futureCost.size();
for(int ia = 0; ia < lena; ia++)
{
for(int j = ia; j < lena; j++)
{
cout << "future costs from " << ia << " to " << j << " is " << futureCost[ia][j] << endl;
}
}
cout << "Get translation options finished !" << endl;
}
return len;
}
// Completes the future-cost table by combining adjacent spans.
//
// Spans are visited by increasing width. For each span [left, right] every
// split point `cut` is tried, and futureCost[left][right] is raised to
// futureCost[left][cut] + futureCost[cut + 1][right] whenever that sum
// exceeds the current value by more than `avs`.
void TransOptions::reCalculation()
{
    const int n = futureCost.size();
    for (int width = 1; width < n; ++width)
    {
        for (int left = 0; left + width < n; ++left)
        {
            const int right = left + width;
            for (int cut = left; cut < right; ++cut)
            {
                const double combined = futureCost[left][cut] + futureCost[cut + 1][right];
                if (combined - futureCost[left][right] > avs) {
                    futureCost[left][right] = combined;
                }
            }
        }
    }
}
// Returns the stored future cost of the span [from, to].
//
// from: first position of the span.
// to:   last position of the span.
//
// Returns 0.0 when from > to; otherwise futureCost[from][to]. The indices are
// not range-checked here.
double TransOptions::getFutureCost(int from, int to)
{
    if (from > to) {
        return 0.0;
    }
    return futureCost[from][to];
}
// Prints the upper triangle of the future-cost table to stdout, one line per
// span, in row-major order.
void TransOptions::printFC()
{
    const int tableSize = futureCost.size();
    for (int from = 0; from < tableSize; ++from)
    {
        for (int to = from; to < tableSize; ++to)
        {
            cout << "future costs from " << from << " to " << to << " is " << futureCost[from][to] << endl;
        }
    }
}
// Scores a target phrase with the language model plus a word penalty.
//
// LM->wordProb() is called on the full id sequence and then on each shorter
// prefix obtained by dropping the last id, down to a single id; every result
// is weighted by WLM and summed. WEIGHTW times the phrase length is then
// subtracted.
// NOTE(review): presumably wordProb() scores the last word given the ones
// before it - confirm against the LanguageModel interface.
//
// Ephrase: target phrase as vocabulary ids (taken by value; consumed locally).
//
// Returns the weighted LM score minus the word penalty; for an empty phrase
// both parts are zero.
double TransOptions::wordsProb(vector<int> Ephrase)
{
    const int len = Ephrase.size();
    double lmscore = 0;
    for (int i = 0; i < len; i++)
    {
        lmscore += LM->wordProb(Ephrase) * WLM;
        // pop_back() instead of erase(--pos): erasing through a saved iterator
        // invalidates it, so decrementing it again on the next pass was
        // undefined behaviour.
        Ephrase.pop_back();
    }
    return lmscore + WEIGHTW * len * (-1);
}
// Releases every candidate object stored in f2eVocab.
//
// The table holds raw pointers, so each one is deleted here; the containers
// themselves are cleaned up by their own destructors.
TransOptions::~TransOptions()
{
    for (phraseTable::iterator entry = f2eVocab.begin(); entry != f2eVocab.end(); ++entry)
    {
        candiPhrase& owned = entry->second;
        for (candiPhrase::iterator cand = owned.begin(); cand != owned.end(); ++cand)
        {
            delete *cand;
        }
    }
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -