?? cnaivebayes.cpp
字號:
string FileNameTmp="";
//如果目錄的最后一個字母不是'\',則在最后加上一個'\'
int len=sTestFilesPath.size();
if (sTestFilesPath.at(len-1) != '\\')
sTestFilesPath += "\\";
string TempName = sTestFilesPath+"*.txt";
string sText="";
int nClassID=0;
long hFile;
struct _finddata_t TestFile;
if((hFile = _findfirst(TempName.c_str(), &TestFile )) == -1L)
{
cout<<"路徑無法訪問!"<<endl;
return ;
}
string Path="";
//重新寫類別文件
for(int i=0;i<m_nClassNum;i++)
{
char szTempResFile[200];
sprintf(szTempResFile,"Res\\C%d.txt",i);
DeleteFile(szTempResFile);
}
CSSPS ssps;
ssps.Init("Zssps");
do
{
FileNameTmp=TestFile.name;//訓練文件的名稱
Path=sTestFilesPath+FileNameTmp;//獲取文件路徑
string sSrcContent="";
ifstream fin1(Path.c_str());
getline(fin1,sSrcContent,'\0');
string sContent = ssps.GetVecStr (sSrcContent);
nClassID=TestTermFile(sContent);
//寫文件Ci.txt
char szResFile[20];
memset(szResFile,0,20);
sprintf(szResFile,"Res\\C%d.txt",nClassID);
ofstream Fou;
Fou.open(szResFile,ios::out|ios::app);
Fou << Path << endl;
Fou.close();
}while(! _findnext( hFile, &TestFile ) );
}
//-----------------------------------------------------------------------------------------//
// 功能: 對每篇文檔進行測試。
// 返回值: 該文檔所屬類別
//----------------------------------------------------------------------------------------//
int CNaiveBayes::TestTermFile(string sContent)
{
string::size_type pos=0, prev_pos=0;
string::size_type wordnum_pos=0;
string word;
double *pro=new double[m_nClassNum];//存儲各特征項取log后的和
memset(pro,0,m_nClassNum*sizeof(double));
wordnum_pos = sContent.find_first_of( ' ',wordnum_pos );
//每篇文章的詞數nWordNum
string sWordNum = sContent.substr( 0,wordnum_pos );
int nWordNum = atoi( sWordNum.c_str() );
pos=++wordnum_pos;
prev_pos=pos;
while((pos = sContent.find_first_of( ' ', pos ))!=string::npos)
{
//取得每個word 的key:weight
string::size_type key_pos=0, weight_pos=0;
word = sContent.substr( prev_pos, pos - prev_pos );
prev_pos = ++pos;
//取得每個word的key并計算在類中的先驗概率
key_pos = word.find_first_of( ':', key_pos );
string str_key = word.substr( 0 , key_pos );
int key = atoi(str_key.c_str());
for(int i=0;i<m_nClassNum;i++)
{
pro[i]=pro[i]+log(m_ppfTrainRes[i][key]);
}
}
//處理最后一個空格后的word
word = sContent.substr( prev_pos, pos - prev_pos );
string::size_type key_pos=0;
key_pos = word.find_first_of( ':', key_pos );
string str_key = word.substr( 0 , key_pos );
int key = atoi(str_key.c_str());
for(int i=0;i<m_nClassNum;i++)
{
pro[i]=pro[i]+log(m_ppfTrainRes[i][key])+log(m_pfPrC[i]);
}
//對測試文本進行分類
double t;
t = pro[0];
int max_pro_num = 0;
for(int s = 1 ; s < m_nClassNum ; s++ )
{
double k = pro[s];
if( t < k && k != 0 && t != 0 )
{
max_pro_num = s;
t = k;
}
}
m_pnResNum[max_pro_num]++;
return max_pro_num;
}
//-----------------------------------------------------------------------------------------//
// 功能: 讀入類的先驗概率及對應該類的特征項的先驗概率。
//----------------------------------------------------------------------------------------//
void CNaiveBayes::PrwFRead()
{
string PrcF="Pr\\Prc.txt";
ifstream prcf(PrcF.c_str());
string strPrC="";
string classID="";//存儲類序號
string classfreV="";//存儲類的先驗概率
int clsID=0;
int pos1=0;
int pos2=0;
while(getline(prcf,strPrC,'\n')!=NULL)//讀取行
{
pos1=0;
pos2=0;
if((pos2= strPrC.find_first_of( ' ',pos1 ))!=-1)
{
classID=strPrC.substr(pos1,pos2-pos1);
const char *cID=classID.c_str();
clsID=atoi(cID);
pos2++;
}
if((pos1= strPrC.find_first_of( ' ',pos2))!=-1)
{
pos1++;
classfreV=strPrC.substr(pos1,-1);
const char *cFV=classfreV.c_str();
m_pfPrC[clsID]=atof(cFV);
// cout<<clsID<<" "<<m_pfPrC[clsID]<<endl;
}
}
char PrwDir[20];
memset(PrwDir,' ',20*sizeof(char));
for (int i = 0 ; i < m_nClassNum ; i++ )
{
sprintf(PrwDir,"Pr\\PrW in C%d.txt",i);
ifstream prwf(PrwDir);
//ifstream prwf(PrwF.c_str());
string strPrW="";
string feaNum="";
string fesVal="";
int featID=0;
float featVal;
while(getline(prwf,strPrW,'\n')!=NULL)//讀取行
{
pos1=0;
pos2=0;
if((pos2= strPrW.find_first_of( '\t',pos1 ))!=-1)
{
feaNum=strPrW.substr(pos1,pos2-pos1);
pos2++;
}
const char *feaN=feaNum.c_str();
featID=atoi(feaN);
//cout<<featID<<" ";
fesVal=strPrW.substr(pos2,-1);
//cout<<fesVal.c_str()<<endl;
const char *feaV=fesVal.c_str();
//featVal=atof(feaV);
m_ppfTrainRes[i][featID]=atof(feaV);
// cout<<featID<<" "<<m_ppfTrainRes[i][featID]<<endl;
}
}
}
//-----------------------------------------------------------------------------------------//
// Purpose: initialise the classifier's parameters.
//          1. Loads class information via InitClassInfo().
//          2. If bPreTrain is set, runs PreTrain() on sTrainFilesPath.
//          3. Reads the total feature count from "..\Dic\DF\_all_words.lst"
//             (the text before the first space).
//          4. Allocates m_ppfTrainRes as m_nClassNum rows of m_nFeatureNum
//             floats, all set to zero.
// Returns: m_nClassNum.
// Notes:   a missing or malformed feature file yields a feature count of 0.
//          NOTE(review): a previously allocated m_ppfTrainRes is not released
//          here, so calling this twice would leak - confirm it is called once.
//----------------------------------------------------------------------------------------//
int CNaiveBayes::InitPara(bool bPreTrain,string sTrainFilesPath)
{
    InitClassInfo();
    if (bPreTrain)
    {
        PreTrain(sTrainFilesPath);
    }

    // Read the total number of feature words from _all_words.lst.
    ifstream wfile("..\\Dic\\DF\\_all_words.lst");
    string sTemp = "";
    getline(wfile, sTemp, ' ');
    m_nFeatureNum = atoi(sTemp.c_str());
    if (m_nFeatureNum < 0)
    {
        // A negative count would be passed straight to new[]; treat it as
        // "no features" instead.
        m_nFeatureNum = 0;
    }

    m_ppfTrainRes = new float *[m_nClassNum];
    for (int i = 0; i < m_nClassNum; i++)
    {
        m_ppfTrainRes[i] = new float[m_nFeatureNum];
        memset(m_ppfTrainRes[i], 0, m_nFeatureNum * sizeof(float));
    }
    return m_nClassNum;
}
int CNaiveBayes::InitClassInfo()
{
//從文件class.lst中讀取類別數、類名
ifstream cfile("class.lst");
string sTemp="";
getline(cfile,sTemp,'\n');//讀取類別數
m_nClassNum = atoi(sTemp.c_str());
sTemp="";
m_psClassName = new string[m_nClassNum];
m_pnTrainNum = new int[m_nClassNum];
int nClassIndex=0;
while(getline(cfile,sTemp,'\n')!=NULL)//讀取行
{
m_psClassName[nClassIndex]=sTemp.c_str();//將類名存入m_psClassName數組
m_mapClassName2ID[sTemp] = nClassIndex;
nClassIndex++;
sTemp="";
if (nClassIndex>=m_nClassNum)
{
break;
}
}
m_pfPrC = new double[m_nClassNum];
memset(m_pfPrC,0,m_nClassNum*sizeof(double));
return m_nClassNum;
}
//-----------------------------------------------------------------------------------------//
// Purpose: run CSSPS::TrainFiles once per class on the sub-directory of
//          sTrainFilesPath named after that class.
// Input:   sTrainFilesPath - root directory of the training data. A trailing
//          '\' is appended when missing. An empty path is left empty, so the
//          class directories are then resolved relative to the current
//          directory (previously an empty path raised std::out_of_range).
// Returns: always true.
//----------------------------------------------------------------------------------------//
bool CNaiveBayes::PreTrain(string sTrainFilesPath)
{
    CSSPS ssps;
    ssps.Init("Zssps");

    // Make sure a non-empty root ends with a backslash.
    if (!sTrainFilesPath.empty() &&
        sTrainFilesPath[sTrainFilesPath.size() - 1] != '\\')
    {
        sTrainFilesPath += "\\";
    }

    for (int nClassIndex = 0; nClassIndex < m_nClassNum; nClassIndex++)
    {
        const string sSubTrainFilesPath = sTrainFilesPath + m_psClassName[nClassIndex];
        ssps.TrainFiles(sSubTrainFilesPath.c_str(), m_psClassName[nClassIndex].c_str());
    }
    return true;
}
int CNaiveBayes::TestAFile(string sTestFilePath)
{
string sSrcContent="";
ifstream fin1(sTestFilePath.c_str());
getline(fin1,sSrcContent,'\0');
CSSPS ssps;
ssps.Init("Zssps");
string sContent = ssps.GetVecStr (sSrcContent);
int nClassID=TestTermFile(sContent);
return nClassID;
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -