?? cnaivebayes.cpp

?? 貝葉斯公式
?? CPP
?? 第 1 頁 / 共 2 頁
字號:
上一頁 12
	string FileNameTmp="";

	//如果目錄的最后一個字母不是'\',則在最后加上一個'\' 
	int len=sTestFilesPath.size(); 
	if (sTestFilesPath.at(len-1) != '\\') 
		sTestFilesPath += "\\";

	string TempName = sTestFilesPath+"*.txt";
	string sText="";
    int nClassID=0;

	long hFile;

	struct _finddata_t  TestFile;

    if((hFile = _findfirst(TempName.c_str(), &TestFile )) == -1L)
	{
    	cout<<"路徑無法訪問！"<<endl;
        return ;
	}
	

	string Path="";

	//重新寫類別文件
	for(int i=0;i<m_nClassNum;i++)
	{
		char szTempResFile[200];
		sprintf(szTempResFile,"Res\\C%d.txt",i);
		DeleteFile(szTempResFile);
	}

	CSSPS ssps;
	ssps.Init("Zssps");

	do
	{
      
		FileNameTmp=TestFile.name;//訓練文件的名稱
 		Path=sTestFilesPath+FileNameTmp;//獲取文件路徑
		string sSrcContent="";	
		ifstream fin1(Path.c_str());
		getline(fin1,sSrcContent,'\0');
		string sContent = ssps.GetVecStr (sSrcContent);
		nClassID=TestTermFile(sContent);

		 //寫文件Ci.txt
		char szResFile[20];
        memset(szResFile,0,20);
		sprintf(szResFile,"Res\\C%d.txt",nClassID);
		ofstream Fou;
		Fou.open(szResFile,ios::out|ios::app);
		Fou << Path << endl;
		Fou.close();

	}while(! _findnext( hFile, &TestFile ) );

}

//-----------------------------------------------------------------------------------------//
//   Purpose:  Classify a single document given as a feature-vector string.
//   Input:    sContent - space-separated fields; the first field is the document's
//             word count, every following field is "<key>:<weight>" (only the
//             integer <key> is used for scoring).
//   Returns:  Index of the class the document is assigned to.
//   Effect:   Increments m_pnResNum[<returned index>].
//----------------------------------------------------------------------------------------//
int CNaiveBayes::TestTermFile(string sContent)
{
	// Per-class sum of the log probabilities of the document's features.
	double *pro = new double[m_nClassNum];
	memset(pro, 0, m_nClassNum * sizeof(double));

	// Skip the leading word-count field. When there is no space at all, parsing
	// starts at offset 0 (this mirrors the original npos + 1 wrap-around).
	const string::size_type wordnum_pos = sContent.find_first_of(' ');
	string::size_type pos = (wordnum_pos == string::npos) ? 0 : wordnum_pos + 1;
	string::size_type prev_pos = pos;

	// Walk every space-delimited token, including the one after the last space.
	bool bLastToken = false;
	while (!bLastToken)
	{
		string word;
		pos = sContent.find_first_of(' ', pos);
		if (pos == string::npos)
		{
			bLastToken = true;
			word = sContent.substr(prev_pos);
		}
		else
		{
			word = sContent.substr(prev_pos, pos - prev_pos);
			prev_pos = ++pos;
		}

		// The feature key is the integer in front of ':'.
		// NOTE(review): atoi yields 0 for an empty or non-numeric token, so such a
		// token is scored as feature 0 (unchanged from the original) - confirm intent.
		const string str_key = word.substr(0, word.find_first_of(':'));
		const int key = atoi(str_key.c_str());

		// Ignore keys outside the trained table instead of reading past the
		// end of m_ppfTrainRes[i] (each row holds m_nFeatureNum entries).
		if (key < 0 || key >= m_nFeatureNum)
			continue;

		for (int i = 0; i < m_nClassNum; i++)
		{
			pro[i] = pro[i] + log(m_ppfTrainRes[i][key]);
		}
	}

	// Add each class's log prior once per document.
	for (int i = 0; i < m_nClassNum; i++)
	{
		pro[i] = pro[i] + log(m_pfPrC[i]);
	}

	// Pick the class with the largest score.
	// NOTE(review): a score of exactly 0 is never selected, and class 0 is never
	// replaced when its own score is exactly 0. Kept as in the original - confirm intent.
	int max_pro_num = 0;
	double t = pro[0];
	for (int s = 1; s < m_nClassNum; s++)
	{
		const double k = pro[s];
		if (t < k && k != 0 && t != 0)
		{
			max_pro_num = s;
			t = k;
		}
	}

	// Release the scratch buffer (it was leaked on every call before).
	delete[] pro;

	m_pnResNum[max_pro_num]++;

	return max_pro_num;
}

//-----------------------------------------------------------------------------------------//
//   功能：       讀入類的先驗概率及對應該類的特征項的先驗概率。
//----------------------------------------------------------------------------------------//

void CNaiveBayes::PrwFRead()
{
	
	string PrcF="Pr\\Prc.txt";
	
	ifstream prcf(PrcF.c_str());
	string strPrC="";
	string classID="";//存儲類序號
	string classfreV="";//存儲類的先驗概率
	int clsID=0;
	int pos1=0;
	int pos2=0;
    
	while(getline(prcf,strPrC,'\n')!=NULL)//讀取行
	{
     	pos1=0;
		pos2=0;
		
		if((pos2= strPrC.find_first_of( ' ',pos1 ))!=-1)
		{
			classID=strPrC.substr(pos1,pos2-pos1);
            const char *cID=classID.c_str();
			clsID=atoi(cID);
			pos2++;
		}
	   if((pos1= strPrC.find_first_of( ' ',pos2))!=-1)
		{
			pos1++;
			classfreV=strPrC.substr(pos1,-1);
			const char *cFV=classfreV.c_str();
			m_pfPrC[clsID]=atof(cFV);
		//	cout<<clsID<<" "<<m_pfPrC[clsID]<<endl;
	   }
	}
	
	
	char PrwDir[20];
	memset(PrwDir,' ',20*sizeof(char));

	for (int i = 0 ; i < m_nClassNum ; i++ )
	{
		sprintf(PrwDir,"Pr\\PrW in C%d.txt",i);
		ifstream prwf(PrwDir);
		//ifstream prwf(PrwF.c_str());
		string strPrW="";
		string feaNum="";
		string fesVal="";
		int featID=0;
		float featVal;
		
		
		while(getline(prwf,strPrW,'\n')!=NULL)//讀取行
		{
			pos1=0;
			pos2=0;
			
			if((pos2= strPrW.find_first_of( '\t',pos1 ))!=-1)
			{
				feaNum=strPrW.substr(pos1,pos2-pos1);
				
				pos2++;
			}
			const char *feaN=feaNum.c_str();
			featID=atoi(feaN);
			//cout<<featID<<"  ";
			fesVal=strPrW.substr(pos2,-1);
			//cout<<fesVal.c_str()<<endl;
			const char *feaV=fesVal.c_str();
			//featVal=atof(feaV);
			m_ppfTrainRes[i][featID]=atof(feaV);
			//  cout<<featID<<" "<<m_ppfTrainRes[i][featID]<<endl;
			
		}  
	}
	
}


//-----------------------------------------------------------------------------------------//
//   功能：       
//----------------------------------------------------------------------------------------//
int CNaiveBayes::InitPara(bool bPreTrain,string sTrainFilesPath)
{
	InitClassInfo();

	if (bPreTrain)
	{
		PreTrain(sTrainFilesPath);
	}

	//從文件_all_words.lst中讀取特征詞總個數
	ifstream wfile("..\\Dic\\DF\\_all_words.lst");
	string sTemp="";
	getline(wfile,sTemp,' ');//讀取特征詞總個數
	m_nFeatureNum=atoi(sTemp.c_str());
	
	m_ppfTrainRes = new float *[m_nClassNum];
	for(int i=0;i<m_nClassNum;i++)
	{
		m_ppfTrainRes[i]=new float[m_nFeatureNum];
		memset(m_ppfTrainRes[i],0,m_nFeatureNum*sizeof(float));
	}

    return m_nClassNum;
}

// Reads "class.lst": the first line is the number of classes, each following
// line is one class name. Allocates m_psClassName, m_pnTrainNum and m_pfPrC
// (the latter two zero-filled) and fills m_mapClassName2ID with name -> index.
// Returns the class count (0 when the file is missing or the count is invalid).
int CNaiveBayes::InitClassInfo()
{
	ifstream cfile("class.lst");
	string sTemp;

	// First line: number of classes.
	getline(cfile, sTemp);
	m_nClassNum = atoi(sTemp.c_str());
	if (m_nClassNum < 0)
	{
		// A negative count must never reach new[].
		m_nClassNum = 0;
	}

	m_psClassName = new string[m_nClassNum];
	m_pnTrainNum = new int[m_nClassNum];
	memset(m_pnTrainNum, 0, m_nClassNum * sizeof(int));

	// Remaining lines: class names. The index is checked BEFORE storing so that
	// surplus lines (or a count of 0) can never write past the arrays.
	int nClassIndex = 0;
	while (nClassIndex < m_nClassNum && getline(cfile, sTemp))
	{
		m_psClassName[nClassIndex] = sTemp;
		m_mapClassName2ID[sTemp] = nClassIndex;
		nClassIndex++;
	}

	m_pfPrC = new double[m_nClassNum];
	memset(m_pfPrC, 0, m_nClassNum * sizeof(double));

	return m_nClassNum;
}

// Runs CSSPS::TrainFiles once per known class, using the sub-directory
// <sTrainFilesPath>\<class name> and the class name as arguments.
// Returns false when sTrainFilesPath is empty (nothing is trained), true otherwise.
bool CNaiveBayes::PreTrain(string sTrainFilesPath)
{
	// An empty path used to make at(len-1) throw std::out_of_range.
	if (sTrainFilesPath.empty())
	{
		return false;
	}

	CSSPS ssps;
	ssps.Init("Zssps");

	// Make sure the directory ends with a backslash before appending names.
	if (sTrainFilesPath[sTrainFilesPath.size() - 1] != '\\')
	{
		sTrainFilesPath += "\\";
	}

	for (int nClassIndex = 0; nClassIndex < m_nClassNum; nClassIndex++)
	{
		const string sSubTrainFilesPath = sTrainFilesPath + m_psClassName[nClassIndex];
		ssps.TrainFiles(sSubTrainFilesPath.c_str(), m_psClassName[nClassIndex].c_str());
	}

	return true;
}

// Classifies the single file at sTestFilePath and returns the class index
// reported by TestTermFile.
int CNaiveBayes::TestAFile(string sTestFilePath)
{
	// Read the file content up to the first NUL byte (or end of file).
	ifstream docStream(sTestFilePath.c_str());
	string sRawText;
	getline(docStream, sRawText, '\0');

	// Presumably GetVecStr turns the raw text into the vector string that
	// TestTermFile parses - verify against CSSPS.
	CSSPS vectorizer;
	vectorizer.Init("Zssps");
	string sVectorText = vectorizer.GetVecStr(sRawText);

	return TestTermFile(sVectorText);
}
上一頁 12
?? 文件大小 521 K
?? 上傳用戶 ydyzd_2008
?? 所屬分類數學計算
??? 相關標簽

#貝葉斯
?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? cnaivebayes.cpp

?? 快捷鍵說明