?? apriori.cpp
字號:
// Apriori.cpp : 定義控制臺應用程序的入口點。
//
#include "stdafx.h"
#include "Apriori.h"
#include "conio.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// 唯一的應用程序對象
CWinApp theApp;
using namespace std;
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
int nRetCode = 0;
// 初始化 MFC 并在失敗時顯示錯誤
if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
{
// TODO: 更改錯誤代碼以符合您的需要
_tprintf(_T("致命錯誤: MFC 初始化失敗\n"));
nRetCode = 1;
}
else
{
// TODO: 在此處為應用程序的行為編寫代碼。
DBLink *D;
ItemSet *IS,*preIS;
SearchTree *STk;
char filename[30];
double min_sup = 0.02;
int Record_Count = 0,min_count;
DWORD nStartTime,nEndTime,SumTime = 0;
int k = 1;
cout<<"輸入數據文件名:";
cin>>filename;
cout<<"輸入最小支持閾值min_sup:";
cin>>min_sup;
CString exstr1;
exstr1 = "數據來源文件:";
exstr1 += filename;
exstr1 += "\r\n最小支持閾值:";
exstr1.AppendFormat("%f\r\n\r\n",min_sup);
nStartTime = GetTickCount();
D = ReadDataFromText(filename,&Record_Count);
min_count = int(Record_Count*min_sup);
STk = find_frequent_1_itemset(D,min_count);
nEndTime = GetTickCount();
SumTime += nEndTime - nStartTime;
IS = GetDataLink(STk);
//要刪除STk
STk = DeleteTree(STk);
PrintResult(IS,k,exstr1);
while(IS != NULL)
{
k++;
nStartTime = GetTickCount();
preIS = apriori_gen(IS,k);
if(preIS == NULL) break;
IS = DeleteLink(IS);
IS = find_frequent_k_itemset(D,preIS,k,min_count); //只是把preIS中的count字段作了更新并剪枝
nEndTime = GetTickCount();
SumTime += nEndTime - nStartTime;
PrintResult(IS,k,exstr1);
}
IS = DeleteLink(IS);
cout<<endl<<"Apriori算法運行完畢,為您找到最高"<<k-1<<"-項集。"<<endl;
cout<<"算法總耗時:"<<SumTime<<"ms。"<<"按任意鍵結束!!!"<<endl;
getch();
}
return nRetCode;
}
SearchTree* find_frequent_1_itemset(DBLink* D,int min_count)
{
SearchTree* ST_1 = NULL;
DBLink* DBL = D;
while(DBL != NULL) //遍歷源數據表,給每個關鍵字計數
{
int nStart = 0,nEnd;
CString str;
do{
nEnd = DBL->datastr.Find(';',nStart);
if(nEnd == -1)
str = DBL->datastr.Mid(nStart);
else{
str = DBL->datastr.Mid(nStart,nEnd-nStart);
nStart = nEnd+1;
}
ST_1 = SearchAndAddInTree(ST_1,str,1);
}while(nEnd != -1);
DBL = DBL->next;
}
SearchAndDelInTree(ST_1,min_count); //剪枝
return ST_1;
}
ItemSet* find_frequent_k_itemset(DBLink* D,ItemSet* IS,int k,int min_count)
{
DBLink* DBL = D;
int step = 0;
CString tmp;
while(DBL != NULL) //遍歷源數據表,搜索k-項集,增加計數
{
step++;
IS = SearchAndAddIn_kLink(IS,DBL->datastr);
DBL = DBL->next;
if((step % 10) == 0){
tmp.Format("%d\r\n",step);
TRACE(tmp.GetBuffer());
}
}
return Del_k_ItemSet(IS,min_count); //剪枝
}
ItemSet* apriori_gen(ItemSetLink* IS,int k)
{
ItemSetLink *preIS1 = IS,*preIS2;
ItemSetLink *ISk,*ISkhead = NULL;
while(preIS1)
{
preIS2 = preIS1->next;
while(preIS2)
{
CString tmp; //臨時存放候選節點
tmp.Empty();
if(k == 2)
tmp = preIS1->datastr+ ';' + preIS2->datastr;
else{ //注:各關鍵字集都是已經按字母序排好序的
int index1,index2;
CString tmp1,tmp2;
index1 = preIS1->datastr.ReverseFind(';');
index2 = preIS2->datastr.ReverseFind(';');
tmp1 = preIS1->datastr.Left(index1);
tmp2 = preIS2->datastr.Left(index2);
if(tmp1 == tmp2)
tmp = tmp1 + preIS1->datastr.Mid(index1) + preIS2->datastr.Mid(index2); //preIS1的子串排序在preIS2的子串前
}
if(!tmp.IsEmpty()){
if(!has_infrequent_subset(tmp,IS)) //無非頻繁子集 tmp作為一個新結點的值插入候選鏈表中
if(ISkhead == NULL){
ISkhead = CreatePreItemSet(ISkhead,tmp,k);
ISk = ISkhead;
}
else{
ISk->next = CreatePreItemSet(ISk,tmp,k); //直接從上一個節點開始插入新節點,減少無謂的鏈表遍歷
ISk = ISk->next;
}
}
preIS2 = preIS2->next;
}
preIS1 = preIS1->next;
}
return ISkhead;
}
ItemSetNode* GetDataLink(SearchTree* ST)
{
if(ST == NULL) return NULL;
ItemSetNode* N;
N = GetDataLink(ST->left);
if(N == NULL){
N = new ItemSetNode;
N->datastr = ST->datastr;
N->count = ST->count;
N->next = GetDataLink(ST->right);
}
else{
ItemSetNode* tmpN = N;
while(tmpN->next != NULL) tmpN = tmpN->next;
tmpN->next = new ItemSetNode;
tmpN = tmpN->next;
tmpN->datastr = ST->datastr;
tmpN->count = ST->count;
tmpN->next = GetDataLink(ST->right);
}
return N; //返回的是鏈表頭結點指針
}
BOOL has_infrequent_subset(CString set,ItemSet* IS) //在IS中搜索驗證set是否包含非頻繁子集
{
int nStart = 0,nEnd;
ItemSet* IShead = IS;
CString subset;
do{
//把截出來的單詞去掉,得到k-1子集
nEnd = set.Find(';',nStart);
if(nStart == 0 && nEnd != -1)
subset = set.Mid(nEnd+1);
else if(nStart == 0 && nEnd == -1)
subset = set;
else if(nStart != 0 && nEnd == -1)
subset = set.Left(nStart-1);
else
subset = set.Left(nStart)+set.Mid(nEnd+1);
nStart = nEnd+1;
//搜索該子集是否在k-1頻繁集中
IS = IShead;
while(IS)
{
if(subset.CompareNoCase(IS->datastr) == 0)
break;
IS = IS->next;
}
if(IS == NULL)
return TRUE; //有非頻繁子集
}while(nEnd != -1);
return FALSE; //無非頻繁子集
}
BOOL FindInTree(SearchTree* ST,CString str)
{
if(ST == NULL) return FALSE;
if(str.CompareNoCase(ST->datastr.GetBuffer()) < 0)
return FindInTree(ST->left,str);
else if(str.CompareNoCase(ST->datastr.GetBuffer()) > 0)
return FindInTree(ST->right,str);
else //找到了
return TRUE;
}
SearchTree* SearchAndAddInTree(SearchTree* ST,CString str,int k)
{
if(ST == NULL){
ST = new SearchTree;
if(ST == NULL)
cout<<"Out of space!!!"<<endl;
else{
ST->datastr = str;
if(k == 1) //1-項集二叉樹邊構建邊計數
ST->count = 1;
else //k-項集二叉樹先構建后計數
ST->count = 0;
ST->left = ST->right = NULL;
}
}
else if(str.CompareNoCase(ST->datastr.GetBuffer()) < 0)
ST->left = SearchAndAddInTree(ST->left,str,k);
else if(str.CompareNoCase(ST->datastr.GetBuffer()) > 0)
ST->right = SearchAndAddInTree(ST->right,str,k);
else //找到
ST->count++;
return ST;
}
ItemSetLink* SearchAndAddIn_kLink(ItemSetLink* IS,CString str) //更新count字段
{
CString sw;
ItemSetLink* IShead = IS;
int nStart = 0,nEnd;
BOOL bFound;
CStringList SL;
CString key;
do{
nEnd = str.Find(';',nStart);
if(nEnd == -1)
key = str.Mid(nStart);
else
key = str.Mid(nStart,nEnd - nStart);
SL.AddTail(key);
nStart = nEnd + 1;
}while(nEnd != -1);
while(IS)
{
bFound = TRUE;
nStart = 0;
do{
nEnd = IS->datastr.Find(';',nStart);
if(nEnd == -1)
sw = IS->datastr.Mid(nStart);
else
sw = IS->datastr.Mid(nStart,nEnd - nStart);
int index = 0;
while(index < SL.GetCount())
{
CString tmp = SL.GetAt(SL.FindIndex(index));
if(tmp.CompareNoCase(sw) == 0)
break;
index++;
}
if(index >= SL.GetCount()){
bFound = FALSE;
break;
}
nStart = nEnd + 1;
}while(nEnd != -1);
if(bFound == TRUE)
IS->count++;
IS = IS->next;
}
SL.RemoveAll();
return IShead;
}
//BOOL CompareKeyInStr(CString strDB,CString strN) //在strDB的關鍵字組中尋找strN的關鍵字是否都出現
//{
// int nStart = 0,nEnd;
// CString key;
// do{
// nEnd = strDB.Find(';',nStart);
// if(nEnd == -1)
// key = strDB.Mid(nStart);
// else
// key = strDB.Mid(nStart,nEnd - nStart);
// if(key.CompareNoCase(strN) == 0) //找到了
// return TRUE;
// nStart = nEnd + 1;
// }while(nEnd != -1);
// return FALSE;
//int nStart_DB = 0,nStart_N = 0;
//int nEnd_DB,nEnd_N;
//CString key_db,key_n;
////都讀入第一個key
// nEnd_DB = strDB.Find(';',nStart_DB);
//nEnd_N = strN.Find(';',nStart_N);
//if(nEnd_DB == -1)
// key_db = strDB.Mid(nStart_DB);
//else
// key_db = strDB.Mid(nStart_DB,nEnd_DB - nStart_DB);
//if(nEnd_N == -1)
// key_n = strN.Mid(nStart_N);
//else
// key_n = strN.Mid(nStart_N,nEnd_N - nStart_N);
//nStart_DB = nEnd_DB + 1;
//nStart_N = nEnd_N + 1;
//do{
// if(key_db.CompareNoCase(key_n) == 0) //找到一個關鍵字,則一起往后移
// {
// if(nEnd_DB != -1 && nEnd_N != -1)
// {
// nEnd_DB = strDB.Find(';',nStart_DB);
// nEnd_N = strN.Find(';',nStart_N);
// if(nEnd_DB == -1)
// key_db = strDB.Mid(nStart_DB);
// else
// key_db = strDB.Mid(nStart_DB,nEnd_DB - nStart_DB);
// if(nEnd_N == -1)
// key_n = strN.Mid(nStart_N);
// else
// key_n = strN.Mid(nStart_N,nEnd_N - nStart_N);
// }
// else
// break;
// }
// else if(key_db.CompareNoCase(key_n) > 0) //在strDB中不可能再找到一個關鍵字與當前的key_n匹配
// return FALSE;
// else //StrDB往后移
// {
// if(nEnd_DB != -1){
// nEnd_DB = strDB.Find(';',nStart_DB);
// if(nEnd_DB == -1)
// key_db = strDB.Mid(nStart_DB);
// else
// key_db = strDB.Mid(nStart_DB,nEnd_DB - nStart_DB);
// }
// else
// break;
// }
// nStart_DB = nEnd_DB + 1;
// nStart_N = nEnd_N + 1;
//}while(1); //有一個串已到尾部,則跳出循環
//if(nEnd_N == -1 && nEnd_DB == -1)
// if(key_db.CompareNoCase(key_n) == 0)
// return TRUE;
// else
// return FALSE;
//else if(nEnd_N == -1 && nEnd_DB != -1) //strN中的關鍵字已經全部找到
// return TRUE;
//else //strN中的關鍵字未找完,strDB已搜索完
// return FALSE;
//}
SearchTree* SearchAndDelInTree(SearchTree* ST,int min_count)
{
if(ST == NULL) return ST;
ST->left = SearchAndDelInTree(ST->left,min_count);
ST->right = SearchAndDelInTree(ST->right,min_count);
//刪除非頻繁節點
if(ST->count < min_count)
{
SearchTree* TmpNode;
if(ST->left && ST->right) //兩個子樹
{
TmpNode = FindMin(ST->right);
ST->datastr = TmpNode->datastr;
ST->count = TmpNode->count;
ST->right = DeleteNode(ST->right,ST->datastr);
}
else //一棵或沒有子樹
{
TmpNode = ST;
if(ST->left == NULL)
ST = ST->right;
else if(ST->right == NULL)
ST = ST->left;
delete TmpNode;
}
}
return ST;
}
SearchTree* FindMin(SearchTree* ST)
{
if(ST != NULL)
while(ST->left != NULL)
ST = ST->left;
return ST;
}
SearchTree* DeleteNode(SearchTree* ST,CString str)
{
SearchTree* TmpNode;
if(ST == NULL)
cout<<"Element not found!!!"<<endl;
else if(str.CompareNoCase(ST->datastr.GetBuffer()) < 0)
ST->left = DeleteNode(ST->left,str);
else if(str.CompareNoCase(ST->datastr.GetBuffer()) > 0)
ST->right = DeleteNode(ST->right,str);
//找到了節點
else if(ST->left && ST->right) //兩個子樹
{
TmpNode = FindMin(ST->right);
ST->datastr = TmpNode->datastr;
ST->count = TmpNode->count;
ST->right = DeleteNode(ST->right,ST->datastr);
}
else //一棵或沒有子樹
{
TmpNode = ST;
if(ST->left == NULL)
ST = ST->right;
else if(ST->right == NULL)
ST = ST->left;
delete TmpNode;
}
return ST;
}
CString GetLine(CFile* const f) //獲得文件中的一行字符
{
UINT n=0,row=0;
CString linestr;
char buf;
n=f->Read(&buf,1);
if(n==0) return "Wen_End"; //已到文件尾
do{
if(buf=='\r'){
n = f->Read(&buf,1); //把'\n'讀出
break;
}
linestr+=buf;
n = f->Read(&buf,1);
}while(n!=0);
return linestr;
}
DBLink* ReadDataFromText(CString filename,int *RC)
{
CFile ftxt;
DBLink *D,*DBL;
D = NULL; DBL = NULL;
if(!ftxt.Open(filename,CFile::modeRead)){
cout<<"無法打開數據文件";
return NULL;
}
CString linestr;
while((linestr = GetLine(&ftxt)) != "Wen_End")
{
if(!linestr.IsEmpty())
{
if(D == NULL){
D = new DBLink;
DBL = D;
}
else{
DBL->next = new DBLink;
DBL = DBL->next;
}
DBL->datastr = linestr;
DBL->next = NULL;
}
(*RC)++;
}
ftxt.Close();
return D;
}
void PrintResult(ItemSet* IS,int k,CString exstr1)
{
if(IS == NULL) return;
CString exstr;
CFile fout;
if(k == 1){
fout.Open("apriori_output.txt",CFile::modeCreate|CFile::modeWrite);
fout.Write(exstr1.GetBuffer(),exstr1.GetLength());
}
else{
fout.Open("apriori_output.txt",CFile::modeWrite);
fout.SeekToEnd();
}
cout<<"頻繁"<<k<<"-項集:"<<endl;
exstr.Format("頻繁%d-項集:\r\n",k);
fout.Write(exstr,exstr.GetLength());
while(IS != NULL){
cout<<IS->datastr<<" "<<IS->count<<endl;
exstr.Format("%s %d\r\n",IS->datastr.GetBuffer(),IS->count);
fout.Write(exstr,exstr.GetLength());
IS = IS->next;
}
cout<<endl;
fout.Write("\r\n",2);
fout.Close();
}
ItemSetLink* CreatePreItemSet(ItemSetLink* preIS,CString str,int k)
{
if(preIS == NULL){
preIS = new ItemSetLink;
preIS->datastr = str;
preIS->count = 0;
preIS->next = NULL;
return preIS;
}
//preIS->next為NULL
preIS->next = new ItemSetLink;
preIS = preIS->next;
preIS->datastr = str;
preIS->count = 0;
preIS->next = NULL;
return preIS;
}
ItemSetLink* DeleteLink(ItemSetLink* IS)
{
ItemSetLink* dIS;
while(IS)
{
dIS = IS;
IS = IS->next;
delete dIS;
}
return NULL;
}
ItemSet* Del_k_ItemSet(ItemSet* IS,int min_count)
{
ItemSet *tmpNode,*preNode,*IShead = IS;
while(IS)
{
tmpNode = NULL;
if(IS->count < min_count){ //刪除當前節點
if(IS == IShead){ //是頭節點
tmpNode = IShead;
IShead = IS->next;
preNode = IShead;
}
else{
tmpNode = IS;
preNode->next = IS->next;
}
}
else //當前節點保留
preNode = IS;
IS = IS->next;
if(tmpNode)
delete tmpNode;
}
return IShead;
}
SearchTree* DeleteTree(SearchTree* ST)
{
if(ST == NULL)
return NULL;
ST->left = DeleteTree(ST->left);
ST->right = DeleteTree(ST->right);
delete ST;
return NULL;
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -