?? myapriori.cpp
字號(hào):
// MyApriori.cpp : 定義控制臺(tái)應(yīng)用程序的入口點(diǎn)。
//
#include "stdafx.h"
#include "MyApriori.h"
#include "conio.h"
#include "stdlib.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// 唯一的應(yīng)用程序?qū)ο?
CWinApp theApp;
using namespace std;
int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
{
int nRetCode = 0;
// 初始化 MFC 并在失敗時(shí)顯示錯(cuò)誤
if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
{
// TODO: 更改錯(cuò)誤代碼以符合您的需要
_tprintf(_T("致命錯(cuò)誤: MFC 初始化失敗\n"));
nRetCode = 1;
}
else
{
// TODO: 在此處為應(yīng)用程序的行為編寫代碼。
KeyTree *KT;
ItemSet *IS,*preIS;
char filename[30];
double min_sup = 0.02;
int Record_Count = 0,min_count;
int k = 1;
DWORD nStartTime,nEndTime,SumTime = 0;
cout<<"輸入數(shù)據(jù)文件名:";
cin>>filename;
cout<<"輸入最小支持閾值min_sup:";
cin>>min_sup;
CString exstr1;
exstr1 = "數(shù)據(jù)來源文件:";
exstr1 += filename;
exstr1 += "\r\n最小支持閾值:";
exstr1.AppendFormat("%f\r\n\r\n",min_sup);
nStartTime = GetTickCount();
KT = ReadDataFromText(filename,&Record_Count);
min_count = int(Record_Count*min_sup);
IS = find_frequent_1_itemset(KT,min_count); //實(shí)際是剪枝后生成鏈表
nEndTime = GetTickCount();
SumTime += nEndTime - nStartTime;
PrintResult(IS,k,exstr1);
while(IS != NULL)
{
k++;
nStartTime = GetTickCount();
preIS = apriori_gen(IS,k);
if(preIS == NULL) break;
IS = DeleteLink(IS);
IS = find_frequent_k_itemset(KT,preIS,k,min_count); //只是把preIS中的count字段作了更新并剪枝
nEndTime = GetTickCount();
SumTime += nEndTime - nStartTime;
PrintResult(IS,k,exstr1);
}
IS = DeleteLink(IS);
cout<<endl<<"My-Apriori算法運(yùn)行完畢,為您找到最高"<<k-1<<"-項(xiàng)集。"<<endl;
cout<<"算法總耗時(shí):"<<SumTime<<"ms。"<<"按任意鍵結(jié)束!!!"<<endl;
getch();
}
return nRetCode;
}
ItemSet* find_frequent_1_itemset(KeyTree* KT,int min_count)
{
KT = SearchAndDelInTree(KT,min_count);//剪枝
return GetDataLink(KT); //生成鏈表
}
ItemSetLink* GetDataLink(KeyTree* KT)
{
if(KT == NULL) return NULL;
ItemSetLink* N;
N = GetDataLink(KT->left);
if(N == NULL){
N = new ItemSetLink;
N->datastr = KT->Key;
N->count = KT->count;
N->next = GetDataLink(KT->right);
}
else{
ItemSetLink* tmpN = N;
while(tmpN->next != NULL) tmpN = tmpN->next;
tmpN->next = new ItemSetLink;
tmpN = tmpN->next;
tmpN->datastr = KT->Key;
tmpN->count = KT->count;
tmpN->next = GetDataLink(KT->right);
}
return N; //返回的是鏈表頭結(jié)點(diǎn)指針
}
KeyTree* SearchAndDelInTree(KeyTree* KT,int min_count)
{
if(KT == NULL) return KT;
KT->left = SearchAndDelInTree(KT->left,min_count);
KT->right = SearchAndDelInTree(KT->right,min_count);
//刪除非頻繁節(jié)點(diǎn)
if(KT->count < min_count)
{
KeyTree* TmpNode;
if(KT->left && KT->right) //兩個(gè)子樹
{
TmpNode = FindMin(KT->right);
KT->Key = TmpNode->Key;
KT->count = TmpNode->count;
KT->IndexSet = TmpNode->IndexSet;
KT->right = DeleteNode(KT->right,KT->Key);
}
else //一棵或沒有子樹
{
TmpNode = KT;
if(KT->left == NULL)
KT = KT->right;
else if(KT->right == NULL)
KT = KT->left;
delete TmpNode;
}
}
return KT;
}
KeyTree* FindMin(KeyTree* KT)
{
if(KT != NULL)
while(KT->left != NULL)
KT = KT->left;
return KT;
}
KeyTree* DeleteNode(KeyTree* KT,CString str)
{
KeyTree* TmpNode;
if(KT == NULL)
cout<<"Element not found!!!"<<endl;
else if(str.CompareNoCase(KT->Key.GetBuffer()) < 0)
KT->left = DeleteNode(KT->left,str);
else if(str.CompareNoCase(KT->Key.GetBuffer()) > 0)
KT->right = DeleteNode(KT->right,str);
//找到了節(jié)點(diǎn)
else if(KT->left && KT->right) //兩個(gè)子樹
{
TmpNode = FindMin(KT->right);
KT->Key = TmpNode->Key;
KT->count = TmpNode->count;
KT->IndexSet = TmpNode->IndexSet;
KT->right = DeleteNode(KT->right,KT->Key);
}
else //一棵或沒有子樹
{
TmpNode = KT;
if(KT->left == NULL)
KT = KT->right;
else if(KT->right == NULL)
KT = KT->left;
delete TmpNode;
}
return KT;
}
KeyTree* ReadDataFromText(CString filename,int *count)
{
CFile ftxt;
KeyTree* KT = NULL;
if(!ftxt.Open(filename,CFile::modeRead)){
cout<<"無法打開數(shù)據(jù)文件";
return NULL;
}
CString linestr;
while((linestr = GetLine(&ftxt)) != "Wen_End")
{
(*count)++;
if(!linestr.IsEmpty()){
CString sw;
int nStart = 0,nEnd;
do{
nEnd = linestr.Find(';',nStart);
if(nEnd == -1)
sw = linestr.Mid(nStart);
else
sw = linestr.Mid(nStart,nEnd - nStart);
nStart = nEnd + 1;
KT =UpdateKeyTree(KT,sw,*count);
}while(nEnd != -1);
}
}
ftxt.Close();
return KT;
}
KeyTree* UpdateKeyTree(KeyTree* KT,CString sw,int count)
{
if(KT == NULL){
KT = new KeyTree;
KT->Key = sw;
KT->IndexSet.Format("%d",count);
KT->count = 1;
KT->left = KT->right = NULL;
}
else if(sw.CompareNoCase(KT->Key) < 0)
KT->left = UpdateKeyTree(KT->left,sw,count);
else if(sw.CompareNoCase(KT->Key) > 0)
KT->right = UpdateKeyTree(KT->right,sw,count);
else //樹中已存在該關(guān)鍵詞
{
KT->IndexSet.AppendFormat(";%d",count);
KT->count++;
}
return KT;
}
CString GetLine(CFile* const f) //獲得文件中的一行字符
{
UINT n=0,row=0;
CString linestr;
char buf;
n=f->Read(&buf,1);
if(n==0) return "Wen_End"; //已到文件尾
do{
if(buf=='\r'){
n = f->Read(&buf,1); //把'\n'讀出
break;
}
linestr+=buf;
n = f->Read(&buf,1);
}while(n!=0);
return linestr;
}
void PrintResult(ItemSet* IS,int k,CString exstr1)
{
if(IS == NULL) return;
CString exstr;
CFile fout;
if(k == 1){
fout.Open("myapriori_output.txt",CFile::modeCreate|CFile::modeWrite);
fout.Write(exstr1.GetBuffer(),exstr1.GetLength());
}
else{
fout.Open("myapriori_output.txt",CFile::modeWrite);
fout.SeekToEnd();
}
cout<<"頻繁"<<k<<"-項(xiàng)集:"<<endl;
exstr.Format("頻繁%d-項(xiàng)集:\r\n",k);
fout.Write(exstr,exstr.GetLength());
while(IS != NULL){
cout<<IS->datastr<<" "<<IS->count<<endl;
exstr.Format("%s %d\r\n",IS->datastr.GetBuffer(),IS->count);
fout.Write(exstr,exstr.GetLength());
IS = IS->next;
}
cout<<endl;
fout.Write("\r\n",2);
fout.Close();
}
ItemSetLink* apriori_gen(ItemSet* IS,int k)
{
ItemSetLink *preIS1 = IS,*preIS2;
ItemSetLink *ISk,*ISkhead = NULL;
while(preIS1)
{
preIS2 = preIS1->next;
while(preIS2)
{
CString tmp; //臨時(shí)存放候選節(jié)點(diǎn)
tmp.Empty();
if(k == 2)
tmp = preIS1->datastr+ ';' + preIS2->datastr;
else{ //注:各關(guān)鍵字集都是已經(jīng)按字母序排好序的
int index1,index2;
CString tmp1,tmp2;
index1 = preIS1->datastr.ReverseFind(';');
index2 = preIS2->datastr.ReverseFind(';');
tmp1 = preIS1->datastr.Left(index1);
tmp2 = preIS2->datastr.Left(index2);
if(tmp1 == tmp2)
tmp = tmp1 + preIS1->datastr.Mid(index1) + preIS2->datastr.Mid(index2); //preIS1的子串排序在preIS2的子串前
}
if(!tmp.IsEmpty()){
if(!has_infrequent_subset(tmp,IS)) //無非頻繁子集 tmp作為一個(gè)新結(jié)點(diǎn)的值插入候選鏈表中
if(ISkhead == NULL){
ISkhead = CreatePreItemSet(ISkhead,tmp,k);
ISk = ISkhead;
}
else{
ISk->next = CreatePreItemSet(ISk,tmp,k); //直接從上一個(gè)節(jié)點(diǎn)開始插入新節(jié)點(diǎn),減少無謂的鏈表遍歷
ISk = ISk->next;
}
}
preIS2 = preIS2->next;
}
preIS1 = preIS1->next;
}
return ISkhead;
}
BOOL has_infrequent_subset(CString set,ItemSet* IS)
{
int nStart = 0,nEnd;
ItemSet* IShead = IS;
CString subset;
do{
//把截出來的單詞去掉,得到k-1子集
nEnd = set.Find(';',nStart);
if(nStart == 0 && nEnd != -1)
subset = set.Mid(nEnd+1);
else if(nStart == 0 && nEnd == -1)
subset = set;
else if(nStart != 0 && nEnd == -1)
subset = set.Left(nStart-1);
else
subset = set.Left(nStart)+set.Mid(nEnd+1);
nStart = nEnd+1;
//搜索該子集是否在k-1頻繁集中
IS = IShead;
while(IS)
{
if(subset.CompareNoCase(IS->datastr) == 0)
break;
IS = IS->next;
}
if(IS == NULL)
return TRUE; //有非頻繁子集
}while(nEnd != -1);
return FALSE; //無非頻繁子集
}
ItemSetLink* CreatePreItemSet(ItemSetLink* preIS,CString str,int k)
{
if(preIS == NULL){
preIS = new ItemSetLink;
preIS->datastr = str;
preIS->count = 0;
preIS->next = NULL;
return preIS;
}
//preIS->next為NULL
preIS->next = new ItemSetLink;
preIS = preIS->next;
preIS->datastr = str;
preIS->count = 0;
preIS->next = NULL;
return preIS;
}
ItemSet* find_frequent_k_itemset(KeyTree* KT,ItemSetLink* preIS,int k,int min_count)
{
CString sw;
ItemSet* IShead = preIS;
CStringArray SA;
SA.SetSize(k);
while(preIS) //對候選頻繁鏈表中的每個(gè)節(jié)點(diǎn)均進(jìn)行計(jì)數(shù)
{
int nStart = 0,nEnd;
int n = 0;
for(int i = 0; i < k; i++)
SA[i].Empty();
do{
nEnd = preIS->datastr.Find(';',nStart);
if(nEnd == -1)
sw = preIS->datastr.Mid(nStart);
else
sw = preIS->datastr.Mid(nStart,nEnd - nStart);
if(!sw.IsEmpty())
SA.SetAt(n,FindInKeyTree(KT,sw)); //把找到的“關(guān)鍵字出現(xiàn)的記錄表”添加到鏈表中
n++;
nStart = nEnd + 1;
}while(nEnd != -1);
preIS->count = CountForKey(&SA,k);
preIS = preIS->next;
}
IShead = Del_k_ItemSet(IShead,min_count);
return IShead;
}
CString FindInKeyTree(KeyTree* KT,CString str)
{
if(KT == NULL)
return "";
else if(str.CompareNoCase(KT->Key.GetBuffer()) < 0)
return FindInKeyTree(KT->left,str);
else if(str.CompareNoCase(KT->Key.GetBuffer()) > 0)
return FindInKeyTree(KT->right,str);
else //找到了
return KT->IndexSet;
}
int CountForKey(CStringArray* SA,int k)
{
int count = 0;
int *nValue = new int[k]; //記錄號(hào)
int *nStart = new int[k]; //搜索起始下標(biāo)
int *nEnd = new int[k]; //搜索到的下標(biāo)
CString tmp;
for(int i = 0; i < k; i++)
nStart[i] = 0;
GetRecordNum(SA,nValue,nStart,nEnd,k); //得到第一組記錄號(hào)
while(1)
{
int mindex;
mindex = MinnIndex(nValue,k);
if(mindex == -1){ //各記錄號(hào)相等
count++;
if(SomeIsEnd(nStart,k)) //有記錄表已到達(dá)末尾
break;
//都讀下一個(gè)記錄號(hào)
GetRecordNum(SA,nValue,nStart,nEnd,k);
}
else{
//值最小者讀下一個(gè)記錄號(hào)
if(nStart[mindex] == 0) //值最小的串已經(jīng)到達(dá)末端
break;
tmp = SA->GetAt(mindex);
nEnd[mindex] = tmp.Find(';',nStart[mindex]);
if(nEnd[mindex] == -1)
nValue[mindex] = atoi(tmp.Mid(nStart[mindex]).GetBuffer());
else
nValue[mindex] = atoi(tmp.Mid(nStart[mindex],nEnd[mindex] - nStart[mindex]).GetBuffer());
nStart[mindex] = nEnd[mindex] + 1;
}
}
delete nValue;
delete nStart;
delete nEnd;
return count;
}
int MinnIndex(int nIndexArray[],int k)
{
int mV = nIndexArray[0],mI = 0;
BOOL equal = TRUE;
for(int i = 1; i< k; i++){
if(mV != nIndexArray[i]){
equal = FALSE;
mI = mV <= nIndexArray[i]? mI:i;
mV = nIndexArray[mI];
}
}
if(equal == TRUE) //如果所有值相等,則返回-1
mI = -1;
return mI;
}
BOOL SomeIsEnd(int nStart[],int k)
{
for(int i = 0; i < k; i++)
if(nStart[i] == 0)
return TRUE;
return FALSE;
}
void GetRecordNum(CStringArray* SA,int nValue[],int nStart[],int nEnd[],int k)
{
CString tmp;
for(int i = 0;i < k;i++){ //得到SA各節(jié)點(diǎn)的下一個(gè)記錄號(hào)
tmp = SA->GetAt(i);
nEnd[i] = tmp.Find(';',nStart[i]);
if(nEnd[i] == -1)
nValue[i] = atoi(tmp.Mid(nStart[i]).GetBuffer());
else
nValue[i] = atoi(tmp.Mid(nStart[i],nEnd[i] - nStart[i]).GetBuffer());
nStart[i] = nEnd[i] + 1;
}
}
ItemSet* Del_k_ItemSet(ItemSet* IS,int min_count)
{
ItemSet *tmpNode,*preNode,*IShead = IS;
while(IS)
{
tmpNode = NULL;
if(IS->count < min_count){ //刪除當(dāng)前節(jié)點(diǎn)
if(IS == IShead){ //是頭節(jié)點(diǎn)
tmpNode = IShead;
IShead = IS->next;
preNode = IShead;
}
else{
tmpNode = IS;
preNode->next = IS->next;
}
}
else //當(dāng)前節(jié)點(diǎn)保留
preNode = IS;
IS = IS->next;
if(tmpNode)
delete tmpNode;
}
return IShead;
}
ItemSetLink* DeleteLink(ItemSetLink* IS)
{
ItemSetLink* dIS;
while(IS)
{
dIS = IS;
IS = IS->next;
delete dIS;
}
return NULL;
}
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號(hào)
Ctrl + =
減小字號(hào)
Ctrl + -