?? l maxfpminer.cpp
字號:
// MiningMaxfp.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define TEMP_INFO_FILE "c:\\tempinfo.txt"
#define PATHLENGTH 100
#define MINNODECOUNT 4000
#define ITEMCOUNT 1000
// Define used types
// Frequency type
typedef unsigned FREQ_TYPE;
// Item type
typedef unsigned ITEM_TYPE;
// Fptree node type
typedef struct FptreeNode{
unsigned nNumber;
FREQ_TYPE fFrequency;
struct FptreeNode *pVertical;
struct FptreeNode *pHorizon;
} FPTREENODE;
// Item information type
typedef struct ItemInfo{
FREQ_TYPE fFrequency;
unsigned nNumber;
} ITEMINFO;
// Memory Block type
typedef struct MemoryBlock{
FPTREENODE *pMBlock;
struct MemoryBlock *pNext;
} MEMORYBLOCK;
// Declare functions
void Initialize();
void Sift(unsigned nFirst,unsigned nEnd);
void Sort();
void BuildFptree();
void ReverseLink();
void MiningMaxfp();
bool EliminateBranches(FPTREENODE *pV_Root,FPTREENODE *pH_Root);
void PrintMaxpatterns();
void FreeMemory();
// Declare global variables
FREQ_TYPE fSupport;
ITEM_TYPE itMaxItem;
unsigned nTotalLine;
ITEMINFO *arrfItemInfo;
ITEM_TYPE *arritNum;
FPTREENODE *pRoot;
FPTREENODE *pM_Root;
FPTREENODE **arrpFpHorizon;
FPTREENODE *pBlock;
MEMORYBLOCK *pMemory;
FPTREENODE *pNodeRecycle,*pHeaders;
ITEM_TYPE *arrNumberToItem;
FREQ_TYPE *arrfFreq;
unsigned nItemCount,nFPMaxlength;
unsigned nNodeCount,nFptreeNodeCount,nMptreeNodeCount;
unsigned long nMaxfpCount;
unsigned _int64 nTotalMined,nTotal;
FILE *fileData;
FILE *fileResult;
float fDegree;
char *strFileName;
char *strResultFile;
clock_t clockStart;
clock_t clockFirstScan;
clock_t clockBuildFpree;
clock_t clockEnd;
// Main program
int main(int argc,char *argv[])
{
// Declaration
FILE *fileInfo;
float fTemp;
char ch,p2[PATHLENGTH+1],p3[PATHLENGTH+1];
if(argc<4)
{
// Open tempory mining information file
if((fileInfo=fopen(TEMP_INFO_FILE,"r"))==NULL)
{
puts("Three parameters are needed:");
puts("Support degree_Result file_Data file.");
printf("Press ENTER to exit");
ch=getchar();
return 0;
}
// Read information from temporary information file
fscanf(fileInfo,"%f%s%s",&fTemp,p2,p3);
p2[PATHLENGTH]='\0';
p3[PATHLENGTH]='\0';
fclose(fileInfo);
if(argc<4) strFileName=p3;
if(argc<3) strResultFile=p2;
if(argc<2) fDegree=fTemp;
}
else{
fDegree=(float)atof(argv[1]);
strResultFile=argv[2];
strFileName=argv[3];
}
// Write information into temporary file
if((fileInfo=fopen(TEMP_INFO_FILE,"w"))!=NULL)
{
fprintf(fileInfo,"%f\n",fDegree);
fprintf(fileInfo,"%s\n",strResultFile);
fprintf(fileInfo,"%s\n",strFileName);
fclose(fileInfo);
}
// Confirm information
puts("Mining information : ...");
printf("Data-file path: %s\n",strFileName);
printf("Supporting degree(%%): %f\n",fDegree);
printf("The result will be written to file: %s\n\n",strResultFile);
printf("Press ENTER to confirm and continue");
ch=getchar();
if(ch!='\n') return 0;
// Initialization
clockStart=clock();
Initialize();
clockFirstScan=clock();
// Print used time in first scaning
printf("done in seconds %.3f\n",(clockFirstScan-clockStart)/(double)CLOCKS_PER_SEC);
// Write information to result file
puts("Data information ...");
fprintf(fileResult,"Data-file path: %s\n",strFileName);
fprintf(fileResult,"Maximum item: %u\n",itMaxItem);
printf("Maximum item: %u\n",itMaxItem);
fprintf(fileResult,"Total lines: %u\n",nTotalLine);
printf("Total lines: %u\n",nTotalLine);
fprintf(fileResult,"Support frequency: %u\n",fSupport);
printf("Support frequency: %u\n\n",fSupport);
fprintf(fileResult,"Supporting degree(%%): %f\n",fDegree);
// Build fptree
printf("Build fptree ... ");
BuildFptree();
clockBuildFpree=clock();
printf("done in seconds %.3f\n",(clockBuildFpree-clockFirstScan)/(double)CLOCKS_PER_SEC);
// MiningMaxfp algorithm
printf("Mining start ... ");
fprintf(fileResult,"Max-patterns are : ...\n\n");
MiningMaxfp();
clockEnd=clock();
printf("done in seconds %.3f\n",(clockEnd-clockBuildFpree)/(double)CLOCKS_PER_SEC);
// Print pattern number and used time
printf("All completed in seconds: %.3f\n\n",(clockEnd-clockStart)/(double)CLOCKS_PER_SEC);
printf("Total pattern number is: %I64u\n",nTotal-1);
printf("Total mined pattern number is: %I64u\n",nTotalMined);
printf("Total max-pattern number is: %lu\n\n",nMaxfpCount);
fprintf(fileResult,"\nTotal pattern number is: %u\n",nTotal-1);
fprintf(fileResult,"Total mined pattern number is: %I64u\n",nTotalMined);
fprintf(fileResult,"Total max-pattern number is: %lu\n",nMaxfpCount);
fprintf(fileResult,"Count of fptree nodes: %u\n",nFptreeNodeCount);
fprintf(fileResult,"Count of max-pattern tree nodes: %u\n",nMptreeNodeCount);
fprintf(fileResult,"Fptree growth completed in seconds: %.3f\n",(clockEnd-clockBuildFpree)/(double)CLOCKS_PER_SEC);
fprintf(fileResult,"All completed in seconds: %.3f\n",(clockEnd-clockStart)/(double)CLOCKS_PER_SEC);
// Close result file
fclose(fileResult);
// Clear the fptree
printf("Delete fptree and max-pattern tree ... ");
FreeMemory();
puts("done\n");
printf("Press ENTER to exit");
ch=getchar();
return 0;
}
// Initialize the variables
void Initialize()
{
unsigned i,nLength,nTemp;
float fTemp;
ITEM_TYPE item;
char ch;
// Open data file
if((fileData=fopen(strFileName,"r"))==NULL)
{
printf("\nCan't open fptree data file. \n");
printf("Press ENTER to exit");
ch=getchar();
exit(1);
}
// Allocation
arrfItemInfo=new ITEMINFO[ITEMCOUNT];
// Initialize item information
for(i=0;i<ITEMCOUNT;i++)
{
// Initialize information of each item
arrfItemInfo[i].fFrequency=0;
arrfItemInfo[i].nNumber=0;
}
// Initialization
nFPMaxlength=0;
nTotalLine=0;
itMaxItem=0;
nLength=ITEMCOUNT;
// Caculate the frequency of each item while scan the data file
printf("\nGet data information and sort ... ");
while(!feof(fileData))
{
do
{
fscanf(fileData,"%u ",&item);
if(item==-1)
{
nTotalLine++;
if(!feof(fileData)) continue;
break;
}
if(item>itMaxItem)
{
itMaxItem=item;
if(itMaxItem>=nLength)
{
nTemp=nLength;
while(nLength<=itMaxItem) nLength+=ITEMCOUNT;
arrfItemInfo=(ITEMINFO *)realloc(arrfItemInfo,nLength*sizeof(ITEMINFO));
for(i=nTemp;i<nLength;i++)
{
// Initialize information of each item
arrfItemInfo[i].fFrequency=0;
arrfItemInfo[i].nNumber=0;
}
}
}
arrfItemInfo[item].fFrequency++;
}while(1);
}
// Extract the array
realloc(arrfItemInfo,(itMaxItem+1)*sizeof(ITEMINFO));
// Caculate the support frequency
fTemp=nTotalLine*fDegree/100;
fSupport=(FREQ_TYPE)fTemp;
if(fTemp-fSupport>0.0001) fSupport++;
// Create result file
if((fileResult=fopen(strResultFile,"w"))==NULL)
{
delete[]arrfItemInfo;
printf("\nCan't create result file: %s\n",strResultFile);
printf("Press ENTER to exit");
ch=getchar();
exit(2);
}
// close data file
fclose(fileData);
// Sort the items in term of frequency
Sort();
}
// Heap-sort algorithm
void Sort()
{
ITEM_TYPE itTemp,i;
unsigned j;
// Create temporary array
arritNum=new ITEM_TYPE[itMaxItem+2];
// Initialize the itemfirst-array
nItemCount=0;
for(i=0;i<=itMaxItem;i++)
{
if(arrfItemInfo[i].fFrequency<fSupport) continue;
nItemCount++;
arritNum[nItemCount]=i;
}
// If no frequent items, exit program
if(!nItemCount)
{
delete[]arritNum;
delete[]arrfItemInfo;
printf("no frequent items ... done\n\n");
printf("Press ENTER to exit");
getchar();
exit(0);
}
// Create horizontal links
arrpFpHorizon=new FPTREENODE *[nItemCount+1];
// Create number to item array
arrNumberToItem=new ITEM_TYPE[nItemCount+1];
// Create frequency array
arrfFreq=new FREQ_TYPE[nItemCount+2];
// Build heap
for(j=nItemCount/2;j>0;j--) Sift(j,nItemCount);
// Sort the elements and store their informations
for(j=nItemCount;j>0;j--)
{
itTemp=arritNum[1];
arritNum[1]=arritNum[j];
arrfItemInfo[itTemp].nNumber=j;
arrNumberToItem[j]=itTemp;
arrpFpHorizon[j]=0;
arrfFreq[j]=0;
Sift(1,j-1);
}
// Free allocated memory
delete[]arritNum;
}
// Heap adjustment
void Sift(unsigned nFirst,unsigned nEnd)
{
// Declare variables
FREQ_TYPE fTemp,fMin,fT;
ITEM_TYPE itTemp;
unsigned nCurrent,nNext;
// Initialization
itTemp=arritNum[nFirst];
fTemp=arrfItemInfo[itTemp].fFrequency;
nCurrent=nFirst;
nNext=nCurrent+nCurrent;
// Adjust the heap to a sorted-tree
while(nNext<=nEnd)
{
// Get the smaller son
fMin=arrfItemInfo[arritNum[nNext]].fFrequency;
if(nNext<nEnd && fMin>(fT=arrfItemInfo[arritNum[nNext+1]].fFrequency))
{
fMin=fT;
nNext++;
}
// Deal with next level
if(fTemp<=fMin) break;
else{
arritNum[nCurrent]=arritNum[nNext];
nCurrent=nNext;
nNext=nCurrent+nCurrent;
}
}
// End the adjustment
arritNum[nCurrent]=itTemp;
}
// Build Fptree
void BuildFptree()
{
// Declare variables
unsigned i,j,nCount,nTemp,nN,*arrnTick;
FPTREENODE *pAnces,*pPre,*pQuery,*pInsert;
MEMORYBLOCK *pTempMemory;
ITEM_TYPE item;
char ch;
// Initialize fptree node count
nFptreeNodeCount=1;
// Ensure an opened data-file
if((fileData=fopen(strFileName,"r"))==NULL)
{
printf("\nCan't open fptree data file. \n");
printf("Press ENTER to exit");
ch=getchar();
exit(3);
}
// Create arrnTick array and initialize
arrnTick=new unsigned[nItemCount+1];
for(i=0;i<=nItemCount;i++) arrnTick[i]=0;
// Create first memory block
pMemory=new MEMORYBLOCK;
pMemory->pNext=0;
pBlock=new FPTREENODE[MINNODECOUNT];
pMemory->pMBlock=pBlock;
// Initialize fptree root
pRoot=pBlock;
pRoot->nNumber=0;
pRoot->pVertical=0;
pM_Root=pBlock+1;
pM_Root->nNumber=nItemCount+1;
pM_Root->pVertical=pRoot;
nNodeCount=2;
// Build the fptree
while(!feof(fileData))
{
// Initialize nCount
nCount=0;
// Read another transaction and sort
do
{
// Read another item
fscanf(fileData,"%u ",&item);
if(item==-1) break;
nTemp=arrfItemInfo[item].nNumber;
// Ensure an valid item
if(nTemp!=0)
{
// Find the biggest-smaller integer
i=nCount;
while(arrnTick[i]>=nTemp) i--;
// Verify existence
i++;
if(arrnTick[i]!=nTemp)
{
arrfFreq[nTemp]++;
nCount++;
for(j=nCount;j>i;j--) arrnTick[j]=arrnTick[j-1];
arrnTick[i]=nTemp;
}
}
// If not transaction end , continue
}while(1);
// Ensure an unempty transaction
if(nCount==0) continue;
// Get maximum-length
if(nCount>nFPMaxlength) nFPMaxlength=nCount;
// Initialization
i=1;
pAnces=pRoot;
// Scan the turns of valid items in a transaction
do
{
// Get the head-node
pPre=pAnces->pVertical;
// Check descendant NULL
if(pPre==0)
{
do
{
// Get the head-node
nN=arrnTick[i];
// Create a new fpnode and initialize it
if(nNodeCount==MINNODECOUNT)
{
pTempMemory=pMemory;
pMemory=new MEMORYBLOCK;
pMemory->pNext=pTempMemory;
pBlock=new FPTREENODE[MINNODECOUNT];
pMemory->pMBlock=pBlock;
nNodeCount=0;
}
pQuery=pBlock+nNodeCount;
nNodeCount++;
nFptreeNodeCount++;
pQuery->nNumber=nN+nItemCount;
pQuery->fFrequency=1;
pQuery->pHorizon=pQuery;
// Set ancestor-node
pAnces->pVertical=pQuery;
pAnces=pQuery;
// Inspect next item
arrnTick[i]=0;
i++;
}while(i<=nCount);
// End vertical link
pAnces->pVertical=0;
break;
}
// Get the current number
nN=arrnTick[i];
// Have finded it
if(pPre->nNumber==nN+nItemCount)
{
pPre->fFrequency++;
pAnces=pPre;
// Inspect next item
arrnTick[i]=0;
i++;
if(i<=nCount) continue;
break;
}
// Browse in link
pQuery=pPre->pHorizon;
while(pQuery->nNumber<nN)
{
pPre=pQuery;
pQuery=pPre->pHorizon;
}
// Deal with the result
if(pQuery->nNumber>nN)
{
// Create a new node and initialize it
if(nNodeCount==MINNODECOUNT)
{
pTempMemory=pMemory;
pMemory=new MEMORYBLOCK;
pMemory->pNext=pTempMemory;
pBlock=new FPTREENODE[MINNODECOUNT];
pMemory->pMBlock=pBlock;
nNodeCount=0;
}
pInsert=pBlock+nNodeCount;
nNodeCount++;
nFptreeNodeCount++;
pInsert->fFrequency=1;
pInsert->nNumber=nN;
// Link it to brother link
pInsert->pHorizon=pQuery;
pPre->pHorizon=pInsert;
// Set ancestor node
pAnces=pInsert;
// Go to next item
arrnTick[i]=0;
i++;
while(i<=nCount)
{
// Get the head-node
nN=arrnTick[i];
// Create a new fpnode and initialize it
if(nNodeCount==MINNODECOUNT)
{
pTempMemory=pMemory;
pMemory=new MEMORYBLOCK;
pMemory->pNext=pTempMemory;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -