?? fptn20.c
字號:
aNode = headerTable[baseIndex]; while (aNode != NULL) { aNode2 = aNode->parent; count = 0; while (aNode2 != T) { for (i=0; i < conHeaderSize; i++) if (aNode2->item == conLargeItem[i]) { freqItemP[count] = aNode2->item; indexList[count] = i; count++; break; } aNode2 = aNode2->parent; } q_sortA(indexList, freqItemP, 0, count-1, count); path = 0; insert_tree(&(freqItemP[0]), &(indexList[0]), aNode->count, 0, count, *conRoot, *conHeader, &path); aNode = aNode->hlink; } free(freqItemP); free(indexList); return;}void FPgrowth(FPTreeNode T, FPTreeNode *headerTableLink, int headerSize, int *baseItems, int baseSize){ int count; int i, j; int *pattern; int patternSupport; int conHeaderSize; FPTreeNode *conHeader; FPTreeNode conRoot; FPTreeNode aNode, aNode2; int *conLargeItem; int *conLargeItemSupport; int localStartHeader=startHeader; if (baseSize >= realK) return; if (T == NULL) return; if (T->numPath == 1) { //printf("single path\n"); conLargeItem = (int *) malloc (sizeof(int) * headerSize); conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize); if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) { printf("out of memory\n"); exit(1); } count = 0; if (T->children != NULL) aNode = T->children->node; while (aNode != NULL) { conLargeItem[count] = aNode->item; conLargeItemSupport[count] = aNode->count; count++; if (aNode->children != NULL) aNode = aNode->children->node; else aNode = NULL; } combine(conLargeItem, conLargeItemSupport, 0, count, baseItems, baseSize); free(conLargeItem); free(conLargeItemSupport); } else { //printf("multiple path\n"); pattern = (int *) malloc (sizeof(int) * (baseSize + 1)); if (pattern == NULL) { printf("out of memory\n"); exit(1); } conLargeItem = (int *) malloc (sizeof(int) * headerSize); conLargeItemSupport = (int *) malloc (sizeof(int) * headerSize); if ((conLargeItem == NULL) || (conLargeItemSupport == NULL)) { printf("out of memory\n"); exit(1); } for (j=0; j < baseSize; j++) { pattern[j+1] = baseItems[j]; } if (localStartHeader > headerSize) localStartHeader = headerSize; for (i=localStartHeader-1; i >= 0; i--) { pattern[0] = headerTableLink[i]->item; aNode = headerTableLink[i]; patternSupport = 0; while (aNode != NULL) { patternSupport += aNode->count; aNode = aNode->hlink; } if ((baseSize > 0) && (patternSupport < oldThreshold) && (patternSupport >= thresholdK[baseSize])) addToLargeList(pattern, patternSupport, baseSize); /* generate conditional pattern base */ for (j=0; j < headerSize; j++) conLargeItemSupport[j] = 0; aNode = headerTableLink[i]; conHeaderSize = 0; while (aNode != NULL) { aNode2 = aNode->parent; while (aNode2 != T) { for (j=0; j < headerSize; j++) if (aNode2->item == headerTableLink[j]->item) { conLargeItemSupport[j] += aNode->count; if ((conLargeItemSupport[j] >= threshold) && (conLargeItemSupport[j] - aNode->count < threshold)) { conLargeItem[j] = aNode2->item; conHeaderSize++; } break; } aNode2 = aNode2->parent; } aNode = aNode->hlink; } /* generate conditional pattern tree */ if (conHeaderSize > 0) { q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize); buildConTree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport, T, headerTableLink, i, headerSize); FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1); free(conHeader); destroyTree(conRoot, 1); } } for (i=localStartHeader; i < headerSize; i++) { pattern[0] = headerTableLink[i]->item; aNode = headerTableLink[i]; patternSupport = 0; while (aNode != NULL) { patternSupport += aNode->count; aNode = aNode->hlink; } if (patternSupport < threshold) break; if ((baseSize > 0) && (patternSupport < oldThreshold) && (patternSupport >= thresholdK[baseSize])) addToLargeList(pattern, patternSupport, baseSize); /* generate conditional pattern base */ for (j=0; j < headerSize; j++) conLargeItemSupport[j] = 0; aNode = headerTableLink[i]; conHeaderSize = 0; while (aNode != NULL) { aNode2 = aNode->parent; while (aNode2 != T) { for (j=0; j < headerSize; j++) if (aNode2->item == headerTableLink[j]->item) { conLargeItemSupport[j] += aNode->count; if ((conLargeItemSupport[j] >= threshold) && (conLargeItemSupport[j] - aNode->count < threshold)) { conLargeItem[j] = aNode2->item; conHeaderSize++; } break; } aNode2 = aNode2->parent; } aNode = aNode->hlink; } /* generate conditional pattern tree */ if (conHeaderSize > 0) { q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize); buildConTree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport, T, headerTableLink, i, headerSize); FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1); free(conHeader); destroyTree(conRoot, 1); } } free(pattern); free(conLargeItem); free(conLargeItemSupport); } return;}int findCombination(int n, int m){ int factorial1=1, factorial2=1; int i; for (i=n; i > (n-m); i--) factorial1 *= i; for (i=m; i > 1; i--) factorial2 *= i; return (factorial1 / factorial2);}void findLimit(){ int i, j; for (i=1; i < realK; i++) { for (j=i+1; j < realK; j++) { limit[i] += findCombination(j, i); } if (limit[i] > N) limit[i] = N; } return;}void pass1(){ int transSize; int item; int maxSize=0; FILE *fp; int i, j; support1 = (int *) malloc (sizeof(int) * numItem); largeItem1 = (int *) malloc (sizeof(int) * numItem); if ((support1 == NULL) || (largeItem1 == NULL)) { printf("out of memory\n"); exit(1); } limit = (int *) malloc (sizeof(int) * numItem); if (limit == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < numItem; i++) limit[i] = 0; for (i=0; i < numItem; i++) { support1[i] = 0; largeItem1[i] = i; } /* scan DB to count frequency of each item */ if ((fp = fopen(dataFile, "r")) == NULL) { printf("Can't open data file, %s.\n", dataFile); exit(1); } for (i=0; i < numTrans; i++) { fscanf(fp, "%d", &transSize); if (transSize > maxSize) maxSize = transSize; (limit[transSize-1])++; for (j=0; j < transSize; j++) { fscanf(fp, "%d", &item); (support1[item])++; } } fclose(fp); /* initialize large itemset list and support list */ realK = expectedK; if ((maxSize < expectedK) || (expectedK <= 0)) realK = maxSize; printf("maxSize = %d\nrealK = %d\n", maxSize, realK); largeItemset = (LargeItemPtr *) malloc (sizeof(LargeItemPtr) * realK); numLarge = (int *) malloc (sizeof(int) * realK); if ((largeItemset == NULL) || (numLarge == NULL)) { printf("out of memory\n"); exit(1); } for (i=0; i < realK; i++) { largeItemset[i] = NULL; numLarge[i] = 0; } /* sort the support in ascending order */ q_sortD(&(support1[0]), largeItem1, 0, numItem-1, numItem); /* for (i=0; i < numItem; i++) printf("%d[%d] ", largeItem1[i], support1[i]); printf("\n"); */ headerTableSize = N; while ((headerTableSize < numItem) && (support1[N-1] == support1[headerTableSize])) headerTableSize++; numLarge[0] = headerTableSize; headerTableSize = numItem; while (support1[headerTableSize-1] < 1) headerTableSize--; /* if (headerTableSize > realK) threshold = support1[N-1]; else { headerTableSize = realK; while ((headerTableSize < numItem) && (support1[realK] == support1[headerTableSize])) headerTableSize++; if (realK < numItem) threshold = support1[realK]; else threshold = support1[realK-1]; } */ printf("\nheaderTableSize=%d\n", headerTableSize); printf("numLarge[0]=%d\n", numLarge[0]); findLimit(); return;}void buildTree(){ int *freqItemP; int *indexList; int count; FILE *fp; int transSize; int item; int i, j, m; int path; /* build header table */ headerTableLink = (FPTreeNode *) malloc (sizeof(FPTreeNode) * headerTableSize); if (headerTableLink == NULL) { printf("out of memory\n"); exit(1); } for (i=0; i < numItem; i++) headerTableLink[i] = NULL; /* create root of the FP-tree */ root = (FPTreeNode) malloc (sizeof(FPNode)); if (root == NULL) { printf("out of memory\n"); exit(1); } root->numPath = 1; root->parent = NULL; root->children = NULL; root->hlink = NULL; freqItemP = (int *) malloc (sizeof(int) * numItem); if (freqItemP == NULL) { printf("out of memory\n"); exit(1); } indexList = (int *) malloc (sizeof(int) * numItem); if (indexList == NULL) { printf("out of memory\n"); exit(1); } /* scan DB and insert frequent items into the FP-tree */ if ((fp = fopen(dataFile, "r")) == NULL) { printf("Can't open data file, %s.\n", dataFile); exit(1); } for (i=0; i < numTrans; i++) { fscanf(fp, "%d", &transSize); count = 0; path = 0; for (j=0; j < transSize; j++) { fscanf(fp, "%d", &item); for (m=0; m < headerTableSize; m++) { if (item == largeItem1[m]) { freqItemP[count] = item; indexList[count] = m; count++; break; } } } q_sortA(indexList, freqItemP, 0, count-1, count); insert_tree(&(freqItemP[0]), &(indexList[0]), 1, 0, count, root, headerTableLink, &path); } fclose(fp); free(freqItemP); free(indexList); return;}void displayResult(){ LargeItemPtr aLargeItemset; FILE *fp; int count; int support; int i, j; if ((fp = fopen(outFile, "w")) == NULL) { printf("Can't open data file, %s.\n", outFile); exit(1); } fprintf(fp, "%d\n\n", realK); fprintf(fp, "%d\n", numLarge[0]); if (numLarge[0] > 0) { printf("\nLarge %d-itemsets[support]:\n", 1); for (i=0; i < numLarge[0]; i++) { printf("%d ", largeItem1[i]); fprintf(fp, "%d ", largeItem1[i]); printf("[%d]\n", support1[i]); fprintf(fp, " %d\n", support1[i]); } printf("\n"); fprintf(fp, "\n"); } for (i=1; i < realK; i++) { if (numLarge[i] == 0) break; fprintf(fp, "%d\n", numLarge[i]); printf("\nLarge %d-itemsets[support]:\n", i+1); aLargeItemset = largeItemset[i]; count = 0; while (aLargeItemset != NULL) { count++; if (count == N) support = aLargeItemset->support; else if (count > N) if (aLargeItemset->support < support) break; for (j=0; j <= i; j++) { printf("%d ", aLargeItemset->itemset[j]); fprintf(fp, "%d ", aLargeItemset->itemset[j]); } printf("[%d]\n", aLargeItemset->support); fprintf(fp, " %d\n", aLargeItemset->support); aLargeItemset = aLargeItemset->next; } printf("\n"); fprintf(fp, "\n"); } fclose(fp); return;}void input(char *configFile){ FILE *fp; float thresholdDecimal; if ((fp = fopen(configFile, "r")) == NULL) { printf("Can't open config. file, %s.\n", configFile); exit(1); } fscanf(fp, "%d %d %f %d %d %f", &N, &expectedK, &thresholdDecimal, &numItem, &numTrans, &decreaseRate); fscanf(fp, "%s %s %s", dataFile, treeFile, outFile); fclose(fp); printf("N = %d\n", N); printf("expectedK = %d\n", expectedK); printf("thresholdDecimal = %f (dummy)\n", thresholdDecimal); printf("numItem = %d\n", numItem); printf("numTrans = %d\n", numTrans); printf("decreaseRate = %f\n", decreaseRate); printf("dataFile = %s\n", dataFile); printf("treeFile = %s\n", treeFile); printf("outFile = %s\n\n", outFile); threshold = thresholdDecimal * numTrans; printf("threshold = %d (dummy)\n", threshold); oldThreshold = numTrans; if (N > numItem) { printf("N > numItem\n"); exit(1); } return;}void main(int argc, char *argv[]){ float userTime, sysTime; struct rusage myTime1, myTime2; /* usage ------------------------------------------*/ printf("fptN20.c\n"); printf("FP-tree (finding N-most interesting large itemsets)\n"); if (argc != 3) { printf("Usage: fptN20 <config. file> <where to start scanning the header table>\n"); exit(1); } /* read input parameters --------------------------*/ printf("input\n"); input(argv[1]); startHeader = atoi(argv[2]); getrusage(RUSAGE_SELF,&myTime1); /* pass 1 -----------------------------------------*/ printf("\npass1\n"); pass1(); if (startHeader == 0) startHeader = headerTableSize / 2; printf("startHeader=%d\n", startHeader); /* create FP-tree --------------------------*/ printf("\nbuildTree\n"); buildTree(); /* initialize thresholdK -------------------*/ printf("\ninitThreshold\n"); initThresholdK(); round++; printf("round = %d, threshold = %d\n", round, threshold); /* mining frequent patterns ----------------*/ printf("\npassK\n"); FPgrowth(root, headerTableLink, headerTableSize, NULL, 0); getrusage(RUSAGE_SELF,&myTime2); /* output result of large itemsets ----------------*/ printf("\nresult\n"); displayResult(); /* free memory ------------------------------------*/ printf("\ndestroy\n"); destroy(); /* output running time ----------------------------*/ printf("build tree time:\n"); userTime = ((float) (myTime2.ru_utime.tv_sec - myTime1.ru_utime.tv_sec)) + ((float) (myTime2.ru_utime.tv_usec - myTime1.ru_utime.tv_usec)) * 1e-6; sysTime = ((float) (myTime2.ru_stime.tv_sec - myTime1.ru_stime.tv_sec)) + ((float) (myTime2.ru_stime.tv_usec - myTime1.ru_stime.tv_usec)) * 1e-6; printf("User time : %f seconds\n",userTime); printf("System time : %f seconds\n\n",sysTime); return;}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -