?? dffast.cc
字號:
//// C++ program that finds frequent itemsets - file dffast.cc// December 12, 2003// Wim Pijls and Walter Kosters// Erasmus University Rotterdam and Leiden University, The Netherlands// pijls@few.eur.nl and kosters@liacs.nl// http://www.liacs.nl/home/kosters/df///// makefile: g++ -Wall -O3 -o fim_all dffast.cc//// Assume that the relevant database and the trie containing all frequent// sets together fit in main memory; relevant here means: all transactions// that contain at least 2 frequent items (the so-called relevant// transactions), whereas only frequent items are considered. Every byte // contains 8 bits of the database. So its size is the number of // frequent items times the number of relevant transactions divided by 8.//// The data is read four times:// 1. to find minimal and maximal item number that occurs// 2. to determine the frequencies of the single item numbers// 3. to find the relevant transactions (containing at least 2 frequent items)// 4. to store the (relevant) database in main memory//// Several (time) printing commands are commented out with ////// This version (dffast.cc) replaces previous versions dfmemory.cc// and dftime.cc; main difference: memory management is improved// in order to avoid unnecessary allocations, which makes different// memory/time efficient versions obsolete.// Runtimes are comparable with those of its predecessors// // name inputfile:#define input_filename argv[1]// (absolute) value of minsup:#define macro_minsup atoi(argv[2])// name outputfile:#define output_filename argv[3]#include <iostream>#include <fstream>#include <cstdio>#include <cstdlib>#include <ctime>using namespace std;typedef char *transaction;const int MAXDEPTH = 100; // maximal depth of trie (for printing)int minsup; // minimum supportint therow; // number of currect transactiontransaction globpointer; // current transactiontransaction *dataset; // the whole databasechar vector[8]; // masks for fastchar *supervector; // array addressingclass initialcounts{ public: initialcounts (char *inputdata); static int *itemsorder; static int *items_frequency; static short *ranking; static int min_itemnr, max_itemnr; static int number_transactions, number_freq_items, lines; private: void first_pass ( ); void second_pass ( ); void third_pass ( ); void initialsort ( ); char *infilename; int *init_items_frequency;};class data_array{ public: data_array (char *inputdata); private: char *next_transaction; void inputread (char *inputdata);};struct bucket{ short itemvalue; int count; int aux; // if 2-itemsets have support < 60000, change // "int" into "unsigned short" (twice) short number_followers; struct bucket *next;};void count (bucket *trienode, short number_buckets);// does currect transaction (globpointer) contain item "column"?inline int inspect (int column){ return ( ( globpointer[column >> 3] & supervector[column] ) );}//inspectclass trie{ public: trie ( ) { }; trie (initialcounts & initialdata, data_array & datagrid); void build_up ( ); void printtrie (char *outputdata); private: int length_count[MAXDEPTH]; void extend (int k); void makeaux0 (bucket *root, int number); FILE *outfilename; struct bucket *root; int triesize; int results[MAXDEPTH]; void copying (struct bucket *p, struct bucket *q, int number_q_buckets); void printout (int depth, struct bucket *trienode, int number_buckets);};int *initialcounts::items_frequency = NULL;short *initialcounts::ranking = NULL;int *initialcounts::itemsorder = NULL;int initialcounts::number_transactions = 0;int initialcounts::lines = 0;int initialcounts::number_freq_items = 0;int initialcounts::min_itemnr = 0;int initialcounts::max_itemnr = 0;// constructorinitialcounts::initialcounts (char *inputdata){ infilename = inputdata; first_pass ( ); second_pass ( ); third_pass ( ); initialsort ( );}//initialcounts::initialcounts// computes minimal and maximal item number that occur in the database;// if these are known in advance, this function can be easily adapted// function reads whole file!void initialcounts::first_pass ( ){ int itemnr; bool first = true; ifstream fin (infilename); if ( ! fin ) cout << "No such filename" << endl; char c; int pos; do { do { fin.get (c); itemnr = 0; pos = 0; while ( ( c >= '0' ) && ( c <= '9' ) && ! fin.eof ( ) ) { itemnr = 10*itemnr + (int)(c) - (int)('0'); pos++; fin.get (c); }//while if ( pos ) { if ( first ) max_itemnr = min_itemnr = itemnr; first = false; if ( itemnr < min_itemnr ) min_itemnr = itemnr; else if ( itemnr > max_itemnr ) max_itemnr = itemnr; }//if } while ( c != '\n' && ! fin.eof ( ) ); } while ( ! fin.eof ( ) ); fin.close ( );}//initialcounts::first_pass// determines frequency for all items, and number of frequent items// function reads whole file!void initialcounts::second_pass ( ){ int k; ifstream fin (infilename); if ( ! fin ) cout << "No such filename" << endl; int itemrange = max_itemnr-min_itemnr+1; init_items_frequency = new int[itemrange]; for ( k = 0; k < itemrange; k++ ) init_items_frequency[k] = 0; char c; int item, pos; do { do { fin.get (c); item = 0; pos = 0; while ( ( c >= '0' ) && ( c <= '9' ) && ! fin.eof ( ) ) { item = 10*item + (int)(c) - (int)('0'); pos++; fin.get (c); }//while if ( pos ) init_items_frequency[item-min_itemnr]++; } while ( c != '\n' && ! fin.eof ( ) ); } while ( ! fin.eof ( ) ); fin.close ( ); for ( k = 0; k < itemrange; k++ ) if ( init_items_frequency[k] >= minsup ) number_freq_items++;// printf ("Number of frequent items: %d\n", number_freq_items);}//initialcounts::second_pass// determines number of relevant transactions, i.e., those// that contain at least two frequent items// (those that contain one frequent item have already been accounted for// while determining the frequency of the single items)// function reads whole file!void initialcounts::third_pass ( ){ number_transactions = 0; ifstream fin (infilename); if ( ! fin ) cout << "No such filename" << endl; char c; int item, pos, items_in_trans; bool line; do { line = false; items_in_trans = 0; do { fin.get (c); item = 0; pos = 0; while ( ( c >= '0' ) && ( c <= '9' ) && ! fin.eof ( ) ) { item = 10*item + (int)(c) - (int)('0'); pos++; line = true; fin.get (c); }//while if ( pos && init_items_frequency[item-min_itemnr] >= minsup ) items_in_trans++; } while ( c != '\n' && ! fin.eof ( ) ); if ( line ) lines++; if ( items_in_trans >= 2 ) number_transactions++; } while ( ! fin.eof ( ) );// printf ("Number of relevant transactions: %d\n", number_transactions);// printf ("Number of lines: %d\n", lines); fin.close ( );}//initialcounts::third_pass// sort items with respect to support - and renumbervoid initialcounts::initialsort ( ){ int left_cursor = 0; int right_cursor = max_itemnr-min_itemnr; int itemrange = right_cursor+1; int *items_numbers; int i, j, k; items_numbers = new int[number_freq_items]; for ( k = 0; k < number_freq_items; k++ ) items_numbers[k] = k; while ( left_cursor < right_cursor ) {
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -