?? dci.cc
字號:
if (!DCI_dataset.VD_is_allocated()) { // trunc and set global pruning mask only if DCP continues oo.trunc(); counters.set_num_of_trans(oo.get_num_of_trans()); counters.init_global_pruning(); } T1 count; int num_freq=0, k=0; set_of_frequents<T,T1> *set_freq; set_freq = new set_of_frequents<T,T1>(2); T t_mapped[2]; if (write_output) { // dump to file frequent 2-itemsets FSout o(OUTF, 2); // Iter=2 if(!o.isOpen()) { cerr << OUTF << " could not be opened for writing!" << endl; exit(1); } static const int SZ_NUM = 128; char cand_unmapped[SZ_NUM * (2+1)]; // +1 for storing count int sz1, sz2, num_written; // loop over the first item for (int i=0; i < m1-1; i++) { t_mapped[0] = i; memcpy(&cand_unmapped[0], counters.unmap_ascii[i], counters.unmap_ascii_len[i]); sz1 = counters.unmap_ascii_len[i]; // loop over the second item for (int j=i+1; j < m1; j++) { t_mapped[1] = j; memcpy(&cand_unmapped[sz1], counters.unmap_ascii[j], counters.unmap_ascii_len[j]); sz2 = sz1 + counters.unmap_ascii_len[j]; count = c.get_cand_count(k++); if (count >= (T1) counters.min_count){ // yes, itemset (i,j) is frequent num_freq++; if (!DCI_dataset.VD_is_allocated()) { // prune only if DCP continues counters.incr_global_pruning((unsigned int) i); counters.incr_global_pruning((unsigned int) j); } set_freq->add_itemset(t_mapped, count); num_written = sprintf(&cand_unmapped[sz2], "(%d)\n", count); o.printSet(cand_unmapped, sz2+num_written); } } } } else { // DON't dump to file frequent 2-itemsets // loop over the first item for (int i=0; i < m1-1; i++) { t_mapped[0] = i; // loop over the second item for (int j=i+1; j < m1; j++) { t_mapped[1] = j; count = c.get_cand_count(k++); if (count >= (T1) counters.min_count){ // yes, itemset (i,j) is frequent num_freq++; if (!DCI_dataset.VD_is_allocated()) { // prune only if DCP continues counters.incr_global_pruning((unsigned int) i); counters.incr_global_pruning((unsigned int) j); } set_freq->add_itemset(t_mapped, count); } } } } if (!DCI_dataset.VD_is_allocated()) // prune only if DCP continues counters.end_global_pruning(2); // parameter: K=2#ifdef VERBOSE print_statistics("DCP", 2, m1*(m1-1)/2, num_freq, time.ReadChronos());#else printf("%d\n",num_freq);#endif return set_freq;}// performs the current iteration with DCP. // If possible it builds VD on the flytemplate <class T, class T1>void DCP_iter(int iter, int max_trans_len, dci_items& counters, DCP_candidates<T,T1>& c, set_of_frequents<T,T1>& next_freq, DCI_vertical_dataset<T>& DCI_dataset){ Chronos time; time.StartChronos(); next_freq.reset(iter); binFSin<T> ii(TEMPORARY_DATASET); if(!ii.isOpen()) { cerr << TEMPORARY_DATASET << " could not be opened!" << endl; exit(1); } bool create=false; binFSout<T> oo(TEMPORARY_DATASET, create); if(!oo.isOpen()) { cerr << TEMPORARY_DATASET << " could not be opened!" << endl; exit(1); } int m1 = counters.get_m1(); dci_transaction<T> t(max_trans_len, iter, m1); bool is_remapped=false; if (DCI_dataset.VD_is_allocated()) { is_remapped = counters.update_map(); // new mapping of items!!! } unsigned int n_tr=0; while(ii.getNextTransaction(t)) { t.prune_global(counters); // t.t_len = 0 or at least iter if (t.t_len > 0) { t.init_prune_local(); c.subset_and_count_and_prune_local(t); t.prune_local(); // t.t_len = 0 or at least iter+1 if (DCI_dataset.VD_is_allocated()) { // write the trans in VD on the fly if (is_remapped) { for (unsigned int h=0; h<t.t_len; h++) t.elements[h] = counters.map[t.elements[h]]; } DCI_dataset.write_t_in_VD(n_tr, t); n_tr++; } else oo.writeTransaction(t); // write pruned trans } } if (!DCI_dataset.VD_is_allocated()) {// trunc and prune only if DCP continues oo.trunc(); counters.set_num_of_trans(oo.get_num_of_trans()); counters.init_global_pruning(); } // Dump frequents T1 count; int num_freq=0; T *v, *v_remapped; v = new T[iter]; v_remapped = new T[iter]; int ind; T1 tmp_c; c.init_read_itemsets(); while ((ind=c.read_next_itemset(v, tmp_c)) != -1) { if ((count = c.get_count(ind)) >= (T1) counters.min_count) { num_freq++; if (!DCI_dataset.VD_is_allocated()) { // prune only if DCP continues for (int i=0; i<iter; i++) { counters.incr_global_pruning((unsigned int) v[i]); } next_freq.add_itemset(v, count); } else { if (is_remapped) { for (int i=0; i<iter; i++) v_remapped[i] = (T) counters.map[v[i]]; // re-map items next_freq.add_itemset(v_remapped, count); } else next_freq.add_itemset(v, count); } } } delete [] v; delete [] v_remapped; if (DCI_dataset.VD_is_allocated() && is_remapped) counters.update_unmap(write_output); // new mapping of items!!! if (write_output) { // dump to file frequent itemsets FSout o(OUTF, iter); if(!o.isOpen()) { cerr << OUTF << " could not be opened for writing!" << endl; exit(1); } next_freq.dump_itemsets(counters, o); } if (!DCI_dataset.VD_is_allocated()) // prune only if DCP continues counters.end_global_pruning(iter); #ifdef VERBOSE print_statistics("DCP", iter, c.get_num_candidates(), num_freq, time.ReadChronos());#else printf("%d\n",num_freq);#endif}// performs the current iteration with DCI // by using the optimizations for sparse datasetstemplate <class T, class T1>void DCI_iter_diffuse(int iter,dci_items& counters, set_of_frequents<T,T1>& previous_freq, set_of_frequents<T,T1>& next_freq, DCI_vertical_dataset<T>& DCI_dataset){ Chronos time; time.StartChronos(); static bool pruning_flag = true; pruning_flag = ! pruning_flag; next_freq.reset(iter); if (!previous_freq.init_gen_cand()) return; static bool first_order=false; T *cand; T *CACHE_items; cand = new T[iter]; CACHE_items = new T[iter]; CACHE_items[0] = counters.get_m1() - 1; // init CACHE - surely different !!! int num_freq = 0; int num_cand = 0; int cand_type; int count; previous_freq.get_prefix(cand); previous_freq.get_suffix(&cand[iter - 2]); num_cand++; cand_type = NEW_PREFIX; DCI_statistics stats; stats.reset_stats(); DCI_dataset.reset_prune_mask(); DCI_dataset.init_cache(iter); counters.init_flag_item(); counters.init_first_item_counts(); while (1) { int start; if (cand_type == NEW_PREFIX) start = 0; else start = iter - 2; int prefix_len; for (prefix_len = start; prefix_len < iter-1; prefix_len++) { if (cand[prefix_len] != CACHE_items[prefix_len]) break; } for (int i = prefix_len; i < iter; i++) { // copy to cache CACHE_items[i] = cand[i]; } if (DCI_dataset.candidate_is_frequent_diffuse(cand, prefix_len, iter, counters.min_count, count, stats, pruning_flag)) { num_freq++; next_freq.add_itemset(cand, (T1) count); if (pruning_flag) { for (int i = 0; i < iter; i++) counters.flag_item[cand[i]] = true; counters.first_item_counts[cand[0]]++; } } cand_type = previous_freq.next_cand(); if (cand_type == END_GEN_CAND) break; else if (cand_type == NEW_SUFFIX) previous_freq.get_suffix(&cand[iter-2]); else { previous_freq.get_prefix(cand); previous_freq.get_suffix(&cand[iter-2]); } num_cand++; } int COMPLEXITY_INTERSECTION, COMPLEXITY_PRUNING; int new_tid_list_size; if (pruning_flag) { int sz_list_proj; // aggiunto // aggiunto parametro stats.get_stats(counters.get_active_items(), num_cand, iter, DCI_dataset.get_tid_list_size(), sz_list_proj, COMPLEXITY_INTERSECTION, COMPLEXITY_PRUNING); new_tid_list_size = DCI_dataset.check_pruning(); if ((sz_list_proj > PRUNE_FACTOR_PROJ*DCI_dataset.get_tid_list_size()) && (new_tid_list_size<DCI_dataset.get_tid_list_size()*PRUNE_FACTOR_NEWLIST) && (COMPLEXITY_PRUNING < COMPLEXITY_INTERSECTION/PRUNE_FACTOR_COMPLEXITY)) { DCI_dataset.prune_VD (new_tid_list_size, counters); // cout << "ORDERING\n"; DCI_dataset.order_bits_diffuse(counters); first_order = true; } // stats.get_stats(counters.get_active_items(), num_cand, iter, // DCI_dataset.get_tid_list_size(), // COMPLEXITY_INTERSECTION, // COMPLEXITY_PRUNING); // new_tid_list_size = DCI_dataset.check_pruning(); // if ((new_tid_list_size < DCI_dataset.get_tid_list_size() * 0.8) && // (COMPLEXITY_PRUNING < COMPLEXITY_INTERSECTION/2)) { // DCI_dataset.prune_VD (new_tid_list_size, counters); // DCI_dataset.order_bits_sparse(counters); // first_order = true; // } } if (first_order == false) { DCI_dataset.order_bits_diffuse(counters); first_order = true; } delete [] cand; delete [] CACHE_items; if (write_output) { // dump to file frequent itemsets FSout o(OUTF, iter); if(!o.isOpen()) { cerr << OUTF << " could not be opened for writing!" << endl; exit(1); } next_freq.dump_itemsets(counters, o); }#ifdef VERBOSE print_statistics("DCIs", iter, num_cand, num_freq, time.ReadChronos());#else printf("%d\n",num_freq);#endif return;}// performs the current iteration with DCI by using the optimizations for sparse datasets and key patternstemplate <class T, class T1>void DCI_iter_diffuse_key(int iter,dci_items& counters, set_of_frequents<T,T1>& previous_freq, set_of_frequents<T,T1>& next_freq, DCI_vertical_dataset<T>& DCI_dataset){ Chronos time; time.StartChronos(); next_freq.reset(iter); if (!previous_freq.init_gen_cand()) return; static bool first_order=false; T *cand, *cand_subset; T *CACHE_items; T key_pair[2]; T1 count_pair[2]; cand = new T[iter]; cand_subset = new T[iter-1]; CACHE_items = new T[iter]; CACHE_items[0] = counters.get_m1() - 1; // init CACHE - surely different !!! int num_freq = 0; int num_cand = 0; int cand_type; int count; previous_freq.get_prefix(cand); previous_freq.get_suffix(&cand[iter - 2], key_pair, count_pair); num_cand++; cand_type = NEW_PREFIX; DCI_statistics stats; stats.reset_stats(); DCI_dataset.reset_prune_mask(); DCI_dataset.init_cache(iter); counters.init_flag_item(); counters.init_first_item_counts(); T key = 0; T1 min_count; T min_key; int one_search=0; while (1) { if ((key_pair[0] != (T) - 1) || (key_pair[1] != (T) - 1)) {// cand is surely not a key pattern one_search++; if (cand[iter-2] == key_pair[0]) { // the key pattern is the first generator
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -