?? dci.cc
字號:
num_freq++; next_freq.add_itemset(cand, (T1) count_pair[1], cand[iter-2]); if (!first_order) { for (int i = 0; i < iter; i++) counters.flag_item[cand[i]] = true; counters.first_item_counts[cand[0]]++; } } else if (cand[iter-1] == key_pair[1]) {// the key pattern is the second generator num_freq++; next_freq.add_itemset(cand, (T1) count_pair[0], cand[iter-1]); if (!first_order) { for (int i = 0; i < iter; i++) counters.flag_item[cand[i]] = true; counters.first_item_counts[cand[0]]++; } } else {// the key pattern is another subset: we must find it if (key_pair[0] != (T) -1) key=key_pair[0]; else key=key_pair[1]; int j=0; for (int i=0; i<iter; i++) if (cand[i] != key) cand_subset[j++] = cand[i]; T tmp_key; if (previous_freq.find_one_subset(cand_subset, tmp_key, count)) { num_freq++; next_freq.add_itemset(cand, (T1) count, key); if (!first_order) { for (int i = 0; i < iter; i++) counters.flag_item[cand[i]] = true; counters.first_item_counts[cand[0]]++; } } } } else { if (count_pair[0] < count_pair[1]) { // remember min_count and corresponding key between generators min_count = count_pair[0]; min_key = cand[iter - 1]; } else { min_count = count_pair[1]; min_key = cand[iter - 2]; } T other_key; bool is_key_pattern = true; bool pruned = false; for (int del=iter-3; del>=0; del--) { // look for the subset with minimum count int j = 0; for (int i=0; i<iter; i++) if (i != del) cand_subset[j++] = cand[i]; if (previous_freq.find_one_subset(cand_subset, other_key, count) == 0) { pruned = true; break; // a subset is infrequent, prune the cand and take the next one } if (other_key == (T) -1) {// remember min_count and corresponding key if (min_count >= (T1) count) { min_count = count; min_key = cand[del]; } } else {// cand is not a key pattern is_key_pattern = false; int j1=0; for (int i=0; i<iter; i++) if (cand[i] != other_key) cand_subset[j1++] = cand[i]; T tmp_key; // check if the associated subset is frequent if (previous_freq.find_one_subset(cand_subset, tmp_key, count)) { num_freq++; next_freq.add_itemset(cand, (T1) count, other_key); if (!first_order) { for (int i = 0; i < iter; i++) counters.flag_item[cand[i]] = true; counters.first_item_counts[cand[0]]++; } break; } } } if (!pruned && is_key_pattern) // we must count candidate support! { int prefix_len; for (prefix_len = 0; prefix_len < iter-1; prefix_len++) { if (cand[prefix_len] != CACHE_items[prefix_len]) break; } for (int i = prefix_len; i < iter; i++) { // copy to cache CACHE_items[i] = cand[i]; } if (DCI_dataset.candidate_is_frequent_diffuse(cand, prefix_len, iter, counters.min_count, count, stats, !first_order)) { if (count != (int) min_count) next_freq.add_itemset(cand, (T1) count, (T) -1); else next_freq.add_itemset(cand, (T1) count, min_key); num_freq++; if (!first_order) { for (int i = 0; i < iter; i++) counters.flag_item[cand[i]] = true; counters.first_item_counts[cand[0]]++; } } } } // generate next candidate cand_type = previous_freq.next_cand(); if (cand_type == END_GEN_CAND) break; else if (cand_type == NEW_PREFIX) previous_freq.get_prefix(cand); previous_freq.get_suffix(&cand[iter-2], key_pair, count_pair); num_cand++; } if (first_order == false) { DCI_dataset.order_bits_diffuse(counters); first_order = true; } delete [] cand; delete [] cand_subset; delete [] CACHE_items; if (write_output) { // dump to file frequent itemsets FSout o(OUTF, iter); if(!o.isOpen()) { cerr << OUTF << " could not be opened for writing!" << endl; exit(1); } next_freq.dump_itemsets(counters, o); }#ifdef VERBOSE print_statistics("DCIsk", iter, num_cand, num_freq, time.ReadChronos());// cout << "one search : " << one_search << " ("<< ((float) one_search)/num_cand*100 << ")"<< endl;#else printf("%d\n",num_freq);#endif return;}// performs the current iteration with DCI // by using the optimizations for dense datasetstemplate <class T, class T1>void DCI_iter_compact_key(int iter,dci_items& counters, set_of_frequents<T,T1>& previous_freq, set_of_frequents<T,T1>& next_freq, DCI_vertical_dataset<T>& DCI_dataset){ Chronos time; time.StartChronos(); next_freq.reset(iter); if (!previous_freq.init_gen_cand()) return; T *cand, *cand_subset; T *CACHE_items; T key_pair[2]; T1 count_pair[2]; cand = new T[iter]; cand_subset = new T[iter-1]; CACHE_items = new T[iter]; CACHE_items[0] = counters.get_m1() - 1; // init CACHE - surely different !!! int num_freq = 0; int num_cand = 0; int cand_type; int count; previous_freq.get_prefix(cand); previous_freq.get_suffix(&cand[iter - 2], key_pair, count_pair); num_cand++; cand_type = NEW_PREFIX; DCI_statistics stats; stats.reset_stats(); DCI_dataset.init_cache(iter); T key = 0; T1 min_count; T min_key; int one_search=0; while (1) { if ((key_pair[0] != (T) - 1) || (key_pair[1] != (T) - 1)) { // cand is surely not a key pattern one_search++; if (cand[iter-2] == key_pair[0]) { // the key pattern is the first generator num_freq++; next_freq.add_itemset(cand, (T1) count_pair[1], cand[iter-2]); } else if (cand[iter-1] == key_pair[1]) { // the key pattern is the second generator num_freq++; next_freq.add_itemset(cand, (T1) count_pair[0], cand[iter-1]); } else { // the key pattern is another subset: we must find it if (key_pair[0] != (T) -1) key=key_pair[0]; else key=key_pair[1]; int j=0; for (int i=0; i<iter; i++) if (cand[i] != key) cand_subset[j++] = cand[i]; T tmp_key; if (previous_freq.find_one_subset(cand_subset, tmp_key, count)) { num_freq++; next_freq.add_itemset(cand, (T1) count, key); } } } else { if (count_pair[0] < count_pair[1]) { // remember min_count and corresponding key between generators min_count = count_pair[0]; min_key = cand[iter - 1]; } else { min_count = count_pair[1]; min_key = cand[iter - 2]; } T other_key; bool is_key_pattern = true; bool pruned = false; for (int del=iter-3; del>=0; del--) { // look for the subset with minimum count int j = 0; for (int i=0; i<iter; i++) if (i != del) cand_subset[j++] = cand[i]; if (previous_freq.find_one_subset(cand_subset, other_key, count)==0){ pruned = true; break; // a subset is infrequent, prune the cand and take the next one } if (other_key == (T) -1) { // remember min_count and corresponding key if (min_count >= (T1) count) { min_count = count; min_key = cand[del]; } } else {// cand is not a key pattern is_key_pattern = false; int j1=0; for (int i=0; i<iter; i++) if (cand[i] != other_key) cand_subset[j1++] = cand[i]; T tmp_key; // check if the associated subset is frequent if (previous_freq.find_one_subset(cand_subset, tmp_key, count)) { num_freq++; next_freq.add_itemset(cand, (T1) count, other_key); break; }// else // cout << "ERROR\n"; } } if (!pruned && is_key_pattern) // we must count candidate support! { int prefix_len; for (prefix_len = 0; prefix_len < iter-1; prefix_len++) { if (cand[prefix_len] != CACHE_items[prefix_len]) break; } for (int i = prefix_len; i < iter; i++) { // copy to cache CACHE_items[i] = cand[i]; } if (DCI_dataset.candidate_is_frequent_compact(cand, prefix_len, iter, (int) counters.min_count, count, stats)) { num_freq++; if (count != (int) min_count) next_freq.add_itemset(cand, (T1) count, (T) -1); else next_freq.add_itemset(cand, (T1) count, min_key); } } } // generate next candidate cand_type = previous_freq.next_cand(); if (cand_type == END_GEN_CAND) break; else if (cand_type == NEW_PREFIX) previous_freq.get_prefix(cand); previous_freq.get_suffix(&cand[iter-2], key_pair, count_pair); num_cand++; } delete [] cand; delete [] cand_subset; delete [] CACHE_items; if (write_output) { // dump to file frequent itemsets FSout o(OUTF, iter); if(!o.isOpen()) { cerr << OUTF << " could not be opened for writing!" << endl; exit(1); } next_freq.dump_itemsets(counters, o); } #ifdef VERBOSE print_statistics("DCIdk", iter, num_cand, num_freq, time.ReadChronos());// cout << "one search : " << one_search << " ("<< ((float) one_search)/num_cand*100 << ")"<< endl;#else printf("%d\n",num_freq);#endif return;}// performs the current iteration with DCI by using the optimizations for dense datasetstemplate <class T, class T1>void DCI_iter_compact(int iter,dci_items& counters, set_of_frequents<T,T1>& previous_freq, set_of_frequents<T,T1>& next_freq, DCI_vertical_dataset<T>& DCI_dataset){ Chronos time; time.StartChronos(); next_freq.reset(iter); if (!previous_freq.init_gen_cand()) return; T *cand; T *CACHE_items; cand = new T[iter]; CACHE_items = new T[iter]; CACHE_items[0] = counters.get_m1() - 1; // init CACHE - surely different !!! int num_freq = 0; int num_cand = 0; int cand_type; int count; previous_freq.get_prefix(cand); previous_freq.get_suffix(&cand[iter - 2]); num_cand++; cand_type = NEW_PREFIX; DCI_statistics stats; stats.reset_stats(); DCI_dataset.init_cache(iter); while (1) { int start; if (cand_type == NEW_PREFIX) start = 0; else start = iter - 2; int prefix_len; for (prefix_len = start; prefix_len < iter-1; prefix_len++) { if (cand[prefix_len] != CACHE_items[prefix_len]) break; } for (int i = prefix_len; i < iter; i++) { // copy to cache CACHE_items[i] = cand[i]; } //DCI_dataset.set_is_included_flags(cand, prefix_len, iter); if (DCI_dataset.candidate_is_frequent_compact(cand, prefix_len, iter, (int) counters.min_count, count, stats)) { num_freq++; next_freq.add_itemset(cand, (T1) count); } cand_type = previous_freq.next_cand(); if (cand_type == END_GEN_CAND) break; else if (cand_type == NEW_SUFFIX) previous_freq.get_suffix(&cand[iter-2]); else { previous_freq.get_prefix(cand); previous_freq.get_suffix(&cand[iter-2]); } num_cand++; } delete [] cand; delete [] CACHE_items; if (write_output) { // dump to file frequent itemsets FSout o(OUTF, iter); if(!o.isOpen()) { cerr << OUTF << " could not be opened for writing!" << endl; exit(1); } next_freq.dump_itemsets(counters, o); } #ifdef VERBOSE print_statistics("DCId", iter, num_cand, num_freq, time.ReadChronos());#else printf("%d\n",num_freq);#endif return;}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -