?? prefixspan.cpp
字號:
/* PrefixSpan: An efficient algorithm for sequential pattern mining $Id: prefixspan.cpp,v 1.3 2002/01/07 01:30:04 taku-ku Exp $; Copyright (C) 2002 Taku Kudo All rights reserved. This is free software with ABSOLUTELY NO WARRANTY. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA*/#include <iostream>#include <map>#include <vector>#include <string>#include <string.h>#include <unistd.h>#include <stdlib.h>using namespace std;class string2string {public: string operator() (const string &str) const { return str; };};class string2int {public: unsigned int operator() (const string &str) const { return atoi (str.c_str()); };};template <class T = unsigned int, class F = string2int> class PrefixSpan {private: vector < vector <T> > transaction; vector < pair <T, unsigned int> > pattern; T init; unsigned int minsup; unsigned int minpat; string delimiter; bool verbose; ostream *os; void split (const string &src, const string& key, vector <T>& result) { result.clear(); int len = src.size(); int i = 0; int si = 0; while(i < len) { while (i < len && key.find(src[i]) != string::npos) { si++; i++; }; while (i < len && key.find(src[i]) == string::npos) i++; result.push_back(F()(src.substr(si,i-si))); si = i; } } void project (T opat, vector <pair <unsigned int, int> > &projected) { if (verbose) cerr << "## projected by pattern [" << opat << "], " << projected.size() << " instances\n"; map <T, vector <pair <unsigned int, int> > > cnt; for (unsigned int i = 0; i < projected.size(); i++) { int pos = projected[i].second; unsigned int id = projected[i].first; unsigned int size = transaction[id].size(); map <T, int> tmp; for (unsigned int j = pos + 1; j < size; j++) { T item = transaction[id][j]; if (tmp.find (item) == tmp.end()) tmp[item] = j ; } for (map <T, int>::iterator k = tmp.begin(); k != tmp.end(); ++k) cnt[k->first].push_back (make_pair <unsigned int, int> (id, k->second)); } if (verbose) cerr << "## " << cnt.size() << " items found\n"; for (map <T, vector <pair <unsigned int, int> > >::iterator l = cnt.begin (); l != cnt.end (); ) { if (l->second.size() < minsup) { map <T, vector <pair <unsigned int, int> > >::iterator tmp = l; tmp = l; ++tmp; cnt.erase (l); l = tmp; } else { ++l; } } if (verbose) cerr << "## pruned to " << cnt.size() << " items found\n"; if (cnt.size () == 0) { if (minpat <= pattern.size()) { for (unsigned int i = 0; i < pattern.size(); i++) *os << (i ? " " : "") << pattern[i].first << delimiter << pattern[i].second; *os << endl; } return; } for (map <T, vector <pair <unsigned int, int> > >::iterator l = cnt.begin (); l != cnt.end(); ++l) { pattern.push_back (make_pair <T, unsigned int> (l->first, l->second.size())); project (l->first, l->second); pattern.erase (pattern.end()); } }public: PrefixSpan (T _init, unsigned int _minsup = 1, unsigned int _minpat = 1, string _delimiter = "/", bool _verbose = false): init (_init), minsup(_minsup), minpat (_minpat), delimiter (_delimiter), verbose (_verbose) {}; ~PrefixSpan () {}; istream &read (istream &is) { string line; vector <T> tmp; while (getline (is, line)) { split (line, " ", tmp); transaction.push_back (tmp); } return is; } void run (ostream &_os) { os = &_os; vector <pair <unsigned int, int> > root; for (unsigned int i = 0; i < transaction.size(); i++) root.push_back (make_pair (i, -1)); project (init, root); } void clear () { transaction.clear (); pattern.clear (); }};int main (int argc, char **argv){ extern char *optarg; bool verbose = false; unsigned int minsup = 1; unsigned int minpat = 1; string delimiter = "/"; bool useint = true; int opt; while ((opt = getopt(argc, argv, "vsM:m:d:")) != -1) { switch(opt) { case 'v': verbose = true; break; case 'm': minsup = atoi (optarg); break; case 'M': minpat = atoi (optarg); break; case 's': useint = false; break; case 'd': delimiter = string (optarg); break; default: cout << "Usage: " << argv[0] << " [-m minsup] [-M minpat] [-v] [-s] [-d delimiter] < data .." << endl; return -1; } } if (useint) { PrefixSpan<unsigned int, string2int> prefixspan (0, minsup, minpat, delimiter, verbose); prefixspan.read (cin); prefixspan.run (cout); } else { PrefixSpan<string, string2string> prefixspan ("", minsup, minpat, delimiter, verbose); prefixspan.read (cin); prefixspan.run (cout); } return 0;}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -