?? occlonglist.cpp
字號:
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Class: OccLongList
Description: Used to store the occurrence list of a rooted
ordered pattern tree, for CMOrderedTreeMiner algorithms
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#pragma warning (disable: 4083 4786)
#include "OccLongList.h"
typedef map<int, vector< vector<short> > > MAP_frequency;
void OccLongList::insert(int newdocTid, int newpathTid, int newLocation)
{
pathocc occ;
occ.dococ=newdocTid;
occ.pathoc = newpathTid;
occ.pos=newLocation;
occurrenceLong.push_back(occ);
if ( lastTid != newdocTid ) { //if comes from a new transaction
lastTid = newdocTid;
mySupport++;
}
}
bool OccLongList::combineList(const OccLongList& mother, const OccList& newNodes)
{
occurrenceLong.clear();
int nodeId ;
int motherId ;
nodeId = 0;
motherId = 0;
mySupport = 0;
int tempTid = -1;
while ( nodeId < newNodes.occurrence.size() ) {
//first, find the correct mother
while ( motherId != newNodes.occurrence[nodeId].first ) motherId++;
occurrenceLong.push_back(mother.occurrenceLong[motherId]);
occurrenceLong.back().pos = newNodes.occurrence[nodeId].second;
if ( tempTid != mother.occurrenceLong[motherId].dococ ) {
tempTid = mother.occurrenceLong[motherId].dococ;
mySupport++;
}
while ( (nodeId+1) < newNodes.occurrence.size()
&& (newNodes.occurrence[nodeId].first == newNodes.occurrence[nodeId+1].first) ) {
nodeId++;
occurrenceLong.push_back(mother.occurrenceLong[motherId]);
occurrenceLong.back().pos = newNodes.occurrence[nodeId].second;
}
nodeId++;
}
return true;
}
int includepath(vector<short> a, vector<short> b)
{
int i,j;
if(a.size()>b.size())
{
i=j=0;
while (i<a.size() && j<b.size())
{
if(a[i]!=b[j]) i++;
else{
j++; i++;
}
}
if(j==b.size()) return 1;
}
if(b.size()>a.size())
{
i=j=0;
while (i<b.size() && j<a.size())
{
if(b[i]!=a[j]) i++;
else{
j++; i++;
}
}
if(j==a.size()) return -1;
}
return 0;
}
void OccLongList::explore(const vector<bool>& isFrequent,
const vector<TextTree>& database,
const int& support,
map<int, vector< vector<short> > >& frequency,
vector< vector<short> >& maximal,
vector< vector<pathocc > >& maximalocclonglist)
{
//for debug
//cout << "support of current path is: " << mySupport << endl;
//for(int m=0;m<currentPath.size();m++)
// cout<<currentPath[m]<<" ";
//cout<<endl;
int tempV = currentPath.size();
MAP_frequency::iterator posfrequency;
posfrequency = frequency.find(tempV);
if(posfrequency != frequency.end())
frequency[tempV].push_back(currentPath);
else {
vector< vector<short> > aa;
frequency.insert(MAP_frequency::value_type(tempV,aa));
frequency[tempV].push_back(currentPath);
}
//search the maximal
vector< vector<short> >::iterator posmaximal;
vector< vector<pathocc> >::iterator posmaximalocc;
bool insertflag=true;
if( maximal.empty()) {
maximal.push_back(currentPath);
maximalocclonglist.push_back(occurrenceLong);
}
else{
insertflag=true;
//掃描整個maximal數組
posmaximalocc = maximalocclonglist.begin();
for(posmaximal = maximal.begin(); posmaximal!=maximal.end();++posmaximal,++posmaximalocc)
{
/*cout<<"currentPath: ";
for(int m=0;m<currentPath.size();m++)
cout<<currentPath[m]<<" ";
cout<<endl;
cout<<"pospath: ";
for( m=0; m< posmaximal->size();m++)
cout<< (*posmaximal)[m]<<" ";
cout<<endl;*/
if( includepath(*posmaximal, currentPath) == 1){
//posmaximal包含currentpath
insertflag=false;
}
else if( includepath(*posmaximal, currentPath) == -1){
//currentpath包含posmaximal
maximalocclonglist.erase(posmaximalocc);
maximal.erase(posmaximal);
posmaximal--;
posmaximalocc--;
}
}
if(insertflag)
{
maximal.push_back(currentPath);
maximalocclonglist.push_back(occurrenceLong);
}
}
//step 3, explore all the expansions
//step 3.1, explore the children of the rightmost node
map<short,OccList> potentialChildren;
map<short,OccList>::iterator pos;
for (int n = 0; n < occurrenceLong.size(); n++ ) {
int myTid = occurrenceLong[n].dococ;
int myPathid = occurrenceLong[n].pathoc;
int myLocation = occurrenceLong[n].pos;
int k = myLocation+1;
//here, redundancy must be recorded also.
while ( k <database[myTid].path[myPathid].second.size() ) {
if ( isFrequent[database[myTid].path[myPathid].second[k] - MIN_VERTEX] == true) {
potentialChildren[database[myTid].path[myPathid].second[k] - MIN_VERTEX].insert(myTid,k,n);
}
k++;
}
}
//for test
/*for( pos= potentialChildren.begin() ; pos !=potentialChildren.end();++pos)
{
cout<<pos->first+MIN_VERTEX<<" "<<pos->second.mySupport<<endl;
for(m=0;m<pos->second.occurrence.size();m++)
cout<<pos->second.occurrence[m].first<<" "<<pos->second.occurrence[m].second<<endl;
cout<<endl;
}*/
for ( pos = potentialChildren.begin(); pos != potentialChildren.end(); ++pos ) {
if ( pos->second.mySupport >= support ) { //a frequent extension!
currentPath.push_back(pos->first + MIN_VERTEX);
OccLongList newLongList;
newLongList.combineList(*this,pos->second);
newLongList.explore(isFrequent,database,support,frequency,maximal,maximalocclonglist);
currentPath.erase(currentPath.begin()+currentPath.size()-1);
}
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -