?? aprioriall.java
字號:
// noprune
if(doPruning)
Bk = new boolean[Lk.length];
for(i=k=m=0;i<Lk.length;i++) // for each k-sequence
{
if(boundsLk[i][0]==-1) { // if doesn't make any candidate
Sk[i][0] = -1;
continue; // to next k-sequence
}
Sk[i][0] = k;
n = boundsLk[i][1] - boundsLk[i][0]; // number of parents it can be merged with
for(j=0;j<=n;j++)
{
l = boundsLk[i][2]+j; // candidate number
if(suppCk[l]>=m_minSuppInt)
{
newLk[k] = new int[3];
newLk[k][0] = i; newLk[k][1] = j+boundsLk[i][0]; newLk[k][2] = suppCk[l];
suppCk[l] = -m-1;
// noprune
if(doPruning)
{
Bk[i] = true; // mark as not maximal
Bk[boundsLk[i][0] + j] = true;
}
k++;
} else m++;
}
if(k!=Sk[i][0]) Sk[i][1] = k-1;
else Sk[i][0] = -1;
}
allLargeSequences.add(Lk); // add to results
// noprune
if(doPruning)
allBk.add(Bk);
Lk = newLk;
System.out.print("lits"); // debug
tidTable = checkTIDs(newTidTable,suppCk); // correct tidTable
if(debug>1)
{
System.out.println("Lk"+nStep);
printLk(Lk);
System.out.println("TID"+nStep);
printTIDs(tidTable);
}
}
allLargeSequences.add(Lk); // add to results last Lk
allBk.add(null);
if(debug>0)
{
System.out.println("Maximum customer sequence length "+maxCustSeqLen);
System.out.println("Maximum tid table size was "+maxTidSize);
}
saveResults();
}
/**
* Corrects tidTable: indices in Ck -> indices in Lk
* @param lits new generated tidTable
* @param supp support counters of candidates
* @return corrected tidTable as an arrays
*/
private int[][][] checkTIDs(int[][][] lits, int[] supp)
{
int i,j,k,l,m,n,p,q;
// number of non-empty customers
for(i=n=0;i<lits.length;i++) if(lits[i]!=null) n++;
int[][][] newTID = new int[n][][];
for(i=k=0;i<lits.length;i++) // for each customer
{
if(lits[i]==null) continue;
// actual number of rows
for(j=n=0;j<lits[i].length;j++) if(lits[i][j]!=null) n++;
newTID[k] = new int[n][];
for(j=m=0;j<lits[i].length;j++)
{
if(lits[i][j]==null) continue;
// number of supported candidates in row
for(p=n=0;p<lits[i][j].length;p++) if(supp[lits[i][j][p]]<0) n++;
if(n==0) continue;
if(maxTidSize<n) maxTidSize = n; // for debug
newTID[k][m] = new int[n]; // new row
for(p=q=0;p<lits[i][j].length;p++)
{
l = lits[i][j][p]; // element of old row
// supp[l] must be < 0 for supported candidate cause it contains -m-1
if(supp[l]<0) newTID[k][m][q++] = lits[i][j][p] + supp[l] + 1;
}
java.util.Arrays.sort(newTID[k][m]);
m++;
}
k++;
}
return newTID;
}
private int findStart(int[] arr, int lo, int hi)
{
if(hi<arr[0]||lo>arr[arr.length-1]) return -1;
if(lo<arr[0]) return 0;
int p = java.util.Arrays.binarySearch(arr,lo);
if(p<0) p = -p-1;
return p;
}
/*
private void addResult(CustomItemSetList cisl, int[] ab, int supp)
{
int i;
CustomSequence cs = new CustomSequence();
for(i=0;i<ab.length;i++)
cs.addItemSet(allLits[ab[i]]);
cs.setSupportCount(supp);
cisl.addItemSetList(cs);
}
*/
/**
* Save results in specified way
* process each large k-sequence begin from L2
* excluding sequences which are parents of L(k+1) sequences as it's marked in Bk
* and exclude large sequences contained in other large sequences
*/
private Vector saveResults()
{
int i,j,l,k,c;
int[][] Lk;
boolean[] Bk = null;
Vector cisl = new Vector(); // result sequences list
Vector all = new Vector();
for(i=0;i<allLits.length;i++) // for litemset
{
// noprune
if(doPruning)
{
if(B1!=null&&B1[i]) continue; // if it make and 2-sequence exclude it
for (j = 0; j < allLits.length; j++) // for each litemset except for same
// i-th litemsets contained in j-th
if (i != j && subset(allLitsArr[i], allLitsArr[j]))
break;
if (j != allLits.length)
continue; // take next i-th litemset
}
int[] l1 = new int[2]; // 1-sequence with support
l1[0] = i; l1[1] = allLitsSupp[i];
all.add(l1);
}
if(debug>0) System.out.println("L1 done: "+all.size());
if(allLargeSequences.size()!=0) // if there is any L2 and more sequences
{
Lk = (int[][])allLargeSequences.get(0); // take L2 sequences
// noprune
if(doPruning)
Bk = (boolean[])allBk.get(0); // take L2's mask
int[][] ab = new int[Lk.length][3]; // sequences as arrays indices in litemsets array
Vector v = new Vector();
for(i=0;i<Lk.length;i++)
{
ab[i][0] = Lk[i][0]; // copy to ab
ab[i][1] = Lk[i][1];
ab[i][2] = Lk[i][2]; // support
if(Bk==null||!Bk[i]) // if mask exists and L2[i] isn't marked as not maximal
v.add(ab[i]);
}
// noprune
if(doPruning)
{
// contains all sequences not ejected at previous step
int[][] arr = new int[v.size()][];
for (i = 0; i < v.size(); i++)
arr[i] = (int[]) v.get(i);
sweep(all, arr); // sweep all k-sequences which contained in other k-sequences
}
else all.addAll(v);
if(debug>0) System.out.println("L2 done: "+all.size());
int[][] newab; // new (k+1)-sequences maked from k-sequences
for(l=1;l<allLargeSequences.size();l++) // for all Lk begin from L3
{
Lk = (int[][])allLargeSequences.get(l); // take pairs of parents
// noprune
if(doPruning)
Bk = (boolean[])allBk.get(l); // take Lk's mask
newab = new int[Lk.length][l+3]; // (k+1)-sequences as arrays of indices
v.clear();
for(i=0;i<Lk.length;i++)
{
System.arraycopy(ab[Lk[i][0]],0,newab[i],0,l+1); // first k elements
newab[i][l+1] = ab[Lk[i][1]][l]; // last k+1 element
newab[i][l+2] = Lk[i][2]; // support
if(Bk==null||!Bk[i])
v.add(newab[i]);
}
// no prune
if(doPruning)
{
int[][] arr = new int[v.size()][];
for (i = 0; i < v.size(); i++)
arr[i] = (int[]) v.get(i);
sweep(all, arr);
}
else all.addAll(v);
ab = newab;
if(debug>0) System.out.println("L"+(l+2)+" done: "+all.size());
}
}
// all large sequences which are not ejected yet
int[][] allArr = new int[all.size()][];
for(i=0;i<all.size();i++)
allArr[i] = (int[])all.get(i);
all = null;
// noprune
boolean bAdd = true;
for(i=0;i<allArr.length;i++) // for each sequence
{
// noprune
if(doPruning)
{
for (j = 0; j < allArr.length; j++)
if (i != j) { // for each another sequence
// check if i-th sequence contained in j-th sequence
for (l = k = 0;
k < allArr[j].length - 1 && l < allArr[i].length - 1; k++)
if (subset(allLitsArr[allArr[i][l]], allLitsArr[allArr[j][k]]))
l++;
if (l == allArr[i].length - 1)
break; // contained
}
if (j == allArr.length) { // not contained => add to result
bAdd = true;
} else bAdd = false;
}
if(bAdd) {
CustomSequence cs = new CustomSequence();
for(j=0;j<allArr[i].length-1;j++)
cs.addItemSet(allLits[allArr[i][j]]);
cs.setSupportCount(allArr[i][allArr[i].length-1]);
cisl.add(cs);
}
}
rules = cisl;
cleanup();
return rules;
}
public Vector getSequentialRules() {
return rules;
}
private void sweep(Vector all, int[][] arr)
{
int i,j,k,c;
for(i=0;i<arr.length;i++)
{
for(j=0;j<arr.length;j++)
if(i!=j)
{
for(k=c=0;k<arr[i].length-1;k++)
{
int[] ai = allLitsArr[arr[i][k]];
int[] bi = allLitsArr[arr[j][k]];
if(!subset(ai,bi)) break;
}
if(k==arr[i].length-1) break;
}
if(j==arr.length) all.add(arr[i]);
}
}
/**
* return true if bi contains ai
* @param ai first itemset
* @param bi second itemset
* @return true if bi contains ai
*/
private boolean subset(int[] ai, int[] bi)
{
int i,k;
if(ai.length>bi.length) return false;
for(i=k=0;i<ai.length;i++)
{
while(k<bi.length&&bi[k]<ai[i]) k++;
if(k==bi.length) return false;
if(bi[k]!=ai[i]) return false;
}
return true;
}
/**
* initialize vectors used in algorithms for containing data
* associated with result
*/
private void initVectors()
{
allLargeItemsets = new Vector(20);
allLargeItemsetsSupp = new Vector(20);
allTIDs = new Vector(20);
allLargeSequences = new Vector(20);
allBk = new Vector(20);
rules.clear();
}
/**
* clear algorithm internal data
*/
private void cleanup()
{
allLits = null;
allLargeSequences = null;
allBk = null;
B1 = null;
allLitsArr = null;
allLitsSupp = null;
}
/**
* clear apriori phase internal data
*/
private void clearApriori()
{
L = null;
TIDTable = null;
CustomerID = null;
allLargeItemsets = null;
allLargeItemsetsSupp = null;
allTIDs = null;
}
/**
* prints Lk, used debugging
*/
private void printLk(int[][] Lk)
{
for(int i=0;i<Lk.length;i++)
if(Lk[i]!=null)
{
System.out.print(i+":");
for(int j=0;j<Lk[i].length;j++) System.out.print(" "+Lk[i][j]);
System.out.println();
}
}
/**
* prints tidTable, used for debugging
*/
private void printTIDs(int[][][] lits)
{
for(int i=0;i<lits.length;i++)
{
System.out.println("Customer #"+i+":");
if(lits[i]!=null)
for(int j=0;j<lits[i].length;j++)
{
System.out.print("\t");
if(lits[i][j]!=null)
for(int k=0;k<lits[i][j].length;k++) System.out.print(lits[i][j][k]+" ");
else System.out.print("empty");
System.out.println();
}
else System.out.println("\tempty");
}
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -