?? aprioriall.java
字號:
// init all vectors
initVectors();
if (customerTransSet == null) {
customerTransSet = convertMiningminingInputStream();
};
fireMiningEvent(new CreationModelBeginMessage(getAlgorithmLevel()));
// apply modified apriori tid for finding of litemsets
apriori(customerTransSet);
if(debug>1)
{
System.out.println("Litemsets:");
for(i=n=0;i<allLargeItemsets.size();i++)
{
int[][] lk = (int[][])allLargeItemsets.get(i);
for(j=0;j<lk.length;j++,n++)
{
System.out.print(n+":");
for(k=0;k<lk[j].length;k++)
System.out.print(" "+lk[j][k]);
System.out.println();
}
}
}
// transformation using information found at previous phase
// allTIDs and allLargeItemsets was created in the apriori method
// total number of transactions of all customers
int nTotalTrans = ((int[][])allTIDs.get(0)).length;
int[][] lits;
// ts will contain all transformed transactions
int[][] ts = new int[nTotalTrans][];
for(i=0;i<nTotalTrans;i++)
{
// for each transaction #i
// counting number of all litemsets it contains
for(l=n=0;l<allTIDs.size();l++)
{
lits = (int[][])allTIDs.get(l);
if(lits[i]!=null)
{
for(m=0;m<lits[i].length;m++)
if(lits[i][m]!=-1) n++; // increment counter
} else break;
}
if(n==0) continue; // take next transaction if no litemsets
ts[i] = new int[n];
// copying litemsets indices adding with previous number of litemsets
// lits[i][j] on l-level is an index in L(l-1) litemsets
// q - correction of index and is a sum of |L(i)| for each i in [1:l-2]
for(l=q=n=0;l<allTIDs.size();l++) // for each level (length of litemsets)
{
lits = (int[][])allTIDs.get(l); // take litemsets indices on l-level
if(lits[i]!=null)
{
for(m=0;m<lits[i].length;m++)
if(lits[i][m]!=-1) ts[i][n++] = lits[i][m]+q;
} else break;
q += ((int[][])allLargeItemsets.get(l)).length;
}
}
if(debug>1)
{
System.out.println("\nTS:");
for(i=0;i<ts.length;i++)
{
System.out.print(i+":");
if(ts[i]!=null)
{
for(j=0;j<ts[i].length;j++) System.out.print(" "+ts[i][j]);
System.out.println();
} else System.out.println(" empty");
}
}
// number of customers in original transaction set
int nCustom = customerTransSet.getSize();
// stores number of non-empty transactions for each customer
int[] nCustomTrans = new int[nCustom];
// k - counter of customers after transformation
for(i=k=0,w=-1;i<nTotalTrans;i++)
if(ts[i]!=null)
{
nCustomTrans[CustomerID[i]]++;
if(CustomerID[i]!=w)
{
w = CustomerID[i];
k++;
}
}
if(k==0) return;
if(debug>0) System.out.println(k+" customers retained");
// transformed customer sequences
int[][][] tts = new int[k][][]; // k customers, k <= nCustom
int maxCustSeqLen = 0; // maximum customer's sequences length
// w - last customer ID
for(i=n=0,w=k=-1;i<nTotalTrans;i++)
{
if(ts[i]==null) continue;
if(CustomerID[i]!=w) // new customer
{
int custLen = nCustomTrans[CustomerID[i]];
k++; tts[k] = new int[custLen][]; // add new customer
if(maxCustSeqLen<custLen) maxCustSeqLen = custLen;
n = 0;
w = CustomerID[i];
}
tts[k][n++] = ts[i];
}
if(debug>1)
{
System.out.println("Transformed transaction set:");
for(i=0;i<tts.length;i++)
{
System.out.println("Customer #"+i+":");
if(tts[i]!=null)
for(j=0;j<tts[i].length;j++)
{
System.out.print("Transaction #"+j+":");
if(tts[i][j]!=null)
for(k=0;k<tts[i][j].length;k++) System.out.print(" "+tts[i][j][k]);
else System.out.print(" empty");
System.out.println();
}
else System.out.println(" empty");
}
}
allTIDs = null;
// creating L2
int nL1; // number of all litemsets
for(l=nL1=0;l<allLargeItemsets.size();l++) nL1 += ((int[][])allLargeItemsets.get(l)).length;
allLits = new ItemSet[nL1]; // litemsets as itemsets for results list
allLitsArr = new int[nL1][]; // litemsets as arrays for maximal phase
allLitsSupp = new int[nL1]; // litemsets supports
for(l=k=0;l<allLargeItemsets.size();l++)
{
int[][] L1 = (int[][])allLargeItemsets.get(l); // litemsets of length l
int[] S1 = (int[])allLargeItemsetsSupp.get(l); // their supports
for(i=0;i<L1.length;i++)
{
allLits[k] = new ItemSet();
allLitsArr[k] = L1[i];
allLitsSupp[k] = S1[i];
for(j=0;j<L1[i].length;j++) allLits[k].addItem(L1[i][j]);
k++;
}
}
clearApriori();
int nC12 = nL1*nL1; // number of candidates of length 2
if(debug>0)
{
System.out.println("nL1 = "+nL1);
System.out.println("nC2 = "+nC12+" ("+(long)nC12*8l+" bytes)");
}
int suppCk[] = new int[nC12]; // support counters for candidates
int cidCk[] = new int[nC12]; // customer id's for candidates
for(i=0;i<nC12;i++) cidCk[i] = -1;
int a,b;
int[][][] tidTable = new int[tts.length][][];
IntVec[] tvec = new IntVec[maxCustSeqLen-1];
for(i=0;i<maxCustSeqLen-1;i++) tvec[i] = new IntVec();
// counting support of candidates of length 2
for(k=0;k<tts.length;k++) // for each customer k
{
for(i=0;i<tts[k].length-1;i++) // for its each transformed transaction
{
tvec[i].clear();
for(j=0;j<tts[k][i].length;j++) // for each element of that transaction
{
a = tts[k][i][j];
for(m=i+1;m<tts[k].length;m++) // for each transaction after i-th
for(n=0;n<tts[k][m].length;n++) // for its each element
{
b = tts[k][m][n];
l = a*nL1+b; // candidate's number
// if it is not incremented by i-th transaction of k-th customer
if(cidCk[l]!=k+i+1)
{
cidCk[l] = k+i+1;
// add to list of candidates of i-th transaction of k-th cust.
tvec[i].add(l);
}
}
}
}
// convert vectors to arrays
tidTable[k] = new int[tts[k].length-1][];
for(i=0;i<tts[k].length-1;i++)
{
l = tvec[i].size();
if(l==0) continue;
tidTable[k][i] = new int[tvec[i].size()];
tvec[i].copy(tidTable[k][i]);
// java.util.Arrays.sort(tidTable[k][i]);
for(j=0;j<tidTable[k][i].length;j++)
{
l = tidTable[k][i][j];
// if candidate's support counter is not incremented by k-th customer
if(cidCk[l]!=k)
{
suppCk[l]++; // increment it
cidCk[l] = k;
}
}
}
}
// k - counter of supported candidates
for(i=k=0;i<nC12;i++) if(suppCk[i]>=m_minSuppInt) k++;
if(k==0) {
saveResults(); // save L1 and exit
return;
}
int Lk[][] = new int[k][]; // L2 sequences
int Sk[][] = new int[nL1][2];
// noprune
if(doPruning)
B1 = new boolean[nL1];
for(i=k=l=m=0;i<nL1;i++) // for each 1-sequence
{
Sk[i][0] = k; // number of first 2-sequence which have
// i-th 1-sequence as first parent
for(j=0;j<nL1;j++,l++)
if(suppCk[l]>=m_minSuppInt)
{
Lk[k] = new int[3]; // new large 2-sequence
Lk[k][0] = i; Lk[k][1] = j; Lk[k][2] = suppCk[l];
suppCk[l] = -m-1; // (-m-1) value used for correcting tidTable
// noprune
if(doPruning)
{
B1[i] = true; // mark i-th and j-th 1-sequences
B1[j] = true; // as not maximal
}
k++; // counter of large 2-sequences
}
else m++; // counter of not supported candidates
if(k!=Sk[i][0]) Sk[i][1] = k-1; // number of last 2-sequence which have
// i-th 1-sequence as first parent
else Sk[i][0] = -1; // i-th 1-sequences doesn't have any children
// with it as first parent
}
tidTable = checkTIDs(tidTable,suppCk); // correct tidTable
if(debug>1)
{
System.out.println("L2:");
for(i=0;i<Lk.length;i++) System.out.println(i+": "+Lk[i][0]+","+Lk[i][1]);
System.out.println("TID Table #2:");
printTIDs(tidTable);
}
int ckCounter; // counter of generated candidates
int newTidTable[][][];
int newLk[][];
int nStep = 2;
// for each k-sequence boundsLk[i][0] - number of first k-sequence it can be
// merged with, boundsLk[i][1] - number of last k-sequence it can be merged
// with, boundsLk[i][2] - number of first candidate it generated
// boundsLk[i][0] = -1 if k-sequence doesn't generate any candidates
int[][] boundsLk;
boolean [] Bk = null;
int hi,lo,cand,s;
while(true)
{
nStep++;
System.out.println("\t"+Lk.length+" large sequences #"+(nStep-1));
if(debug>0) System.out.println("Level = "+nStep+" Lk = "+Lk.length);
newTidTable = new int[tidTable.length][][];
boundsLk = new int[Lk.length][3];
ckCounter = 0;
if(debug>1) System.out.print("Making bounds...");
for(i=0;i<Lk.length;i++) // for each large sequence found at previous step
{
b = Lk[i][1]; // second parent
if(Sk[b][0]!=-1) // if it has generated any large k-sequence
{
boundsLk[i][0] = Sk[b][0];
boundsLk[i][1] = Sk[b][1];
boundsLk[i][2] = ckCounter;
ckCounter += Sk[b][1]-Sk[b][0]+1;
} else boundsLk[i][0] = -1;
}
if(debug>1)
{
System.out.println("\tok:");
printLk(boundsLk);
System.out.println("\nckCounter = "+ckCounter);
}
if(ckCounter==0) break; // no candidates
System.out.print(ckCounter+" candidates");
suppCk = new int[ckCounter]; // support counters
cidCk = new int[ckCounter]; // last customer id's
for(i=0;i<ckCounter;i++) cidCk[i] = -1;
int nPoints = tidTable.length / 20; // for debug
if(nPoints==0) nPoints = 1; // for debug
for(i=0;i<tidTable.length;i++) // take i-th customer
{
if(i%nPoints==0) System.out.print("."); // for debug
if(tidTable[i]==null||tidTable[i].length<2) continue; // can't generate any pairs
for(j=n=0;j<tidTable[i].length-1;j++,n=0) // for each row of tidTable except for last
{
tvec[j].clear(); // clear vector of candidates numbers
if(tidTable[i][j]==null) continue; // empty row => take next
s = i+j+1; // used for excluding repeating candidates numbers in new rows
for(k=0;k<tidTable[i][j].length;k++) // for each element of j-th row
{
a = tidTable[i][j][k]; // element
lo = boundsLk[a][0]; // first parent it can be merged with
if(lo!=-1) // if no take next
{
hi = boundsLk[a][1]; // last parent
cand = boundsLk[a][2] - lo; // first candidate number
for(m=j+1;m<tidTable[i].length;m++) // for each next row of tidTable
{
if(tidTable[i][m]==null) continue; // empty => next row
// find position of first parent
l = findStart(tidTable[i][m],lo,hi);
if(l<0) continue; // not found => next row
while(l<tidTable[i][m].length)
{
b = tidTable[i][m][l++];
if(b>hi) break; // more number than last parent => next row
b += cand; // candidate number
if(cidCk[b]!=s) // if not processed by j-th row of i-th customer
{
tvec[j].add(b); // add to vector of candidates numbers of j-th row
cidCk[b] = s;
}
}
}
}
}
}
// make new tidTable for i-th customer
newTidTable[i] = new int[tidTable[i].length-1][];
for(j=0;j<tidTable[i].length-1;j++)
{
if(tvec[j].size()==0) continue; // empty row
newTidTable[i][j] = new int[tvec[j].size()];
tvec[j].copy(newTidTable[i][j]);
// java.util.Arrays.sort(newTidTable[i][j]);
for(k=0;k<newTidTable[i][j].length;k++)
{
b = newTidTable[i][j][k]; // candidate number
if(cidCk[b]!=i) // if candidate support counter isn't incremented
{ // by i-th customer
cidCk[b] = i;
suppCk[b]++; // increment support counter
}
}
}
}
if(debug>1)
{
System.out.println("newTidTable:");
printTIDs(newTidTable);
}
// number of large (k+1)-sequences
for(i=k=0;i<ckCounter;i++) if(suppCk[i]>=m_minSuppInt) k++;
if(debug>0)
{
if(k==0) System.out.println("no supported candidates");
}
if(k==0) break; // no large sequences
newLk = new int[k][]; // new large sequences as an arrays
Sk = new int[Lk.length][2];
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -