?? aprioriall.java
字號:
*
* @param array input data
* @param value for search
* @return pos in array
* @author Alexey Grinyuk
*/
private int findInArray(int array[][], int value) {
int start = 0;
int end = array.length-1;
int pos;
if(end == -1) return -1;
while(start + 1 < end) {
pos = (end - start) / 2 + start;
if(array[pos][0] < value) {
start = pos + 1;
} else if(array[pos][0] > value) {
end = pos - 1;
} else return pos;
}
if(array[start][0] == value) return start;
if(array[end][0] == value) return end;
return -1;
}
/**
* Compare 2 integer arrays
*
* @param array1 input data
* @param array2 input data
* @param length number items for compare
* @return array1 == array2
* @author Alexey Grinyuk
*/
private boolean arrayCompare(int array1[], int array2[], int length) {
int i;
for(i=0; i<length; i++) {
if(array1[i] != array2[i]) return false;
}
return true;
}
/**
* Global variable L[][] And TidTable[][] are parameters this recursive function
*/
private void createAllLk() {
int i, j, counter, numberOfAllChildren;
int LLength = L.length;
if(LLength < 2) return;
int LWidth = L[0].length;
int firstChild[] = new int[LLength];
int lastChild[] = new int[LLength];
int childSupp[];
int toLargeChild[];
int largeChildren[][];
int largeChildSupp[];
numberOfAllChildren = createFirstAndLastChildArrays(L, /*Out*/ firstChild, /*Out*/ lastChild);
// Allocate memory...
childSupp = new int[numberOfAllChildren]; // ...for Support of all children
toLargeChild = new int[numberOfAllChildren]; // ...for associate table (All Children -> Large Children)
for(i=0; i<numberOfAllChildren; i++) { // Initialize memeory childLastCustomer
//childSupp[i] = 0;
toLargeChild[i] = -1;
}
TIDTable = calculateSupportAndBuildNewTIDTable(TIDTable, firstChild, lastChild, /*Out*/ childSupp);
// Calculate number of large children and
// build associate table (All Children -> Large Children)
counter = 0;
for(i=0; i<childSupp.length; i++)
if(childSupp[i] >= m_minSuppInt) toLargeChild[i] = counter++;
// Allocate memory...
largeChildren = new int[counter][]; //...for array of large children
largeChildSupp = new int[counter]; //...for supports of large children
buildArrayOfLargeChildren(L, firstChild, lastChild, childSupp, /*Out*/ largeChildren, /*Out*/ largeChildSupp);
// Mark little itemsets in TIDTable as removed
for(i=0; i<TIDTable.length; i++) {
if(TIDTable[i] != null)
for(j=0; j<TIDTable[i].length; j++)
TIDTable[i][j] = toLargeChild[ TIDTable[i][j] ];
}
// Add Lk to general result
allLargeItemsets.add(largeChildren);
allLargeItemsetsSupp.add(largeChildSupp);
allTIDs.add(TIDTable);
L = largeChildren;
firstChild = null;
lastChild = null;
childSupp = null;
toLargeChild = null;
largeChildren = null;
largeChildSupp = null;
System.out.println("|L" + (LWidth + 1) + "| = " + L.length);
if( m_maxItemSize == LWidth + 1 ) return;
createAllLk(); // recursive call
}
/**
* @param L large itemsets
* @param firstChild array of first children
* @param lastChild array of last children
* @return number of all children
* @author Alexey Grinyuk
*/
private int createFirstAndLastChildArrays(int L[][], /*Out*/ int firstChild[], /*Out*/ int lastChild[]) {
int i, j;
int counter;
int LLength = L.length;
if(LLength < 2) return 0;
int LWidth = L[0].length;
for(i=0; i < LLength; i++) {
firstChild[i] = -1;
lastChild[i] = -1;
}
// Generate ordinal numbers for first and last child of each item L (firstChild and lastChild)
counter = 0;
for(i=0; i < LLength-1; i++) { // for each itemset in L (parent1)
for(j=i+1; j < LLength; j++) { // for each itemset in L (parent2)
if( arrayCompare(L[i], L[j], LWidth-1) ) { // if parent1 and parent2 can bear a child
if(firstChild[i] == -1) firstChild[i] = counter; // first child of parent1
lastChild[i] = counter; // last child of parent1
counter++;
} else break;
}
}
return counter;
}
/**
* @param TIDTable association table: Transaction ID -> large itemsets list
* @param firstChild array of first children
* @param lastChild array of last children
* @param childSupp supports for all children
* @return new TIDTable
* @author Alexey Grinyuk
*/
private int[][] calculateSupportAndBuildNewTIDTable(int TIDTable[][],
int firstChild[],
int lastChild[],
int childSupp[])
{
int i, TID, pos1, pos2;
int parent1, parent2;
int lastParent;
int child;
IntVector temp = new IntVector();
int tempSize;
int newTIDTable[][] = new int[TIDTable.length][];
int newTIDTableCounter = 0;
int rezult[][];
int childLastCustomer[];
// Allocate and initialize memory for last customers which increments child supp
childLastCustomer = new int[childSupp.length];
for(i=0; i<childLastCustomer.length; i++) childLastCustomer[i] = -1;
for(TID=0; TID<TIDTable.length; TID++) { // for each transaction in TIDTable
if(TIDTable[TID] == null) continue;
// for each itemset associated with transaction (for each parent1)
for(pos1=0; pos1 < TIDTable[TID].length-1; pos1++) {
parent1 = TIDTable[ TID ][ pos1 ];
if( parent1 == -1 ) continue;
if( firstChild[ parent1 ] == -1 ) continue;
// ordinal number (index in L[]) of last parent2
lastParent = parent1 + (lastChild[ parent1 ] - firstChild[ parent1 ] + 1);
// for each parent2
for(pos2 = pos1+1; pos2 < TIDTable[TID].length && TIDTable[TID][pos2] <= lastParent; pos2++) {
parent2 = TIDTable[ TID ][ pos2 ];
if(parent2 != -1) { // if cell of TIDTable[TID] not marked as removed
// ordinal number (index in childSupp[]) of child that born parent1 and parent2:
child = firstChild[ parent1 ] + (parent2 - parent1 - 1);
if(childLastCustomer[child] != CustomerID[TID]) {
childSupp[child]++;
childLastCustomer[child] = CustomerID[TID];
}
temp.addElement(child); // build row of new TIDTable
}
}
}
tempSize = temp.size(); // number of itemsets assosiated with transaction
if(tempSize > 0) {
// allocate memory for new row of new TIDTable
newTIDTable[TID] = new int[tempSize];
for(i=0; i<tempSize; i++) newTIDTable[TID][i] = temp.IntegerAt(i);
temp.clear();
}
}
return newTIDTable;
}
/**
* @param L large itemsets
* @param firstChild array of first children
* @param lastChild array of last children
* @param childSupp support for all children
* @param largeChildren array of large children
* @param largeChildSupp support for large children
* @author Alexey Grinyuk
*/
private void buildArrayOfLargeChildren(int L[][],
int firstChild[],
int lastChild[],
int childSupp[],
int largeChildren[][],
int largeChildSupp[])
{
int parent1, parent2;
int child;
int counter;
int LLength = L.length;
int LWidth = L[0].length;
counter = 0;
// for each itemset in L (for each parent1)
for(parent1=0; parent1<LLength; parent1++) {
// if this itemset have children
if(firstChild[parent1] != -1) {
// for all children of itemset
for(child = firstChild[parent1]; child <= lastChild[parent1]; child++) {
if(childSupp[child] >= m_minSuppInt) { // if itemset is large
largeChildSupp[counter] = childSupp[child];
largeChildren[counter] = new int[LWidth + 1];
// get parent2 of child
parent2 = parent1 + (child - firstChild[parent1] + 1);
// create child itemset
System.arraycopy(L[parent1], 0, largeChildren[counter], 0, LWidth);
largeChildren[counter][LWidth] = L[parent2][LWidth - 1];
counter++;
}
}
}
}
}
// << By Victor Borichev>>
private CustomTransSet convertMiningminingInputStream() throws MiningException {
fireMiningEvent(new ReadingBeginMessage(getAlgorithmLevel()));
CustomTransSet cts = new CustomTransSet();
Hashtable customers = new Hashtable();
int rows = 0;
while(miningInputStream.next())
{
MiningVector vector = miningInputStream.read();
// +++++++++ Invalid vector => ignore:
if (vector == null)
continue;
// --------- Invalid vector, ignore.
double custId = vector.getValue(customerId);
double transPos = vector.getValue(transactionPosition);
double itId = vector.getValue(itemId);
// Missing value => ignore line:
if ( Category.isMissingValue(custId) || Category.isMissingValue(transPos) || Category.isMissingValue(itId) )
continue;
Double value = new Double(custId); // customer id
Hashtable sequence = (Hashtable)customers.get(value); // its transactions
if(sequence == null)
{
sequence = new Hashtable();
customers.put(value,sequence);
}
Integer pos = new Integer((int)transPos);
ItemSet transaction = (ItemSet)sequence.get(pos);
if(transaction == null)
{
transaction = new ItemSet();
sequence.put(pos,transaction);
}
int item = (int)itId;
transaction.addItem(item);
rows++;
}
Enumeration cids = customers.keys();
while(cids.hasMoreElements()) {
Double cid = (Double)cids.nextElement();
Hashtable sequence = (Hashtable)customers.get(cid);
int size = sequence.size();
int[] poss = new int[size];
ItemSet[] itemsets = new ItemSet[size];
Enumeration em = sequence.keys();
int i = 0;
while(em.hasMoreElements()) {
Integer pos = (Integer)em.nextElement();
poss[i] = pos.intValue();
itemsets[i++] = (ItemSet)sequence.get(pos);
}
// sorting transactions by position
int j,min,element;
for(i=0;i<size;i++)
{
min = poss[i]; element = -1;
for(j=i+1;j<size;j++)
if(poss[j]<min)
{
min = poss[j];
element = j;
}
if(element!=-1)
{
int iswap = poss[i];
poss[i] = poss[element];
poss[element] = iswap;
ItemSet oswap = itemsets[i];
itemsets[i] = itemsets[element];
itemsets[element] = oswap;
}
}
TransactionSet ts = new TransactionSet();
for(i=0;i<size;i++)
ts.addTransaction(itemsets[i]);
cts.addCustomerTransSet(ts);
}
fireMiningEvent(new ReadingEndMessage(rows, getAlgorithmLevel()));
return cts;
}
/**
* AprioriAll. If a customerId, transactionPosition, or itemId has
* a missing value, the (customerId, transactionPosition, itemId)-tuple
* is ignored.
*/
public void runAlgorithm() throws MiningException
{
int i,j,k,l,m,n,q,w;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -