?? aprioriall.java
字號:
/**
* Title: XELOPES Data Mining Library
* Description: Java Data Mining API. Supported standarts: <a href="http://www.dmg.org">Predictive Model Markup Language (PMML 2.0) </a>; <a href="http://www.omg.org/cwm">DataMining specification for Common Warehouse Metamodel (OMG)</a>.
* Copyright: Copyright (c) 2002 Prudential Systems Software GmbH
* Company: <a href="mailto:valentine.stepanenko@zsoft.ru">ZSoft, Spb, Russia</a>
* @author Victor Borichev
* @author Valentine Stepanenko (valentine.stepanenko@zsoft.ru)
* @version 1.0
*/
package com.prudsys.pdm.Models.CustomerSeq.Algorithms;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import com.prudsys.pdm.Core.Category;
import com.prudsys.pdm.Core.MiningException;
import com.prudsys.pdm.Core.Event.Status.CreationModelBeginMessage;
import com.prudsys.pdm.Core.Event.Status.ReadingBeginMessage;
import com.prudsys.pdm.Core.Event.Status.ReadingEndMessage;
import com.prudsys.pdm.Input.MiningVector;
import com.prudsys.pdm.Models.AssociationRules.ItemSet;
import com.prudsys.pdm.Models.CustomerSeq.CustomSequence;
import com.prudsys.pdm.Models.CustomerSeq.CustomerSequentialAlgorithm;
import com.prudsys.pdm.Models.Sequential.IntVec;
import com.prudsys.pdm.Models.Sequential.Algorithms.Seq.TransactionSet;
import com.prudsys.pdm.Utils.IntHashtable;
import com.prudsys.pdm.Utils.IntVector;
public class AprioriAll extends CustomerSequentialAlgorithm {
Vector rules = new Vector();
/** Customer transaction set */
private CustomTransSet customerTransSet = null;
/**
* for litemsets phase of algorithm
*/
/** Used for Lk on all steps of algorithm */
private int L[][];
/** Used for association table: Transaction ID -> large itemsets list */
private int TIDTable[][];
/** Used for customer ID of each transaction */
private int CustomerID[];
/** All Lk */
private Vector allLargeItemsets;
/** Supports for all Lk */
private Vector allLargeItemsetsSupp;
/** TID's */
private Vector allTIDs;
/** all litemsets */
private ItemSet[] allLits;
/** Minimum number of supporting transactions, only used if >0. */
private int minimumSupportCount = 0;
/** Minimum support count */
private int m_minSuppInt;
/** Minimum item site. */
private int m_minItemSize = 1;
/** Maximum item size. */
private int m_maxItemSize = -1;
/**
* for sequential phases of algorithm
*/
/** all large sequences */
private Vector allLargeSequences;
/** debug level */
private int debug = 1;
/** maximal length of row of tidtable */
private int maxTidSize = 0;
/** B1[i] = false if L1[i] doesn't make any child */
private boolean[] B1;
/** vector of all Bk for all Lk except for last*/
private Vector allBk;
/** all litemset from apriori phase as an arrays */
private int[][] allLitsArr;
/** all litemsets supports */
private int[] allLitsSupp;
/** d pruning phase */
private boolean doPruning = true;
public boolean getDoPruning() { return doPruning; }
public void setDoPruning( boolean p ) { doPruning = p; }
/**
* Returns minimum number of transactions containing item pairs.
* The relative minimum support value is defined by the minimumSupport
* parameter.
*
* @return minimum support count
*/
public int getMinimumSupportCount()
{
return minimumSupportCount;
}
/**
* Sets minimum number of transactions containing item pairs.
* The relative minimum support value is defined by the minimumSupport
* parameter.
*
* @param minimumSupportCount new minimum support count
*/
public void setMinimumSupportCount(int minimumSupportCount)
{
this.minimumSupportCount = minimumSupportCount;
}
/**
* The method getMinSupport returns the minimum support.
*
* @return minimum support
*/
public double getMinSupport() {
return minimumSupport;
}
/**
* The method getMinSupport sets the minimum support.
*
* @param minimum support to set
*/
public void setMinSupport(double minSupp) {
minimumSupport = minSupp;
}
public int getMinItemSize() {
return m_minItemSize;
}
public void setMinItemSize(int minItemSize) {
m_minItemSize = minItemSize;
}
public int getMaxItemSize() {
return m_maxItemSize;
}
public void setMaxItemSize(int maxItemSize) {
m_maxItemSize = maxItemSize;
}
/**
* Checks mining algorithm for completeness by calling verify method
* of superclass. Adiitionally, it checks whether minimumItemSize and
* maximumItemSize are admissible.
*
* @throws IllegalArgumentException if some algorithm attributes are incorrect
*/
public void verify() throws IllegalArgumentException
{
super.verify();
if (m_minItemSize < 0)
throw new IllegalArgumentException("minimumItemSize can't be negative");
if (m_maxItemSize > -1 && m_minItemSize > m_maxItemSize)
throw new IllegalArgumentException("minimumItemSize can't be larger than maximumItemSize");
if (generateRules && doPruning)
throw new IllegalArgumentException("can't generate rules with pruning");
}
// << By Alexey Grinyuk >>
/**
* Implements the Algorithm AprioriTID.
*
* @param transSet transaction set
* @return result as list of association rules
* @author Alexey Grinyuk
*/
private void apriori(CustomTransSet customTransSet) {
int L1Supp[];
m_minSuppInt = minimumSupportCount;
if (m_minSuppInt <= 0)
m_minSuppInt = (int)Math.ceil( minimumSupport * customTransSet.getSize() );
System.out.println("---------minSuppCount: " + m_minSuppInt + "--------");
// build L1 and TIDTable
L1Supp = createL1(customTransSet);
sortArrays(L, L1Supp);
createTIDTable(customTransSet);
System.out.println("|L1| = " + L.length);
// add L1 to general result
allLargeItemsets.add(L);
allLargeItemsetsSupp.add(L1Supp);
allTIDs.add(TIDTable);
createAllLk(); // recursive function for build next Lk
System.out.println("createAllLk finished");
}
/**
* Create L1 and store it in L[]
*
* @param transSet transaction set
* @return supports for L1
* @author Alexey Grinyuk
*/
private int[] createL1(CustomTransSet customTransSet)
{
int i, j, item, supp;
int customer;
IntHashtable hashtable = new IntHashtable();
IntHashtable hashtableTest = new IntHashtable();
Integer suppCount;
Integer test;
int newItem;
boolean addFlag;
TransactionSet transSet;
ItemSet transactionItemSet;
int itemCounter;
int tmpL[][];
int tmpLSupp[];
int result[];
for(customer=0; customer<customTransSet.getSize(); customer++) {
transSet = customTransSet.getCustomerTransSet(customer);
// for all transactions
for(i=0; i<transSet.getSize(); i++) {
transactionItemSet = (ItemSet)transSet.getTransactionAt(i);
// for all items of transaction
for(j=0; j<transactionItemSet.getSize(); j++) {
newItem = transactionItemSet.getItemAt(j);
test = hashtableTest.get(newItem);
// if item not found in hashtableTest
if(test == null) {
hashtableTest.put(newItem, customer); // add item to hashtableTest
addFlag = true;
} else if( test.equals(new Integer(customer)) ) {
addFlag = false;
} else {
addFlag = true;
hashtableTest.put(newItem, customer);
}
if(addFlag) {
suppCount = hashtable.get(newItem);
// if item not found in hashtable
if(suppCount == null) {
// add item to hashtable
hashtable.put(newItem, 1);
} else {
// increment support counter
hashtable.put(newItem, suppCount.intValue() + 1);
}
}
}
}
}
itemCounter = hashtable.size();
tmpL = new int[itemCounter][1]; // allocate memory for L
tmpLSupp = new int[itemCounter]; // allocate memory for supports
Enumeration em = hashtable.keys();
i = 0;
// find large itemsets (build L1)
while(em.hasMoreElements()) {
item = ((Integer)em.nextElement()).intValue();
supp = ((Integer)hashtable.get( item )).intValue();
if(supp >= m_minSuppInt) {
tmpL[i][0] = item;
tmpLSupp[i] = supp;
i++;
}
}
L = new int[i][1];
result = new int[i];
// rebuild arrayas
for(j=0; j<i; j++) {
L[j] = tmpL[i - j - 1];
result[j] = tmpLSupp[i - j - 1];
}
// return supports for L1
return result;
}
/**
* Create association table: Transaction ID -> large itemsets list
* Store rezult in TIDTable[][]
*
* @param transSet transaction set
* @author Alexey Grinyuk
*/
private void createTIDTable(CustomTransSet customTransSet) {
int i, j, customer, transactionSize;
int counter = 0;
int transactionCounter = 0;
int customerTransCounter;
TransactionSet transSet;
ItemSet transactionItemSet;
for(i=0; i<customTransSet.getSize(); i++) {
transSet = customTransSet.getCustomerTransSet(i);
transactionCounter += transSet.transactionList.size();
}
TIDTable = new int[transactionCounter][];
CustomerID = new int[transactionCounter];
for(customer=0; customer<customTransSet.getSize(); customer++) {
transSet = customTransSet.getCustomerTransSet(customer);
customerTransCounter = transSet.transactionList.size();
// for all transactions
for(i=0; i<customerTransCounter; i++, counter++) {
transactionItemSet = (ItemSet)transSet.transactionList.get(i);
transactionSize = transactionItemSet.getSize();
TIDTable[counter] = new int[transactionSize];
for(j=0; j<transactionSize; j++) {
// TIDTable[counter][j] = reference to item L1
TIDTable[counter][j] = findInArray( L, transactionItemSet.getItemAt(j) );
}
CustomerID[counter] = customer;
}
}
}
/**
* Sort L1 (int[][1])
*
* @param arrayL - L1
* @param arraySupp - supports for L1
*/
private void sortArrays(int arrayL[][], int arraySupp[]) {
int i, j, k, tmpVal, tmpSupp;
int arraySize = arrayL.length;
for(i=0; i < arraySize-1; i++) {
for(j=0; j < arraySize-1-i; j++) {
if(arrayL[j][0] > arrayL[j + 1][0]) {
tmpVal = arrayL[j + 1][0];
tmpSupp = arraySupp[j + 1];
arrayL[j + 1][0] = arrayL[j][0];
arraySupp[j + 1] = arraySupp[j];
arrayL[j][0] = tmpVal;
arraySupp[j] = tmpSupp;
}
}
}
}
/**
* Find in array int[][1]
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -