?? myapriori.java
字號:
package apriori;
/**
* formatted with JxBeauty (c) johann.langhofer@nextra.at<br>
*/
import java.util.*;
import java.sql.*;
import oracle.jdbc.pool.*;
/**
* A program to find association rules with the apriori algorithm (Agrawal et al. 1993).<br>
* In addition to the standard apriori algorithm, this program can also find<br>
* AprioriAll (sequence-aware) relations.<br>
*<br>
* The program can run on any SQL92-compliant database that has a schema<br>
* like this:<br>
* <pre>
* Table "web_record"
* Attribute | Type | Modifier
* -------------+----------------------+----------
* customer_id | character varying(5) | not null
* view_seq | integer | not null
* vroot_id | character varying(4) | not null
* </pre>
*<br>
* These are the customerID, transactionSequence and transactionID, respectively.<br>
* Any data that can be reshaped into this structure can be processed with the<br>
* AprioriAll algorithm.<br>
*<br>
* Here is a data parser example that parses the Microsoft Anonymous Web Data<br>
* http://kdd.ics.uci.edu/databases/msweb/msweb.html<br>
* <br>
* Here is the example running command:<br>
* $JAVA_HOME/bin/java -Xmx128m Apriori -s 1 -c 5 -seq<br>
* $JAVA_HOME/bin/java -Xmx128m Apriori -s 2 -c 8<br>
* The result is displayed on the screen.<br>
*
* The command line options:<br>
* -s: support value; 1 means only results with support larger than 1% are displayed<br>
* -c: confidence value; 5 means only results with confidence larger than 5% are displayed<br>
* -seq: when present, the program runs the AprioriAll algorithm<br>
*/
public class myApriori {
// JDBC driver class name; the constructor loads it reflectively before connecting.
private String strDriver = "com.microsoft.jdbc.sqlserver.SQLServerDriver";
// JDBC connection URL handed to DriverManager by the constructor.
private String url = "jdbc:microsoft:sqlserver://127.0.0.1:1433;DatabaseName=master;SelectMethod=cursor";
// Table that the generated SQL statements read from.
private String tableName = "web_record";
// presumably the alias/name of the count column in generated SQL — usage is not visible here; verify.
private String countField = "counts";
// NOTE(review): the class documentation describes customer_id as the customer ID and
// vroot_id as the transaction ID, which is the opposite of the two mappings below —
// confirm whether this swap is intentional before changing it.
private String customerIdField = "vroot_id";
private String transactionIdField = "customer_id";
// Column holding the order of a transaction within a customer's history.
private String transactionSequenceField = "view_seq";
// Minimum support as a fraction (init divides the percentage argument by 100).
private double support;
// Minimum confidence as a fraction (init divides the percentage argument by 100).
private double confident;
// True when the AprioriAll variant was requested (the -seq option in init).
private boolean aprioriAll;
// Database connection opened by the constructor and used by execSQL.
private Connection conn;
// Cached result of getTotalCustomer(); 0 means the count has not been loaded yet.
private int totalCustomer = 0;
private HashMap supportCount; // Holds each itemSet together with the support of that itemSet
/**
 * Runs the given SQL text as a query on the connection opened by the constructor.
 * The statement prepared here is not closed by this method; it stays reachable
 * through the returned result set.
 *
 * @param sql the SQL query text to execute
 * @return the result set produced by the query
 * @exception Exception if preparing or executing the statement fails
 */
private ResultSet execSQL (String sql) throws Exception {
    return conn.prepareStatement(sql).executeQuery();
}
/**
 * Appends one rule to the three parallel result lists.
 * The rule's value is {@code remainSet}, its support is
 * {@code wholeCount / totalCustomer} and its confidence is
 * {@code wholeCount / baseCount}.
 * <p>
 * When the AprioriAll mode is active the rule is only recorded if the
 * concatenation of {@code thisSubSet} followed by {@code remainSet} is an
 * element of {@code allItemSets}; otherwise nothing is added.
 *
 * @param values      receives {@code remainSet}
 * @param supports    receives the support as a {@code Float}
 * @param confidences receives the confidence as a {@code Float}
 * @param allItemSets the item sets checked against in AprioriAll mode
 * @param thisSubSet  the leading part of the candidate item set
 * @param remainSet   the trailing part of the candidate item set
 * @param wholeCount  numerator of both the support and the confidence
 * @param baseCount   denominator of the confidence
 */
private void addRule (List values, List supports, List confidences, List allItemSets,
        List thisSubSet, List remainSet, float wholeCount, float baseCount) {
    if (aprioriAll) {
        // Order matters here: subset first, remainder appended after it.
        List combined = new ArrayList(thisSubSet);
        combined.addAll(remainSet);
        if (!allItemSets.contains(combined)) {
            return;
        }
    }
    values.add(remainSet);
    supports.add(new Float(wholeCount / (float) totalCustomer));
    confidences.add(new Float(wholeCount / baseCount));
}
/**
 * Creates a miner with the given thresholds and opens the database connection.
 * The JDBC driver named by {@code strDriver} is loaded and a connection to
 * {@code url} is opened as part of construction.
 *
 * @param support    the minimum support, stored as given
 * @param confident  the minimum confidence, stored as given
 * @param aprioriAll whether the AprioriAll algorithm should be run
 * @exception Exception if the driver cannot be loaded or instantiated, or the
 *                      connection cannot be opened
 */
public myApriori (double support, double confident, boolean aprioriAll) throws Exception
{
    this.support = support;
    this.confident = confident;
    this.aprioriAll = aprioriAll;
    this.supportCount = new HashMap();

    // An earlier variant of this constructor connected to Oracle through
    // oracle.jdbc.driver.OracleDriver with a jdbc:oracle:thin: URL instead.
    // NOTE(review): the login below is hard-coded; consider externalising it.
    final String dbUser = "sa";
    final String dbPassword = "sa";
    Class.forName(strDriver).newInstance();
    this.conn = DriverManager.getConnection(url, dbUser, dbPassword);
}
/**
 * Parses the command line arguments and builds a {@code myApriori} from them.
 * <p>
 * Recognised options:
 * <ul>
 * <li>{@code -s <int>}: support in percent; values greater than 0 are divided by 100,
 *     other values leave the support at 0.0</li>
 * <li>{@code -c <int>}: confidence in percent, handled like {@code -s}</li>
 * <li>{@code -seq}: enables the AprioriAll algorithm</li>
 * </ul>
 * Any other argument is ignored.
 *
 * @param args the command line arguments
 * @return a new instance configured from the arguments
 * @exception IllegalArgumentException if {@code -s} or {@code -c} is the last argument
 *            (no value follows it), or if its value is not an integer
 *            ({@code NumberFormatException})
 * @exception Exception if the instance cannot be constructed
 */
public static myApriori init (String[] args) throws Exception
{
    double confidence = 0.0;
    double support = 0.0;
    boolean aprioriAll = false;
    for (int i = 0; i < args.length; i++) {
        String option = args[i];
        if ("-s".equals(option)) {
            int percent = parsePercentOption(args, i);
            if (percent > 0) {
                support = (double)percent/100;
            }
            i++; // skip the value that was just consumed
        }
        else if ("-c".equals(option)) {
            int percent = parsePercentOption(args, i);
            if (percent > 0) {
                confidence = (double)percent/100;
            }
            i++; // skip the value that was just consumed
        }
        else if ("-seq".equals(option)) {
            aprioriAll = true;
        }
    }
    return new myApriori(support, confidence, aprioriAll);
}

/**
 * Reads the integer that follows the option at {@code optionIndex}.
 * Fails with a descriptive message instead of an array index error when the
 * option is the last argument.
 */
private static int parsePercentOption (String[] args, int optionIndex) {
    if (optionIndex + 1 >= args.length) {
        throw new IllegalArgumentException("Missing integer value after option "
                + args[optionIndex]);
    }
    return Integer.parseInt(args[optionIndex + 1]);
}
/**
 * Returns the number of distinct values in the customer id column of the table.
 * This total is the denominator used when a rule's support is calculated.
 * <p>
 * The value is queried from the database on the first call and cached in
 * {@code totalCustomer}; while the cached value is 0 every call queries again.
 * The result set and the statement that produced it are closed before returning.
 *
 * @return the number of distinct customer ids, or 0 if the query returned no row
 * @exception Exception if the query fails or its resources cannot be closed
 */
public int getTotalCustomer () throws Exception {
    if (totalCustomer == 0) {
        String sql = "select count(distinct("+customerIdField+")) from "+tableName+"";
        ResultSet rs = execSQL(sql);
        // Fetch the owning statement first: getStatement() must not be called
        // on a result set that has already been closed.
        Statement owner = rs.getStatement();
        try {
            if (rs.next()) {
                totalCustomer = rs.getInt(1);
            }
        } finally {
            // Release both JDBC resources; previously they were leaked on every call.
            try {
                rs.close();
            } finally {
                if (owner != null) {
                    owner.close();
                }
            }
        }
    }
    return totalCustomer;
}
/**
* Construct the SQL<br>
* <br>
* 1) select a0.transactionId, count(a0.customerId) from tableName a0 group by a0.transactionId;<br>
* 2) select a0.transactionId, a1.transactionId, count(a0.customerId) from tableName a0, tableName a1 where<br>
* a0.transactionSequence < a1.transactionSequence and a0.customerId=a1.customerId and <br>
* a0.transactionId in ([previous appear item]) and a1.transactionId in ([previous appear item])<br>
* group by a0.transactionId, a1.transactionId;<br>
* .....<br>
* .....<br>
* 3) select a0.transactionId, ...., aN.transactionId, count(a0.customerId) from tableName a0, ......, tableName aN where <br>
* a0.transactionSequence < a1.transactionSequence and .... and a(N-1).transactionSequence < aN.transactionSequence and<br>
* a0.customerId=a1.customerId and .... and a(N-1).customerId=aN.customerId<br>
* a0.transactionId in ([previous appear item]) and .... and aN.transactionId in ([previous appear item])<br>
* group by a0.transactionId, ...., aN.transactionId;<br>
*<br>
* If we are not running the AprioriAll algorithm, we just need to remove the condition:<br>
* a0.transactionSequence < a1.transactionSequence and .... and a(N-1).transactionSequence < aN.transactionSequence<br>
* and add the following condition:<br>
* a0.transactionId < a1.transactionId and .... and a(N-1).transactionId < aN.transactionId<br>
* so that duplicate entries are removed<br>
*
* @param noOfItem the number of items in the itemsets the SQL is built for
* @param previous the itemsets found previously (a list of integer item lists), or null
* @return the SQL statement
*/
public String getSQL (int noOfItem, List previous) {
String transactionIDs = " a0."+transactionIdField+"";
String tables = " "+tableName+" a0";
String aprioriAllFreq = "";
String skipDupTransaction = "";
String customerIDs = "";
String previousItems = "";
String searchItems = " ";
if (previous != null) {
StringBuffer sb = new StringBuffer(100);
// Extrace the items from the previous itemsets gotten
for (int i = 0; i < previous.size(); i++) {
ArrayList itemSet = (ArrayList)previous.get(i);
for (int j = 0; j < itemSet.size(); j++) {
String item = Integer.toString(((Integer)itemSet.get(j)).intValue());
if ( sb.toString().indexOf(item) == -1 )
sb.append(",").append(item);
}
}
previousItems = sb.substring(1); //Don't need the first ','
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -