?? assocrulemining.java
字號:
/* -------------------------------------------------------------------------- */
/* */
/* ASSOCIATION RULE DATA MINING */
/* */
/* Frans Coenen */
/* */
/* Wednesday 9 January 2003 */
/* (revised 21/1/2003, 14/2/2003, 2/5/2003, 2/7/2003, 3/2/2004, 8/5/2004, */
/* 1/2/2005, 3/2/2005) */
/* */
/* Department of Computer Science */
/* The University of Liverpool */
/* */
/* -------------------------------------------------------------------------- */
/* To compile: javac.exe AssocRuleMining.java */
// Java packages
import java.io.*;
import java.util.*;
// Java GUI packages
import javax.swing.*;
/** Set of utilities to support various Association Rule Mining (ARM)
algorithms.
@author Frans Coenen
@version 1 February 2005 */
public class AssocRuleMining extends JFrame {
/* ------ FIELDS ------ */
/** Node of a singly linked list of ARs or CARs, as appropriate. Each node
holds one rule (antecedent, consequent and confidence) plus a reference to
the node that follows it. */
protected class RuleNode {
    /** Antecedent (left hand side) of the rule. */
    protected short[] antecedent;
    /** Consequent (right hand side) of the rule. */
    protected short[] consequent;
    /** Confidence value associated with the rule held in this node
    (0.0 until assigned by the constructor). */
    double confidenceForRule;
    /** Following node in the list; <TT>null</TT> until one is linked. */
    RuleNode next;

    /** Creates a node from the three components of a rule. The arrays are
    stored by reference, they are not copied.
    @param ante the antecedent (LHS) of the rule.
    @param cons the consequent (RHS) of the rule.
    @param confValue the confidence value associated with the rule. */
    protected RuleNode(short[] ante, short[]cons, double confValue) {
        this.antecedent = ante;
        this.consequent = cons;
        this.confidenceForRule = confValue;
    }
}
// Data structures
/** The reference to the start of the rule list (<TT>null</TT> while the
list is empty). */
protected RuleNode startRulelist = null;
/** 2-D array to hold input data from the data file. Note that within the
data array records are numbered from zero, thus record one has index 0 etc.
The first dimension is sized to the number of rows when the file is read. */
protected short[][] dataArray = null;
/** 2-D array used to renumber columns for input data in terms of
frequency of single attributes (reordering will enhance performance
for some ARM algorithms). */
protected int[][] conversionArray = null;
/** 1-D array used to reconvert input data column numbers to their
original numbering where the input data has been ordered to enhance
computational efficiency. */
protected short[] reconversionArray = null;
// Constants
/** Minimum support value (as a percentage). */
private static final double MIN_SUPPORT = 0.0;
/** Maximum support value (as a percentage). */
private static final double MAX_SUPPORT = 100.0;
/** Minimum confidence value (as a percentage). */
private static final double MIN_CONFIDENCE = 0.0;
/** Maximum confidence value (as a percentage). */
private static final double MAX_CONFIDENCE = 100.0;
// Command line arguments with default values and associated fields.
/** Command line argument for data file name (set by the -F flag). */
protected String fileName = null;
/** Number of columns in the input data. Reported after
<TT>countNumCols()</TT> has been called when the data set is input;
presumably assigned there (TODO confirm). */
protected int numCols = 0;
/** Number of rows (records) in the input data. Assigned from the line
count of the input file when the file is read. */
protected int numRows = 0;
/** Command line argument for % support (default = 20%), set by the -S
flag. */
protected double support = 20.0;
/** Minimum support value in terms of number of rows. <P>Set when input
data is read and the number of records is known. */
protected double minSupport = 0;
/** Command line argument for % confidence (default = 80%), set by the -C
flag. */
protected double confidence = 80.0;
/** The number of one itemsets (singletons). */
protected int numOneItemSets = 0;
// Flags
/** Error flag used when checking command line arguments (default =
<TT>true</TT>). Note the sense: <TT>true</TT> means no error has been
detected, it is set to <TT>false</TT> when an argument is rejected. */
protected boolean errorFlag = true;
/** Input format OK flag (default = <TT>true</TT>). Set to <TT>false</TT>
when a line of the input file contains a character that is neither a digit
nor white space. */
protected boolean inputFormatOkFlag = true;
/** Flag to indicate whether system has data or not. */
private boolean haveDataFlag = false;
/** Flag to indicate whether input data has been sorted or not. */
protected boolean isOrderedFlag = false;
/** Flag to indicate whether input data has been sorted and pruned or
not. */
protected boolean isPrunedFlag = false;
// Other fields
/** The input stream from which the data file is read line by line. */
protected BufferedReader fileInput;
/** The file path. When this is not <TT>null</TT> the input file is opened
through <TT>openFilePath()</TT> rather than by file name. */
protected File filePath = null;
/* ------ CONSTRUCTORS ------ */
/** Creates an instance from the command line arguments. Every argument is
passed to <TT>idArgument</TT>; if none of them was rejected the argument
values are then validated, otherwise the menu is output.
@param args the command line arguments. */
public AssocRuleMining(String[] args) {
    // Interpret the arguments one at a time, in the order given
    int position = 0;
    while (position < args.length) {
        idArgument(args[position]);
        position++;
    }
    // errorFlag is cleared (set to false) by idArgument on a bad argument
    if (!errorFlag) {
        outputMenu();
        return;
    }
    // All arguments were recognised, so check the values they carry
    CheckInputArguments();
}
/* ------ METHODS ------ */
/* ---------------------------------------------------------------- */
/* */
/* COMMAND LINE ARGUMENTS */
/* */
/* ---------------------------------------------------------------- */
/* IDENTIFY ARGUMENT */
/** Identifies the nature of an individual command line argument and stores
the value it carries: -C = confidence, -F = file name, -S = support.
<P>An argument is rejected (a message is written to standard output and
<TT>errorFlag</TT> is set to <TT>false</TT>) when it is <TT>null</TT> or
empty, does not commence with '-', has no flag letter after the '-', has an
unrecognised flag letter, or (for -C and -S) carries a value that is not a
number. A rejected -C or -S leaves the corresponding field unchanged.
@param argument the command line argument to identify. */
protected void idArgument(String argument) {
    // Missing, empty or not introduced by '-': report rather than let
    // charAt(0) throw on an empty string
    if (argument == null || argument.length() == 0 ||
                                        argument.charAt(0) != '-') {
        System.out.println("INPUT ERROR: All command line arguments " +
                        "must commence with a '-' character (" +
                        argument + ")");
        errorFlag = false;
        return;
    }
    // A lone "-" has no flag letter; charAt(1) would be out of bounds
    if (argument.length() < 2) {
        System.out.println("INPUT ERROR: Command line argument '" +
                        argument + "' has no flag letter after the '-'");
        errorFlag = false;
        return;
    }
    char flag = argument.charAt(1);
    // Everything after the flag letter is the value (may be empty)
    String value = argument.substring(2);
    try {
        switch (flag) {
            case 'C':
                confidence = Double.parseDouble(value);
                break;
            case 'F':
                fileName = value;
                break;
            case 'S':
                support = Double.parseDouble(value);
                break;
            default:
                System.out.println("INPUT ERROR: Unrecognise command " +
                        "line argument -" + flag + value);
                errorFlag = false;
        }
    }
    catch(NumberFormatException numberFormatException) {
        // Non-numeric (or empty) value for -C or -S: treat as an input
        // error instead of terminating with an uncaught exception
        System.out.println("INPUT ERROR: Value for command line " +
                        "argument -" + flag + " must be a number (" +
                        value + ")");
        errorFlag = false;
    }
}
/* CHECK INPUT ARGUMENTS */
/** Validates the values supplied through the command line arguments: first
the support and confidence percentages, then the file name. If
<TT>errorFlag</TT> is still <TT>true</TT> afterwards the settings are
output, otherwise the menu is output. */
protected void CheckInputArguments() {
    // Each check clears errorFlag when it finds a problem
    checkSupportAndConfidence();
    checkFileName();
    // At least one check failed
    if (!errorFlag) {
        outputMenu();
        return;
    }
    // Everything in order
    outputSettings();
}
/* CHECK SUPPORT AND CONFIDENCE */
/** Checks support and confidence input % values, if either is out of
bounds then <TT>errorFlag</TT> set to <TT>false</TT> and a message is
written to standard output.
<P>A value that is not a number (NaN, which <TT>Double.parseDouble</TT>
accepts when given the text "NaN") is treated as out of bounds. */
protected void checkSupportAndConfidence() {
    // The tests are phrased as "not inside the range" on purpose: every
    // comparison involving NaN is false, so a "below min or above max"
    // test would let NaN through as if it were valid.
    // Check support
    if (!((support >= MIN_SUPPORT) && (support <= MAX_SUPPORT))) {
        System.out.println("INPUT ERROR: Support must be specified " +
                        "as a percentage (" + MIN_SUPPORT +
                        " - " + MAX_SUPPORT + ")");
        errorFlag = false;
    }
    // Check confidence
    if (!((confidence >= MIN_CONFIDENCE) &&
                                    (confidence <= MAX_CONFIDENCE))) {
        System.out.println("INPUT ERROR: Confidence must be " +
                        "specified as a percentage (" + MIN_CONFIDENCE +
                        " - " + MAX_CONFIDENCE + ")");
        errorFlag = false;
    }
}
/* CHECK FILE NAME */
/** Checks if data file name provided, if not <TT>errorFlag</TT> set
to <TT>false</TT> and a message is written to standard output.
<P>A name that is empty or consists only of white space counts as not
provided; this is what a bare "-F" argument with nothing after it
produces. */
protected void checkFileName() {
    // null: no -F argument at all; empty/blank: -F given without a name
    if (fileName == null || fileName.trim().length() == 0) {
        System.out.println("INPUT ERROR: Must specify file name (-F)");
        errorFlag = false;
    }
}
/* ---------------------------------------------------------------- */
/* */
/* READ INPUT DATA FROM FILE */
/* */
/* ---------------------------------------------------------------- */
/* INPUT DATA SET */
/** Commences process of getting input data (GUI version also exists).
<P>Reads the file and, provided its format was accepted, checks the
ordering. When that check succeeds the number of records, the number of
columns and the minimum support (in records) are output; when it fails an
error is output, the file is closed and the program exits with status 1. */
public void inputDataSet() {
    // Read the file
    readFile();
    // Nothing further to do when the input format was rejected
    if (!inputFormatOkFlag) return;
    // Ordering check failed: report, tidy up and terminate
    if (!checkOrdering()) {
        System.out.println("Error reading file: " + fileName + "\n");
        closeFile();
        System.exit(1);
        return;
    }
    // Report the dimensions of the data and derive the support threshold
    System.out.println("Number of records = " + numRows);
    countNumCols();
    System.out.println("Number of columns = " + numCols);
    // Convert the support percentage into a number of records
    minSupport = (numRows * support)/100.0;
    String minSupportText = twoDecPlaces(minSupport);
    System.out.println("Min support = " +
                        minSupportText + " (records)");
}
/* READ FILE */
/** Reads input data from file specified in command line argument
<TT>fileName</TT>. <P>Note that it is assumed
that no empty records are included. Proceeds as follows:
<OL>
<LI>Resets <TT>inputFormatOkFlag</TT> to <TT>true</TT> and gets the number
of rows (lines) in the file; the format of each line is checked while
counting and the flag is cleared if a badly formatted line is found.
<LI>If the format was accepted, dimensions the input array (one entry per
row) and reads the data; otherwise outputs an error message.
</OL>
An <TT>IOException</TT> at any stage is reported, the file is closed and
the program exits with status 1. */
protected void readFile() {
    try {
        // Start from a clean slate, the line count clears this on error
        inputFormatOkFlag=true;
        numRows = getNumberOfLines(fileName);
        // Badly formatted input: report and leave the data array alone
        if (!inputFormatOkFlag) {
            System.out.println("Error reading file: " + fileName + "\n");
            return;
        }
        // Dimension data structure, rows are filled in while reading
        dataArray = new short[numRows][];
        // Read file
        System.out.println("Reading input file: " + fileName);
        readInputDataSet();
    }
    catch(IOException ioException) {
        System.out.println("Error reading File");
        closeFile();
        System.exit(1);
    }
}
/* GET NUMBER OF LINES */
/** Counts the lines/records in the input file, checking the format of each
line as it goes. Counting stops at the end of the file or at the first line
that contains no tokens (that line is still format checked but not
counted). The file is opened through the file path when one is set,
otherwise by the given name, and is closed again before returning.
@param nameOfFile the filename of the file to be opened.
@return the number of rows in the given file.
@throws IOException if reading a line from the file fails. */
protected int getNumberOfLines(String nameOfFile) throws IOException {
    // Open the file
    if (filePath != null) openFilePath();
    else openFileName(nameOfFile);
    // Walk through the file one line at a time
    int lineCount = 0;
    for (String current = fileInput.readLine(); current != null;
                                    current = fileInput.readLine()) {
        // Line numbers reported to the user start at 1
        checkLine(lineCount + 1, current);
        // A line without any tokens ends the count
        if (!new StringTokenizer(current).hasMoreTokens()) break;
        lineCount++;
    }
    // Close file and return
    closeFile();
    return lineCount;
}
/* CHECK LINE */
/** Checks that the given line from the input file consists solely of
digits and white space (space separated integers). On the first character
that is neither, a message dialog naming the line is shown,
<TT>inputFormatOkFlag</TT> and <TT>haveDataFlag</TT> are set to
<TT>false</TT>, and the rest of the line is not examined.
@param counter the line number in the input file.
@param str the current line from the input file. */
protected void checkLine(int counter, String str) {
    final int length = str.length();
    for (int position = 0; position < length; position++) {
        char symbol = str.charAt(position);
        // Digits and white space are the only acceptable characters
        if (Character.isDigit(symbol) || Character.isWhitespace(symbol)) {
            continue;
        }
        // First offending character found: report once and stop
        JOptionPane.showMessageDialog(null,"FILE INPUT ERROR:\n" +
                        "charcater on line " + counter +
                        " is not a digit or white space");
        inputFormatOkFlag = false;
        haveDataFlag = false;
        return;
    }
}
/* READ INPUT DATA SET */
/** Reads input data from the file specified in the command line argument,
i.e. the file named by the <TT>fileName</TT> field, by delegating to the
one argument version of this method.
@throws IOException propagated from the one argument version. */
public void readInputDataSet() throws IOException {
// Delegate, supplying the file name held in the field
readInputDataSet(fileName);
}
/* READ INPUT DATA SET */
/** Reads input data from given file. Lines are handed, together with a row
index that starts at zero, to <TT>processInputLine</TT>; reading stops at
the end of the file or as soon as that method returns <TT>false</TT>. The
file is opened through the file path when one is set, otherwise by the
given name, and is closed when reading has finished.
@param fName the given file name.
@throws IOException if reading a line from the file fails. */
protected void readInputDataSet(String fName) throws IOException {
    // Open the file
    if (filePath != null) openFilePath();
    else openFileName(fName);
    // First (row) index in the 2-D data array
    int row = 0;
    String current;
    // Keep going while there is a line and it was processed successfully
    while ((current = fileInput.readLine()) != null &&
                                    processInputLine(current, row)) {
        row++;
    }
    // Close file
    closeFile();
}
/* READ INPUT DATA SEGMENT */
/** Reads an input data segment from a given file and places the content into
the data array structure commencing at the given row index, continuing until
the end index is reached.
@param fName the given file name.
@param startRowIndex the given row start index.
@param endRowIndex the given row end index. */
protected void readInputDataSetSeg(String fName, int startRowIndex,
int endRowIndex) throws IOException {
int rowIndex=startRowIndex;
// Open the file
if (filePath==null) openFileName(fName);
else openFilePath();
// get first row.
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -