?? assocrulemining.java
字號:
String line = fileInput.readLine();
for (int index=startRowIndex;index<endRowIndex;index++) {
// Process line
processInputLine(line,index);
// get next line
line = fileInput.readLine();
}
// Close file
closeFile();
}
/* PROCESS INPUT LINE */
/** Parses one line of the input file and stores the resulting item set as
a row of the <TT>dataArray</TT> structure.
@param line the text read from the input file; may be null when nothing
was read.
@param rowIndex the row of <TT>dataArray</TT> that receives the record.
@return true if a record was stored, false if the line was null or
contained no tokens (in which case the row is left untouched). */
private boolean processInputLine(String line, int rowIndex) {
    // Nothing was read: no record to store
    if (line == null) {
        return false;
    }

    // Split the line into whitespace separated tokens
    StringTokenizer tokens = new StringTokenizer(line);
    int tokenCount = tokens.countTokens();

    // A blank line carries no record either
    if (tokenCount == 0) {
        return false;
    }

    // Translate the tokens into a sequence of short integers
    short[] items = binConversion(tokens,tokenCount);

    // Store a fresh copy of the converted items as the row
    dataArray[rowIndex] = items.clone();
    return true;
}
/* CHECK DATASET ORDERING */
/** Verifies the ordering of every record held in <TT>dataArray</TT> by
passing each one, with its 1-based line number, to
<TT>checkLineOrdering</TT>. Every record is examined even after a failure
has been found; as soon as any record fails <TT>haveDataFlag</TT> is set
to false.
@return true if all records passed the check, false otherwise. */
protected boolean checkOrdering() {
    boolean allOrdered = true;
    int lineNum = 0;

    for (short[] itemSet : dataArray) {
        // Line numbers reported to the user start at 1
        lineNum++;
        if (checkLineOrdering(lineNum,itemSet)) {
            continue;
        }
        // Record the failure but keep going so the remaining lines are
        // checked as well
        haveDataFlag = false;
        allOrdered = false;
    }

    return allOrdered;
}
/* CHECK LINE ORDERING */
/** Checks whether a given line in the input data is in strictly ascending
numeric sequence (a repeated value also counts as an ordering error). On
the first offending pair a message dialog naming the line is displayed.
<P> A null item set (a row that was never filled, e.g. because the
corresponding input line was empty) contains nothing that could be out of
order and is therefore accepted instead of causing a
NullPointerException.
@param lineNum the line number used in the error message.
@param itemSet the item set represented by the line; may be null.
@return true if OK and false otherwise. */
protected boolean checkLineOrdering(int lineNum, short[] itemSet) {
    // Rows without a record are trivially ordered
    if (itemSet == null) return(true);

    // Compare each element with its successor
    for (int index=0;index<itemSet.length-1;index++) {
        if (itemSet[index] >= itemSet[index+1]) {
            JOptionPane.showMessageDialog(null,"FILE FORMAT ERROR:\n" +
                    "Attribute data in line " + lineNum +
                    " not in numeric order");
            return(false);
        }
    }

    // Default return
    return(true);
}
/* COUNT NUMBER OF COLUMNS */
/** Counts number of columns represented by input data and stores the
result in <TT>numCols</TT> (and, as a default value, in
<TT>numOneItemSets</TT>).
<P> Only the last element of each row is inspected, so the result is the
largest attribute number only when every row is in ascending order
(NOTE(review): presumably established by <TT>checkOrdering</TT> before
this is called -- confirm against the caller). Rows that are null or
empty are skipped rather than causing a NullPointerException or
ArrayIndexOutOfBoundsException. */
protected void countNumCols() {
    int maxAttribute=0;

    // Loop through data array
    for(int index=0;index<dataArray.length;index++) {
        short[] row = dataArray[index];
        // A row that was never filled (or holds no items) has no columns
        // to contribute
        if (row == null || row.length == 0) continue;
        int lastAttribute = row[row.length-1];
        if (lastAttribute > maxAttribute) maxAttribute = lastAttribute;
    }

    numCols = maxAttribute;
    numOneItemSets = numCols; 	// default value only
}
/* OPEN FILE NAME */
/** Opens the named file for reading and stores a buffered reader for it
in <TT>fileInput</TT>. If the file cannot be opened an error dialog is
shown and the program terminates with exit status 1.
@param nameOfFile the filename of the file to be opened. */
protected void openFileName(String nameOfFile) {
    try {
        // Wrap the raw file reader so that input can be read line by line
        fileInput = new BufferedReader(new FileReader(nameOfFile));
    }
    catch(IOException openFailure) {
        JOptionPane.showMessageDialog(this,"Error Opening File",
                "Error: ",JOptionPane.ERROR_MESSAGE);
        System.exit(1);
    }
}
/* OPEN FILE PATH */
/** Opens the file identified by the <TT>filePath</TT> instance field for
reading and stores a buffered reader for it in <TT>fileInput</TT>. If the
file cannot be opened an error dialog is shown and the program terminates
with exit status 1. */
protected void openFilePath() {
    try {
        // Wrap the raw file reader so that input can be read line by line
        fileInput = new BufferedReader(new FileReader(filePath));
    }
    catch(IOException openFailure) {
        JOptionPane.showMessageDialog(this,"Error Opening File",
                "Error: ",JOptionPane.ERROR_MESSAGE);
        System.exit(1);
    }
}
/* CLOSE FILE */
/** Closes the reader held in the <TT>fileInput</TT> instance field, if
there is one. Does nothing when <TT>fileInput</TT> is null. If closing
fails an error dialog is shown and the program terminates with exit
status 1. The field itself is not reset. */
protected void closeFile() {
    // No reader, nothing to close
    if (fileInput == null) {
        return;
    }

    try {
        fileInput.close();
    }
    catch (IOException closeFailure) {
        JOptionPane.showMessageDialog(this,"Error Closeing File",
                "Error: ",JOptionPane.ERROR_MESSAGE);
        System.exit(1);
    }
}
/* BINARY CONVERSION. */
/** Produce an item set (array of elements) from input line. Each of the
next <TT>numberOfTokens</TT> tokens is parsed as a short integer and handed
to <TT>realloc1</TT> together with the item set built so far.
@param dataLine row from the input data file
@param numberOfTokens number of items in row
@return 1-D array of short integers representing attributes in input
row, or null if <TT>numberOfTokens</TT> is zero (or negative).
@throws NumberFormatException if a token is not a valid short integer.
@throws java.util.NoSuchElementException if <TT>dataLine</TT> holds fewer
than <TT>numberOfTokens</TT> tokens. */
protected short[] binConversion(StringTokenizer dataLine,
                int numberOfTokens) {
    short[] newItemSet = null;

    // Load array
    for (int tokenCounter=0;tokenCounter < numberOfTokens;tokenCounter++) {
        // Short.parseShort yields the primitive directly; the deprecated
        // Short(String) constructor performed the same parse but also
        // allocated a wrapper object
        short number = Short.parseShort(dataLine.nextToken());
        // NOTE(review): realloc1 presumably returns the item set extended
        // by the new element -- confirm against its definition
        newItemSet = realloc1(newItemSet,number);
    }

    // Return itemSet
    return(newItemSet);
}
/* ---------------------------------------------------------------- */
/* */
/* REORDER DATA SET ACCORDING TO ATTRIBUTE FREQUENCY */
/* */
/* ---------------------------------------------------------------- */
/* REORDER INPUT DATA: */
/** Determines the ordering of the input data according to the frequency
of single attributes: counts the single attributes, sorts the counts into
descending order, builds the conversion and reconversion arrays from the
result and finally sets <TT>isOrderedFlag</TT>.
<P> Example, given the data set:
<PRE>
    1 2 5
    1 2 3
    2 4 5
    1 2 5
    2 3 5
</PRE>
This would produce a countArray (ignore index 0):
<PRE>
    +---+---+---+---+---+---+
    |   | 1 | 2 | 3 | 4 | 5 |
    +---+---+---+---+---+---+
    |   | 3 | 5 | 2 | 1 | 4 |
    +---+---+---+---+---+---+
</PRE>
Which sorts to:
<PRE>
    +---+---+---+---+---+---+
    |   | 2 | 5 | 1 | 3 | 4 |
    +---+---+---+---+---+---+
    |   | 5 | 4 | 3 | 2 | 1 |
    +---+---+---+---+---+---+
</PRE>
Giving rise to the conversion array of the form (no index 0):
<PRE>
    +---+---+---+---+---+---+
    |   | 3 | 1 | 4 | 5 | 2 |
    +---+---+---+---+---+---+
    |   | 3 | 5 | 2 | 1 | 4 |
    +---+---+---+---+---+---+
</PRE>
Note that the second row here holds the counts, which no longer play a
role in the conversion exercise. Thus the new column number for column 1
is column 3 (i.e. the first value at index 1). The reconversion array is
of the form:
<PRE>
    +---+---+---+---+---+---+
    |   | 2 | 5 | 1 | 3 | 4 |
    +---+---+---+---+---+---+
</PRE> */
public void idInputDataOrdering() {
    // Support count for every single attribute
    int[][] singleCounts = countSingles();

    // Most frequent attribute first (sorted on the support value, i.e.
    // the second index)
    orderCountArray(singleCounts);

    // Derive the conversion and reconversion arrays from the sorted counts
    defConvertArrays(singleCounts);

    // Remember that the ordering has been established
    isOrderedFlag = true;
}
/* COUNT SINGLES */
/** Counts the number of occurrences of each single attribute in the
input data held in <TT>dataArray</TT>. Rows of <TT>dataArray</TT> that are
null are ignored.
@return 2-D array with <TT>numCols+1</TT> entries; entry <TT>i</TT> is a
pair whose first element is the column (attribute) number <TT>i</TT> and
whose second element is the support count for that attribute. */
protected int[][] countSingles() {
    // One {attribute number, count} pair per attribute; the counts start
    // at zero because a newly allocated int array is zero filled
    int[][] countArray = new int[numCols+1][2];
    for (int attribute=0; attribute<countArray.length; attribute++) {
        countArray[attribute][0] = attribute;
    }

    // Tally every attribute occurrence in every record
    for (short[] record : dataArray) {
        if (record == null) {
            continue;
        }
        for (short attribute : record) {
            countArray[attribute][1]++;
        }
    }

    return countArray;
}
/* ORDER COUNT ARRAY */
/** Bubble sorts the count array produced by the <TT>countSingles</TT>
method into descending order of support count, so that the most frequent
single item comes first. The entry at index 0 is not part of the sort and
entries with equal counts keep their relative order.
@param countArray The 2-D array returned by the <TT>countSingles</TT>
method; it is reordered in place. */
private void orderCountArray(int[][] countArray) {
    final int lastIndex = countArray.length-1;
    boolean swapped;

    // Repeat passes until one completes without exchanging anything
    do {
        swapped = false;
        for (int index=1; index<lastIndex; index++) {
            int[] upper = countArray[index];
            int[] lower = countArray[index+1];
            // Pair already in descending order
            if (upper[1] >= lower[1]) {
                continue;
            }
            // Exchange the attribute number and count of the two entries
            int attribute = upper[0];
            int quantity = upper[1];
            upper[0] = lower[0];
            upper[1] = lower[1];
            lower[0] = attribute;
            lower[1] = quantity;
            swapped = true;
        }
    } while (swapped);
}
/* ORDER FIRST N ELEMENTS IN COUNT ARRAY */
/** Bubble sorts the entries at indexes 1 to <TT>endIndex</TT> (inclusive)
of the count array produced by the <TT>countSingles</TT> method into
descending order of support count. Entries outside that range, including
index 0, are left where they are and entries with equal counts keep their
relative order. <P> Used when ordering classification input data.
@param countArray The 2-D array returned by the <TT>countSingles</TT>
method; it is reordered in place.
@param endIndex the index of the Nth element, i.e. the last entry taking
part in the sort. */
protected void orderFirstNofCountArray(int[][] countArray, int endIndex) {
    boolean swapped;

    // Repeat passes until one completes without exchanging anything
    do {
        swapped = false;
        for (int index=1; index<endIndex; index++) {
            int[] upper = countArray[index];
            int[] lower = countArray[index+1];
            // Pair already in descending order
            if (upper[1] >= lower[1]) {
                continue;
            }
            // Exchange the attribute number and count of the two entries
            int attribute = upper[0];
            int quantity = upper[1];
            upper[0] = lower[0];
            upper[1] = lower[1];
            lower[0] = attribute;
            lower[1] = quantity;
            swapped = true;
        }
    } while (swapped);
}
/* DEFINE CONVERSION ARRAYS: */
/** Defines the conversion and reconversion arrays (both dimensioned with
<TT>numCols+1</TT> entries) from a sorted count array. For the entry at
position <TT>p</TT> (starting at 1) of the sorted array,
<TT>conversionArray</TT> records, under the original attribute number, the
new number <TT>p</TT> and the support count, while
<TT>reconversionArray[p]</TT> records the original attribute number.
@param countArray The 2-D array sorted by the <TT>orderCountArray</TT>
method. */
protected void defConvertArrays(int[][] countArray) {
    // Dimension arrays
    conversionArray = new int[numCols+1][2];
    reconversionArray = new short[numCols+1];

    // Position in the sorted array is the new attribute number
    for (int newNumber=1; newNumber<countArray.length; newNumber++) {
        int originalNumber = countArray[newNumber][0];
        conversionArray[originalNumber][0] = newNumber;
        conversionArray[originalNumber][1] = countArray[newNumber][1];
        reconversionArray[newNumber] = (short) originalNumber;
    }

    // Diagnostic output if desired
    //outputConversionArrays();
}
/* RECAST INPUT DATA. */
/** Recasts the contents of the data array so that each record is ordered
according to conversion array.
<P>Proceed as follows:
1) For each record in the data array create an empty new itemSet array.
2) Place into this array attribute/column numbers that correspond to the
appropriate equivalents contained in the conversion array.
3) Reorder this itemSet and return into the data array.
<P> Rows of the data array that are null (no record was stored for them)
are left as they are instead of causing a NullPointerException; this
matches the way <TT>countSingles</TT> treats such rows. */
public void recastInputData() {
    // Step through data array using loop construct
    for(int rowIndex=0;rowIndex<dataArray.length;rowIndex++) {
        short[] record = dataArray[rowIndex];
        // Nothing to recast where no record was stored
        if (record == null) continue;

        // For each element in the record look up the replacement
        // attribute number in the conversion array
        short[] itemSet = new short[record.length];
        for(int colIndex=0;colIndex<record.length;colIndex++) {
            int attribute = record[colIndex];
            itemSet[colIndex] = (short) conversionArray[attribute][0];
        }

        // Sort itemSet and return to data array
        sortItemSet(itemSet);
        dataArray[rowIndex] = itemSet;
    }
}
/* RECAST INPUT DATA AND REMOVE UNSUPPORTED SINGLE ATTRIBUTES. */
/** Recasts the contents of the data array so that each record is
ordered according to ColumnCounts array and excludes non-supported
elements. <P> Proceed as follows:
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -