// instancelist.java
// (Code-viewer page residue: "Font size:" control label.)
package shared;
import java.lang.*;
import java.io.*;
import java.util.*;
/** The InstanceList class provides basic functions for ordered lists of
* instances. Instances may be labelled or unlabelled. Depending on
* usage, the list may or may not keep counts about various data in the list.
* These counts are kept in the BagCounters class. <P>
* Assumptions : <P>
* File format follows Quinlan (pp. 81-83) EXCEPT: <BR>
* 1) , : | \ . do not appear in names <BR>
* 2) . at end of lists is optional <BR>
* 3) a word that appears before the labels are enumerated, that is preceded by
* \ is interpreted as a modifier. Currently, the only implemented modifier is
* "weighted", which indicates that the list will be weighted. This means that
* labels are assumed to be nominal type for read_names(). <P>
* Comments : <P>
* Line numbers given are the result of '\n', not wrapping of lines. <P>
* "continuous" is not a legal name for the first nominal attribute value; it
* is reserved to indicate a continuous(RealAttrInfo) attribute. <P>
* "discrete" is not a legal name for the first nominal attribute value; it is
* reserved to indicate a discrete (NominalAttrInfo) attribute but with a
* dynamic set of values to be specified as they appear in the data file. <P>
* "discrete n" is supported, where n is an estimate of the number of values of
* the attribute. <P>
* "nolabel" may be specified as the label field ONLY. If specified, it
* indicates an unlabelled list. <P>
*
* Enhancements : <P>
* Cause fatal_error() if read_names() is called by methods other than the
* constructor or InstanceList.read_names() <P>
* Extend read_attributes to handle AttrInfo other than NominalAttrInfo and
* RealAttrInfo. <P>
* Expand capability of input function to: <P>
* 1) allow . in name if not followed by a space <P>
* 2) allow , : | and \ in names if preceded by a backslash <P>
* (this would mimic Quinlan) <P>
* Use lex to do the lexical analysis of the input file. This will be critical
* if the syntax becomes more complicated. <P>
* Ideally impute_unknown_values would handle both nominal and real values in
* a single pass. It should accept an array of operators allowing each
* attribute to handle unknowns in a different way. Obvious operators would be:
* unique_value, mode, mean... <P>
*
* @author James Louis 12/08/2000 Ported to Java.
* @author Alex Kozlov 8/22/96 Added transpose() function.
* @author Dan Sommerfield 2/22/96 Combined BagSet/InstList/CtrInstList
* into one class.
* @author Robert Allen 1/27/95 Modify project() after spreading work to
* Instance & Schema.
* @author Richard Long 7/29/93 Initial revision (.h, .c)
*/
public class InstanceList implements Cloneable{
/** The maximum number of attributes allowable in an Instance.
 * NOTE(review): the field name suggests a limit on attribute <i>values</i>
 * rather than on attributes -- confirm where this field is assigned. */
private static int maxAttrVals;
/** The maximum number of labels allowed for an Instance.
 * NOTE(review): as above, the name suggests label <i>values</i> -- confirm. */
private static int maxLabelVals;
/** Indicator of whether the Instances in this InstanceList are weighted.
 * The file-loading constructors start with FALSE; the copy constructors
 * take the value from the source list. */
private boolean weighted;
/** The total weight of all Instances in this InstanceList. Constructors
 * start it at 0. */
private double totalWeight;
/** Counts of each classification label found in the Instances stored in
 * this object. May be null: the constructors set it to null unless counters
 * are explicitly copied from another list. */
private BagCounters bagCounters;
/** Schema for the data stored in the file from which data in this object
 * is produced. May be null when the list is built from a Schema alone. */
private FileSchema fileSchema;
/** Schema of attributes that will actually be used in computation. */
private Schema schema;
/** Indicator for removing all Instances for which there are unknown values
 * on attributes. Static, so shared by every InstanceList. */
private static boolean removeUnknownInstances;
/** Indicator that Instances that have no weight should be removed from the
 * InstanceList. Static, so shared by every InstanceList. */
private static boolean removeZeroWeights;
/** The rate at which Instances should have attribute values replaced
 * with unknown values. */
private static double corruptToUnknownRate;
/** The seed for random placement of unknown values. */
private static int unknownSeed;
/** The random number generator used for the placement of unknown values. */
private static Random mrandomForUnknowns;
/** Indicator that this InstanceList has been initialized with Instances.
 * NOTE(review): the field is static, so it is shared by all lists rather
 * than being per-instance as this description implies -- confirm intent. */
private static boolean initialized;
/** TRUE if the MineSet program is being used. Defaults to false. */
private static boolean mineset = false;
/** The maximum number of warnings that will be logged for unknown labels.
 * Named like a constant but not declared final. */
private static int MAX_UNKNOWN_LABEL_WARNING = 10;
/** The maximum number of warnings that will be logged for Instances with
 * negative weight values. Named like a constant but not declared final. */
private static int MAX_NEG_WEIGHT_WARNINGS = 10;
/** The list of Instances. */
private LinkedList instances; // raw list; elements are cast to Instance when iterated
/** LogOptions object containing information for logging purposes.
 * Public, static and mutable: shared by every InstanceList.
 */
public static LogOptions logOptions = new LogOptions();
/**
 * Builds a list from a names file and a data file that share one root name.
 * The names file is {@code file + Globals.DEFAULT_NAMES_EXT}; it is parsed
 * into a FileSchema, from which the working Schema is created, and the
 * FileSchema is then displayed. The instances are read from
 * {@code file + Globals.DEFAULT_DATA_EXT}.
 *
 * @param file The root name of the file to be loaded into the InstanceList.
 */
public InstanceList(String file) {
    // Start out as an empty, unweighted list with no counters.
    this.instances = new LinkedList();
    this.weighted = false;
    this.totalWeight = 0;
    this.bagCounters = null;
    init_max_vals();

    // Parse the names file and derive the schema used for computation.
    this.fileSchema = new FileSchema(file + Globals.DEFAULT_NAMES_EXT);
    this.schema = this.fileSchema.create_schema();
    this.fileSchema.display();

    // Load the instances; false is the flag the non-test constructors pass.
    read_data(file + Globals.DEFAULT_DATA_EXT, false);
}
/**
 * Builds a list from a names file and a data file that share one root name
 * but use caller-supplied extensions. The names file
 * ({@code file + namesExtension}) is parsed into a FileSchema, the working
 * Schema is created from it, and the instances are then read from
 * {@code file + dataExtension}.
 *
 * @param file The root name of the file to be loaded into the InstanceList.
 * @param namesExtension The file extension for the schema file.
 * @param dataExtension The file extension for the data file.
 */
public InstanceList(String file,
                    String namesExtension,
                    String dataExtension) {
    // Start out as an empty, unweighted list with no counters.
    this.instances = new LinkedList();
    this.weighted = false;
    this.totalWeight = 0;
    this.bagCounters = null;
    init_max_vals();

    // Parse the names file and derive the schema used for computation.
    this.fileSchema = new FileSchema(file + namesExtension);
    this.schema = this.fileSchema.create_schema();

    // Load the instances; false is the flag the non-test constructors pass.
    read_data(file + dataExtension, false);
}
/**
 * Builds a list whose Schema is a copy of an existing one and whose data
 * comes from a test file. The FileSchema is parsed from
 * {@code file + namesExtension}; the instances are read from
 * {@code file + testExtension} with read_data's second argument set to true
 * (presumably marking the data as a test set -- confirm against read_data).
 *
 * @param catSchema The schema of categories for these data sets.
 * @param file The root name of the file to be loaded into the InstanceList.
 * @param namesExtension The file extension for the schema file.
 * @param testExtension The file extension for the test file.
 */
public InstanceList(Schema catSchema,
                    String file,
                    String namesExtension,
                    String testExtension) {
    instances = new LinkedList();
    totalWeight = 0;
    try {
        schema = new Schema(catSchema);
    } catch (CloneNotSupportedException e) {
        // Identify this constructor by its real signature so the failure
        // can be traced to the right overload.
        Error.err("InstanceList:constructor(Schema,String,String,String):clone not"
                + " supported exception caught");
    }
    weighted = false;
    bagCounters = null;
    init_max_vals();
    // fileSchema is assigned exactly once; no preliminary null store is
    // needed because object fields already default to null.
    fileSchema = new FileSchema(file + namesExtension);
    read_data(file + testExtension, true);
}
/**
 * Builds an empty list that uses a copy of the given Schema. No file is
 * read: the list has no FileSchema, no counters, zero total weight and is
 * unweighted.
 *
 * @param catSchema The schema of categories for these data sets.
 */
public InstanceList(Schema catSchema) {
    this.instances = new LinkedList();
    this.totalWeight = 0;

    // Work on a private copy of the caller's schema.
    try {
        this.schema = new Schema(catSchema);
    } catch (CloneNotSupportedException e) {
        Error.err("InstanceList:constructor(Schema)):clone not"
                + " supported exception caught");
    }

    // Nothing is loaded from disk, so there is no file schema or counters.
    this.fileSchema = null;
    this.weighted = false;
    this.bagCounters = null;
    init_max_vals();
}
/**
 * Builds a list from a test file using copies of an existing Schema and
 * FileSchema. The instances are read from {@code testName} with read_data's
 * second argument set to true (presumably marking the data as a test set --
 * confirm against read_data).
 *
 * @param catSchema The schema of categories for these data sets.
 * @param names The schema of attributes for these data sets.
 * @param testName The file name for the test file.
 */
public InstanceList(Schema catSchema,
                    FileSchema names,
                    String testName) {
    instances = new LinkedList();
    weighted = false;
    totalWeight = 0;
    bagCounters = null;
    init_max_vals();
    fileSchema = new FileSchema(names);
    try {
        schema = new Schema(catSchema);
    } catch (CloneNotSupportedException e) {
        // This is not the copy constructor; report the overload that
        // actually failed so the diagnostic is not misleading.
        Error.err("InstanceList:constructor(Schema,FileSchema,String):clone not"
                + " supported exception caught");
    }
    read_data(testName, true);
}
/**
 * Copy constructor that does not preserve counters. The new list gets a
 * copy of the source's Schema, the source's weighted flag, every instance
 * of the source (added through add_instance) and, when the source has one,
 * a copy of its FileSchema. Counters are never copied by this overload; use
 * {@code InstanceList(InstanceList, boolean)} with {@code true} for that.
 *
 * @param source The InstanceList that is being copied.
 */
public InstanceList(InstanceList source) {
    instances = new LinkedList();
    // Original author's note: weight is accumulated into totalWeight as
    // instances are added, so it starts at zero here.
    totalWeight = 0;
    try {
        schema = new Schema(source.schema);
    } catch (CloneNotSupportedException e) {
        Error.err("InstanceList:copyConstructor:clone not"
                + " supported exception caught");
    }
    fileSchema = null;
    weighted = source.weighted;
    // This overload never carries counters over.
    bagCounters = null;
    init_max_vals();

    for (ListIterator pix = source.instance_list().listIterator(); pix.hasNext();) {
        add_instance((Instance) pix.next());
    }

    // If the source has a fileSchema, copy it (after the instances, as before).
    if (source.fileSchema != null) {
        fileSchema = new FileSchema(source.fileSchema);
    }
}
/** Copy constructor.
* @param source The InstanceList object to be copied.
* @param preserveCounters TRUE if counters of values should be copied, FALSE otherwise.
*/
public InstanceList(InstanceList source, boolean preserveCounters) {
instances = new LinkedList();
totalWeight = 0 ; // will get set when instances are added
try{
schema = new Schema(source.schema);
}catch(CloneNotSupportedException e){
Error.err("InstanceList:copyConstructor:clone not"
+" supported exception caught");}
fileSchema = null;
weighted = source.weighted;
bagCounters = null;
// weight is accumulated into totalWeight as instances are
// added.
init_max_vals();
for (ListIterator pix = source.instance_list().listIterator();
pix.hasNext();)
add_instance((Instance)pix.next());
// If we have a fileSchema, copy it.
if(source.fileSchema != null)
fileSchema = new FileSchema(source.fileSchema);
// If we have counters and we want to preserve them, then copy.
if(source.has_counters() && preserveCounters)
bagCounters = new BagCounters(source.counters());
// if(Globals.DBG(OK();)
}
/** Build an instance list which is designed to be a test list for some
* other training set. The training set must have been built with a
* FileSchema which will now be used to interpret the test data.
* @param trainList The training InstanceList that will be used to identify Schema for test data set.
* @param testName The name of the file containing the test data set.
*/
public InstanceList(InstanceList trainList,String testName) {
instances = new LinkedList();
totalWeight = 0;
try{
schema = new Schema(trainList.get_schema());
}catch(CloneNotSupportedException e){e.printStackTrace();System.exit(1);}
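// --- Code-viewer page residue (not part of the original source file) ---
// Keyboard shortcut help:
//   Copy code:            Ctrl + C
//   Search code:          Ctrl + F
//   Full-screen mode:     F11
//   Toggle theme:         Ctrl + Shift + D
//   Show shortcuts:       ?
//   Increase font size:   Ctrl + =
//   Decrease font size:   Ctrl + -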