// instancereader.java  (code-viewer page residue: file name and a "Font size:" label)
package shared;
import java.io.*;
/** Provide a set of functions for reading a list of instances from a source which
* provides a single instance at a time, attribute by attribute. Supports the
* exclusion of nominal attributes which have more than a set limit on the number
* of values.
* @author James Louis Java Implementation.
* @author Dan Sommerfield 5/03/96 Initial revision (.h, .c)
*
*/
public class InstanceReader {
/** The InstanceList in which are stored Instances that are read. Assigned from
* the constructor's ownerList argument.
*/
private InstanceList instList;
/** TRUE if attribute values not present in the list's schema are to be
* converted to UNKNOWN, FALSE otherwise. Assigned from the constructor's
* makeUnknown argument.
*/
private boolean makeUnknowns;
/** TRUE if unknown labels are possible, FALSE otherwise. Assigned from the
* constructor's allowUnknownLab argument.
*/
private boolean allowUnknownLabels;
/** The FileSchema detailing the data being read by this InstanceReader. It is
* obtained from the owner list's get_original_schema(); construct_assim_map()
* may replace individual attribute infos in it.
*/
private FileSchema fileSchema;
/** One AttrValue per attribute of the file schema, each allocated by the
* constructor. NOTE(review): presumably holds the values of the instance
* currently being assembled -- confirm against the set functions.
*/
private AttrValue[] vals;
/** One flag per attribute of the file schema, all FALSE after construction.
* NOTE(review): presumably records which attributes have already been set for
* the instance currently being assembled -- confirm against the set functions.
*/
private boolean[] setAttr;
/** FALSE after construction. NOTE(review): presumably TRUE once any attribute
* of the current instance has been set -- confirm against the set functions.
*/
private boolean anySet;
/** Limit on the number of distinct values allowed on any given attribute, as
* passed to the constructor. The constructor reports an error for a negative
* value.
*/
private int attrValueLimit;
/** Weight value, initialized to 1.0 by the constructor. NOTE(review): the
* original comment called this the total weight of Instances; it may instead be
* the weight given to the instance being assembled -- confirm against its users.
*/
private double weight;
/** TRUE after construction. NOTE(review): presumably enables a warning related
* to completing the set of attributes of an instance -- confirm against its users.
*/
private boolean warnOnSetComplete;
/** Assimilation map built by construct_assim_map(): entry i is the index in the
* list's schema of the attribute that file-schema attribute i maps to, or
* mapToLabel for the label column, or the initial "not mapped" value.
*/
private int[] assimMap;
/** One flag per attribute of the file schema. NOTE(review): presumably marks
* file-schema attributes that have been projected out -- confirm against its users.
*/
private boolean[] projMap;
/** One flag per attribute of the file schema. NOTE(review): presumably the
* projection flags expressed for the list's schema -- confirm against its users.
*/
private boolean[] listProjMap;
//private QuarkTable[] quarkTables;
/** Special value for mapping operations: marks an assimilation map entry (or a
* label column) that is not mapped. It has to differ from every valid attribute
* index and from mapToLabel.
*/
public static final int unmapped = -1;
/** Special value for mapping operations: marks the assimilation map entry of
* the file schema's label column. It has to differ from every valid attribute
* index and from unmapped.
*/
public static final int mapToLabel = -2;
/** Convenience constructor. Builds an InstanceReader which can be used to
 * construct instances for ownerList, delegating to the four-argument
 * constructor with a limit of 0, makeUnknown FALSE and allowUnknownLab FALSE.
 * <BR>
 * OwnerList MUST have a FileSchema associated with it; this defines the form
 * of all incoming data. The data will be ASSIMILATED to the form of
 * ownerList's schema as it is read.
 * @param ownerList The InstanceList in which Instances will be stored.
 */
public InstanceReader(InstanceList ownerList) { // ADDED BY JL
    this(ownerList, 0, false, false);
}
/** Convenience constructor. Builds an InstanceReader which can be used to
 * construct instances for ownerList, delegating to the four-argument
 * constructor with the given limit, makeUnknown FALSE and allowUnknownLab
 * FALSE. <BR>
 * OwnerList MUST have a FileSchema associated with it; this defines the form
 * of all incoming data. The data will be ASSIMILATED to the form of
 * ownerList's schema as it is read. <BR>
 * The limit parameter specifies an optional limit on the number of distinct
 * attribute values which are allowed on any given attribute. If this limit is
 * exceeded, the attribute in question will be projected out, and future
 * incoming data for that attribute will be ignored. <BR>
 * NOTE: for reading test data, limit should be set to 0.
 * @param ownerList The InstanceList in which Instances will be stored.
 * @param limit The limit on how many distinct attribute values are allowed.
 */
public InstanceReader(InstanceList ownerList, int limit) { // ADDED BY JL
    this(ownerList, limit, false, false);
}
/** Convenience constructor. Builds an InstanceReader which can be used to
 * construct instances for ownerList, delegating to the four-argument
 * constructor with the given limit and makeUnknown, and allowUnknownLab
 * FALSE. <BR>
 * OwnerList MUST have a FileSchema associated with it; this defines the form
 * of all incoming data. The data will be ASSIMILATED to the form of
 * ownerList's schema as it is read. <BR>
 * The limit parameter specifies an optional limit on the number of distinct
 * attribute values which are allowed on any given attribute. If this limit is
 * exceeded, the attribute in question will be projected out, and future
 * incoming data for that attribute will be ignored. <BR>
 * The makeUnknown parameter, if TRUE, will cause all attribute values not
 * present in ownerList's schema to be converted to UNKNOWN. <BR>
 * NOTE: for reading test data, limit should be set to 0 and makeUnknown
 * should be TRUE.
 * @param ownerList The InstanceList in which Instances will be stored.
 * @param limit The limit on how many distinct attribute values are allowed.
 * @param makeUnknown TRUE if attribute values missing from the schema become
 * UNKNOWN, FALSE otherwise.
 */
public InstanceReader(InstanceList ownerList, int limit, boolean makeUnknown) { // ADDED BY JL
    this(ownerList, limit, makeUnknown, false);
}
/** Constructor. Builds an InstanceReader which can be used to construct instances
* for ownerList. OwnerList MUST have a FileSchema associated with it; this
* defines the form of all incoming data. The data will be ASSIMILATED to the form
* of ownerList's schema as it is read. <BR>
* The limit parameter specifies an optional limit on the number of distinct
* attribute values which are allowed on any given attribute. If this limit is
* exceeded, the attribute in question will be projected out, and future incoming
* data for that attribute will be ignored. <BR>
* The makeUnknown parameter, if TRUE, will cause all attribute values not present
* in ownerList's schema to be converted to UNKNOWN. <BR>
* NOTE: for reading test data, limit should be set to 0 and makeUnknown should be
* TRUE.
* @param ownerList The InstaceList in which Instances will be stored.
* @param limit The limit number of how many attribute values are possible.
* @param makeUnknown TRUE if unknown values for attributes are possible, FALSE otherwise.
* @param allowUnknownLab TRUE if unknown labels are possible, FALSE otherwise.
*/
public InstanceReader(InstanceList ownerList, int limit, boolean makeUnknown, boolean allowUnknownLab) {
instList = ownerList;
makeUnknowns = makeUnknown;
allowUnknownLabels = allowUnknownLab;
setAttr = new boolean[ownerList.get_original_schema().num_attr()];
for(int i=0;i<setAttr.length;i++)setAttr[i]=false;
anySet = false;
attrValueLimit = limit;
weight = 1.0;
fileSchema = ownerList.get_original_schema();
vals = new AttrValue[ownerList.get_original_schema().num_attr()];
for(int j=0;j<vals.length;j++)vals[j]=new AttrValue();///ADDED BY JL
assimMap = new int[ownerList.get_original_schema().num_attr()];
for(int i=0;i<assimMap.length;i++)assimMap[i]=-1;
projMap = new boolean[ownerList.get_original_schema().num_attr()];
listProjMap = new boolean[ownerList.get_original_schema().num_attr()];
//quarkTables(0,ownerList.get_original_schema().num_attr(),null)
warnOnSetComplete = true;
//fileSchema.OK();
if(attrValueLimit < 0)
Error.err("InstanceReader::InstanceReader: negative"
+ " value is not allowed for attrValueLimit->fatal_error");
//construct the assimilation map to be used during set functions
construct_assim_map();
//take ownership of the list we're building
ownerList = null;
//OK();
}
/** Attempts to match values for two fixed value set nominals. The two match
 * when they have the same number of values and the values at every index from
 * Globals.FIRST_NOMINAL_VAL up to (but excluding) num_values() are equal.
 * On failure an error message naming the attribute is reported through
 * Error.err, followed by the values of both versions.
 * @param name The name of the attribute.
 * @param a1 The first nominal being compared (reported as the training version).
 * @param a2 The second nominal being compared (reported as the testing version).
 */
public void match_values(String name, NominalAttrInfo a1, NominalAttrInfo a2) {
    //ASSERT(a1.is_fixed());
    //ASSERT(a2.is_fixed());
    boolean mismatch = a1.num_values() != a2.num_values();
    if (!mismatch) {
        // NOTE(review): the index range is kept exactly as in the original
        // (changed for the zoo test set, per the original comment) -- confirm
        // it covers the last value when FIRST_NOMINAL_VAL is not 0.
        for (int i = Globals.FIRST_NOMINAL_VAL; i < a1.num_values(); i++) {
            // Compare by value, not by reference: presumably get_value
            // returns a String, for which != only tests object identity.
            if (!java.util.Objects.equals(a1.get_value(i), a2.get_value(i))) {
                mismatch = true;
                break; // one difference is enough to report the mismatch
            }
        }
    }
    if (mismatch) {
        Error.err("InstanceReader::match_values: mismatch"
            + " in fixed nominals for attribute \"" + name + "\": ");
        Error.err("training version: ");
        a1.display_attr_values();
        Error.err("testing version: ");
        a2.display_attr_values();
        Error.err(" -->fatal_error");
    }
}
/** Constructs the assimilation map used to map attribute numbers used in the
* assimilation schema (set functions) into numbers used in the list's schema.
*
*/
private void construct_assim_map() {
//mark the label column as mapped to the label
if(fileSchema.get_label_column() != unmapped)
assimMap[fileSchema.get_label_column()] = mapToLabel;
//for each attribute name in the file schema (test data), find the
//same name in the list's schema(training data) and establish the mapping
//No attributes in the list's schema may be left unaccounted for.
int numDestAttr = get_schema().num_attr();
boolean[] checklist = new boolean[numDestAttr];
for(int i=0;i<checklist.length;i++)checklist[i]=false;
int checkCount = 0;
for(int i=0;i<fileSchema.num_attr();i++){
for(int j=0;j<numDestAttr;j++){
String name = fileSchema.attrInfos[i].name();
if(name.equals(get_schema().attr_name(j))) {
//make sure the column is not mapped to some other column.
//if it is mapped to the label or weight, then ignore it.
//ASSERT(assimMap[i] != false);
if(assimMap[i] == unmapped){
assimMap[i] = j;
checkCount++;
checklist[j] = true;
//assimilate attribute infos. Thre are some rules here:
// 1. if the types don't match, it is an error.
// 2. if both are fixed nominals, the exact values must match
// 3. if the list's schema is an unfixed nominal, use it
// 4. if the list's schema is a fixed nominl, but the file
// schema specifies an unfixed noinal, create an unfixed
// nominal with the values from the list's schema's
// fixed nominal.
AttrInfo testAI = fileSchema.attrInfos[i];
AttrInfo trainAI = get_schema().attr_info(j);
if(trainAI.can_cast_to_nominal()) {
//make sure the nominal types match
if(!testAI.can_cast_to_nominal())
Error.err("InstanceReader::constuct_"
+ "assim_map: training schema requires a nominal "
+ "for attribute \"" +name+"\" -->fatal_error");
// other nominal checks
NominalAttrInfo testNAI = testAI.cast_to_nominal();
NominalAttrInfo trainNAI = trainAI.cast_to_nominal();
//check fixed/unfixed status
if(trainNAI.is_fixed()) {
if(testNAI.is_fixed()) {
//by rule#2, the exact values must match
match_values(name, trainNAI, testNAI);
}
else{
//replace attribute info for test data with the
//training version, but make unfixed(rule #4)
fileSchema.set_attr_info(i,trainAI);
fileSchema.attrInfos[i].cast_to_nominal().fix_values(false);
}
}
else {
//just use the training version (rule #3)
fileSchema.set_attr_info(i,trainAI);
}
}
else if(trainAI.can_cast_to_real()) {
if(!testAI.can_cast_to_real())
Error.err("InstanceReader::construct_"
+"assim_map: training schema requires a numerical "
+"value for attribute \"" +name+"\" -->fatal_error");
}
else
Error.err("InstanceReader::construct_"
+"assim_map: training schema contains an attribute \""
+name+"\" which is neither real nor nominal-->fatal_error");
}
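/* Code-viewer page residue (not part of the Java source) -- keyboard shortcut help:
 *   Copy code: Ctrl + C
 *   Search code: Ctrl + F
 *   Full-screen mode: F11
 *   Switch theme: Ctrl + Shift + D
 *   Show shortcuts: ?
 *   Increase font size: Ctrl + =
 *   Decrease font size: Ctrl + -
 */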