?? instancelist.java
字號:
fileSchema = new FileSchema(trainList.get_original_schema());
weighted = false;
bagCounters = null;
init_max_vals();
read_data(testName, true);
}
/** Reports whether the bag counters for this InstanceList have been built.
 * This is a pure query: it never creates the counters.
 * @return True if bagCounters is currently non-null, False if it is null.
 */
public boolean has_counters() {
    final boolean countersBuilt = (bagCounters != null);
    return countersBuilt;
}
/** Returns the bag counters for this list, building them first if needed.
 * The work of creating and filling the counters is delegated to
 * ensure_counters(), which does nothing when they already exist.
 * @return The BagCounters object held by this InstanceList.
 */
public BagCounters counters() {
    this.ensure_counters();
    return this.bagCounters;
}
/** Makes sure bagCounters exists. If it is still null, a new BagCounters is
 * created from the current schema and every instance in the list is added
 * to it in list order. If bagCounters is already set, nothing happens.
 */
public void ensure_counters() {
    // Counters already built: nothing to do.
    if (bagCounters != null) {
        return;
    }
    bagCounters = new BagCounters(get_schema());
    // An empty list leaves the freshly created counters untouched.
    if (no_instances()) {
        return;
    }
    ListIterator cursor = instances.listIterator();
    while (cursor.hasNext()) {
        bagCounters.add_instance((Instance) cursor.next());
    }
}
/** Reads the data set from the supplied file into this InstanceList,
 * replacing whatever instances it currently holds.
 *
 * Steps, in order:
 * <ol>
 * <li>Reads the REMOVE_UNKOWN_INST and CORRUPT_UNKOWN_RATE options, removes
 *     all current instances and discards any existing bagCounters.</li>
 * <li>Tokenizes the file with a StreamTokenizer in which end-of-line is
 *     significant, '|' starts a comment, '?', ',' and '.' are ordinary
 *     characters, and '_' and ' ' are word characters. Every token that is
 *     not an end-of-line is handed to read_data_line().</li>
 * <li>Releases the InstanceReader, optionally removes instances that have
 *     unknown attribute values, optionally corrupts values to unknown,
 *     projects out attributes that have no values, applies the loss
 *     specification from the FileSchema and re-sets the schema.</li>
 * </ol>
 * The underlying file reader is always closed before this method returns
 * normally or propagates an exception.
 *
 * @param file The name of the file containing the data set.
 * @param isTest Indicator of whether this is a test data set. True
 * indicates this is a test data set, False otherwise.
 */
public void read_data(String file, boolean isTest) {
    GetEnv getenv = new GetEnv();
    removeUnknownInstances = getenv.get_option_bool("REMOVE_UNKOWN_INST");
    corruptToUnknownRate = getenv.get_option_real_range("CORRUPT_UNKOWN_RATE");
    remove_all_instances();
    // Any counters describe the old contents; drop them.
    bagCounters = null;

    // Declared outside the try so the finally clause can close it.
    BufferedReader dataFile = null;
    try {
        dataFile = new BufferedReader(new FileReader(file));

        StreamTokenizer dataStream = new StreamTokenizer(dataFile);
        dataStream.eolIsSignificant(true);
        dataStream.commentChar((int)'|');
        dataStream.ordinaryChar((int)'?');
        dataStream.ordinaryChar((int)',');
        dataStream.ordinaryChar((int)'.');
        dataStream.wordChars((int)'_', (int)'_');
        dataStream.wordChars((int)' ', (int)' ');
        // Number parsing is switched on only when the first attribute is real-valued.
        parseNumbers(dataStream, fileSchema.attrInfos[0] instanceof RealAttrInfo);

        InstanceReader reader = new InstanceReader(this, maxAttrVals, isTest);
        fileSchema.skip_white_comments_same_line(dataFile);
        try {
            while (dataStream.nextToken() != StreamTokenizer.TT_EOF) {
                // Blank lines show up as bare end-of-line tokens; skip them.
                if (dataStream.ttype != StreamTokenizer.TT_EOL) {
                    read_data_line(dataStream, isTest, reader);
                }
            }
            // Done reading; release the list.
            reader.release_list();

            if (removeUnknownInstances) {
                remove_inst_with_unknown_attr();
            }
            if (no_instances()) {
                System.out.println("InstanceList.read_data WARNING: no"
                    + " instances in file");
            }

            // The seed and generator are reset on every read, so the seed
            // option is looked up afresh whenever corruption is requested.
            unknownSeed = -1;
            mrandomForUnknowns = null;
            if (corruptToUnknownRate > 0) {
                unknownSeed = getenv.get_option_int("UNKOWN_RATE_SEED");
                mrandomForUnknowns = new Random(unknownSeed);
                corrupt_values_to_unknown(corruptToUnknownRate, mrandomForUnknowns);
            }

            // Remove any nominals which have no values other than unknowns.
            try {
                remove_unknown_attributes();
            } catch (CloneNotSupportedException e) {
                Error.err("Clone not supported exception caught");
            }

            // Apply the loss matrix (from the FileSchema) now.
            fileSchema.apply_loss_spec(schema);
            Schema newSchema = schema;
            try {
                set_schema(newSchema);
            } catch (CloneNotSupportedException e) {
                Error.err("Clone not supported exception caught");
            }
        } catch (IOException e) {
            Error.err("InstanceList.read_data"
                + " ERROR" + ": " + file + " (" + e.getMessage() + ")");
        }
    } catch (FileNotFoundException e) {
        Error.err("-"
            + " Data file NOT found" + ": " + file);
    } finally {
        // Closing the BufferedReader also closes the wrapped FileReader.
        if (dataFile != null) {
            try {
                dataFile.close();
            } catch (IOException ignored) {
                // All data has already been consumed (or an error was already
                // reported), so a failure to close is not actionable here.
            }
        }
    }
}
/** Deletes from this list every instance that has at least one attribute
 * whose value is unknown according to that attribute's AttrInfo.
 * Scanning of an instance stops at its first unknown value. Removal goes
 * through remove_instance(), so it uses the same iterator that is walking
 * the list.
 * The method is public rather than private as a change made for C45
 * (per the note left in the original source).
 */
public void remove_inst_with_unknown_attr() {
    ListIterator cursor = instances.listIterator(0);
    while (cursor.hasNext()) {
        Instance candidate = (Instance) cursor.next();
        boolean foundUnknown = false;
        int attrNum = 0;
        while (!foundUnknown && attrNum < num_attr()) {
            foundUnknown = attr_info(attrNum).is_unknown(candidate.get_value(attrNum));
            attrNum++;
        }
        if (foundUnknown) {
            // Removes the element most recently returned by next().
            remove_instance(cursor, candidate);
        }
    }
}
/** Removes the specified Instance from this list through the supplied
 * iterator and keeps the cached bookkeeping in step.
 * The element actually removed is whichever one the iterator returned
 * last, so the caller must have just obtained {@code instance} from
 * {@code pix}.
 * @param pix The ListIterator positioned on the Instance to be removed.
 * @param instance The Instance to be removed; its weight is subtracted
 * from the cached total weight and, if bag counters exist, it is deleted
 * from them as well.
 */
public void remove_instance(ListIterator pix, Instance instance) {
    if (instance == null)
        Error.err("InstanceList.remove_instance: tried "
            + "to dereference a null instance -->fatal_error");
    pix.remove();
    // Remove from counters if we have them.
    if (bagCounters != null)
        bagCounters.del_instance(instance);
    // Update the totalWeight cache: subtract the removed instance's weight.
    // (Previously this assigned "weight - 1", discarding the running total.)
    totalWeight -= instance.get_weight();
}
/** Empties this InstanceList: every stored Instance is dropped and the
 * cached total weight is reset to zero. bagCounters is not touched here.
 */
public void remove_all_instances() {
    MLJ.ASSERT(instances != null,"InstanceList.remove_all_instances: instance is null");
    instances.clear();
    totalWeight = 0;
}
/** Returns how many instances this InstanceList currently holds, as
 * reported by the underlying list's size().
 * NOTE(review): earlier documentation described this as taking time
 * proportional to the number of instances; if the backing list is a
 * java.util.LinkedList its size() is constant time -- confirm.
 * @return An integer value of the number of Instances contained in this list.
 */
public int num_instances() {
    final int count = instances.size();
    return count;
}
/** Returns the number of values of the nominal label, i.e. the number of
 * categories an instance in this list can be labelled with.
 * Relies on nominal_label_info(), so the label must be castable to a
 * nominal attribute.
 * @return An integer value of the number of categories.
 */
public int num_categories() {
    NominalAttrInfo labelInfo = nominal_label_info();
    return labelInfo.num_values();
}
/** Returns the label information of this InstanceList's schema, cast to
 * its nominal form via AttrInfo.cast_to_nominal().
 * @return The nominal label information contained in the schema.
 */
public NominalAttrInfo nominal_label_info() {
    AttrInfo labelInfo = label_info();
    return labelInfo.cast_to_nominal();
}
/** Looks up the label description in this InstanceList's schema.
 * @return The AttrInfo that the schema returned by get_schema() reports
 * for the label.
 */
public AttrInfo label_info() {
    return this.get_schema().label_info();
}
/** Tells whether this InstanceList is empty.
 * @return True if the list holds no Instances at all, False if it holds
 * at least one.
 */
public boolean no_instances() {
    return instances.isEmpty();
}
/** Projects out, in place, every attribute that can be cast to nominal and
 * whose nominal information reports zero values (per the original comment,
 * attributes which have only unknown values). All other attributes are kept.
 *
 * @throws CloneNotSupportedException If InstanceList.project_in_place
 * encounters an exception during cloning of the Schema.
 */
private void remove_unknown_attributes() throws CloneNotSupportedException {
    boolean[] keepAttr = new boolean[num_attr()];
    for (int attrNum = 0; attrNum < keepAttr.length; attrNum++) {
        boolean emptyNominal = schema.attr_info(attrNum).can_cast_to_nominal()
            && schema.nominal_attr_info(attrNum).num_values() == 0;
        // Keep everything except nominals without any value.
        keepAttr[attrNum] = !emptyNominal;
    }
    project_in_place(keepAttr);
}
/** Gives direct access to the list that backs this InstanceList. The
 * returned object is the internal list itself, not a copy.
 * @return A LinkedList containing the Instances stored in this InstanceList.
 */
public LinkedList instance_list() {
    return this.instances;
}
/** This function is very similar to project(), except that the list is
* projected "in place"--attributes are removed directly from the list
* and the schema is updated.
* @param projMask An array of boolean values representing which attributes shall be use in this
* InstanceList object. Values of projMask are related by order to the atributes.
* Values of TRUE indicate that attribute will be used, FALSE indicates the
* attribute will not be used.
* @throws CloneNotSupportedException if the cloning process in Schema encounters an exception.
*/
public void project_in_place(boolean[] projMask) throws CloneNotSupportedException {
MLJ.ASSERT(schema != null,"InstanceList.project_in_place: schema is null");
Schema newSchema = new Schema(schema.project(projMask));
//Project all instances in the list "in place" --we cheat a bit
// here because we have instances in the list with different
// schemas. However, we clean everything up at the end and check
// the schemas carefully.
int numInstBefore = num_instances();
//ListIterator temp = instances.listIterator(0);
int index = 0;
for(int i=0;i<numInstBefore;i++) {
//Work ona temporary pix; otherwise we'll remove an instance
// before advancing the pix which is bad.
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -