?? relieffattributeeval.java
字號:
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.attributeSelection;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import org.agentacademy.modules.dataminer.core.Attribute;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Option;
import org.agentacademy.modules.dataminer.core.OptionHandler;
import org.agentacademy.modules.dataminer.core.Utils;
import org.apache.log4j.Logger;
/**
* Class for Evaluating attributes individually using ReliefF. <p>
*
* For more information see: <p>
*
* Kira, K. and Rendell, L. A. (1992). A practical approach to feature
* selection. In D. Sleeman and P. Edwards, editors, <i>Proceedings of
* the International Conference on Machine Learning,</i> pages 249-256.
* Morgan Kaufmann. <p>
*
* Kononenko, I. (1994). Estimating attributes: analysis and extensions of
* Relief. In De Raedt, L. and Bergadano, F., editors, <i> Machine Learning:
* ECML-94, </i> pages 171-182. Springer Verlag. <p>
*
* Marko Robnik Sikonja, Igor Kononenko: An adaptation of Relief for attribute
* estimation on regression. In D.Fisher (ed.): <i> Machine Learning,
* Proceedings of 14th International Conference on Machine Learning ICML'97,
* </i> Nashville, TN, 1997. <p>
*
*
* Valid options are: <p>
*
* -M &lt;number of instances&gt; <br>
* Specify the number of instances to sample when estimating attributes. <br>
* If not specified then all instances will be used. <p>
*
* -D &lt;seed&gt; <br>
* Seed for randomly sampling instances. <p>
*
* -K &lt;number of neighbours&gt; <br>
* Number of nearest neighbours to use for estimating attributes. <br>
* (Default is 10). <p>
*
* -W <br>
* Weight nearest neighbours by distance. <p>
*
* -A &lt;sigma&gt; <br>
* Specify sigma value (used in an exp function to control how quickly <br>
* weights decrease for more distant instances). Use in conjunction with <br>
* -W. Sensible values = 1/5 to 1/10 the number of nearest neighbours. <p>
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision: 1.3 $
*/
public class ReliefFAttributeEval
extends AttributeEvaluator
implements OptionHandler
{
/** log4j logger obtained for this class */
public static Logger log = Logger.getLogger(ReliefFAttributeEval.class);
/** The training instances */
private Instances m_trainInstances;
/** The index of the class attribute */
private int m_classIndex;
/** The number of attributes */
private int m_numAttribs;
/** The number of instances */
private int m_numInstances;
/** Flag for a numeric class */
private boolean m_numericClass;
/** The number of classes if class is nominal */
private int m_numClasses;
/**
* Used to hold the probability of a different class val given nearest
* instances (numeric class)
*/
private double m_ndc;
/**
* Used to hold the prob of different value of an attribute given
* nearest instances (numeric class case)
*/
private double[] m_nda;
/**
* Used to hold the prob of a different class val and different att
* val given nearest instances (numeric class case)
*/
private double[] m_ndcda;
/** Holds the weights that relief assigns to attributes */
private double[] m_weights;
/** Prior class probabilities (discrete class case) */
private double[] m_classProbs;
/**
* The number of instances to sample when estimating attributes.
* Set through setSampleSize(); the sample-size tip text describes -1 as
* the default, meaning that all instances are used.
*/
private int m_sampleM;
/**
* The number of nearest hits/misses. Set through setNumNeighbours();
* the -K help text gives the default as 10.
*/
private int m_Knn;
/** k nearest scores + instance indexes for n classes */
private double[][][] m_karray;
/** Upper bound for numeric attributes */
private double[] m_maxArray;
/** Lower bound for numeric attributes */
private double[] m_minArray;
/** Keep track of the farthest instance for each class */
private double[] m_worst;
/** Index in the m_karray of the farthest instance for each class */
private int[] m_index;
/** Number of nearest neighbours stored of each class */
private int[] m_stored;
/**
* Random number seed used for sampling instances. Set through setSeed();
* the -D help text gives the default as 1.
*/
private int m_seed;
/**
* Used to (optionally) weight nearest neighbours by their distance
* from the instance in question. Each entry holds
* exp(-((rank(r_i, i_j)/sigma)^2)) where rank(r_i,i_j) is the rank of
* instance i_j in a sequence of instances ordered by the distance
* from r_i. sigma is a user defined parameter (see m_sigma).
**/
private double[] m_weightsByRank;
/**
* The user defined sigma used in the rank weighting formula above; set
* through setSigma(), which only accepts values greater than zero.
* NOTE(review): this comment used to give the default as 20, while the -A
* help text in listOptions() says the default is 2 - confirm against
* resetOptions(), which is not visible here.
*/
private int m_sigma;
/** Weight by distance rather than equal weights */
private boolean m_weightByDistance;
/**
 * Describes this attribute evaluator in plain text.
 *
 * @return a short description of the evaluator, suitable for display in
 * the explorer/experimenter gui
 */
public String globalInfo() {
  final String title = "ReliefFAttributeEval :\n\nEvaluates the worth of an attribute by ";
  final String sampling = "repeatedly sampling an instance and considering the value of the ";
  final String neighbours = "given attribute for the nearest instance of the same and different ";
  final String classKinds = "class. Can operate on both discrete and continuous class data.\n";
  return title + sampling + neighbours + classKinds;
}
/**
 * Creates a new evaluator. All option handling is delegated to
 * resetOptions(), which is the only call made here.
 */
public ReliefFAttributeEval () {
  this.resetOptions();
}
/**
 * Returns an enumeration describing the available options.
 *
 * Five options are listed, in this order: -M (sample size), -D (seed),
 * -K (number of neighbours), -W (weight by distance) and -A (sigma).
 *
 * @return an enumeration of all the available options.
 **/
public Enumeration listOptions () {
  // Five options are added below, so presize the vector for all of them
  // (the previous capacity of 4 forced a resize on the last add).
  Vector newVector = new Vector(5);
  newVector
    .addElement(new Option("\tSpecify the number of instances to\n"
                           + "\tsample when estimating attributes.\n"
                           + "\tIf not specified, then all instances\n"
                           + "\twill be used.", "M", 1
                           , "-M <num instances>"));
  newVector.
    addElement(new Option("\tSeed for randomly sampling instances.\n"
                          + "\t(Default = 1)", "D", 1
                          , "-D <seed>"));
  newVector.
    addElement(new Option("\tNumber of nearest neighbours (k) used\n"
                          + "\tto estimate attribute relevances\n"
                          + "\t(Default = 10).", "K", 1
                          , "-K <number of neighbours>"));
  // -W is the only option registered with an argument count of 0.
  newVector.
    addElement(new Option("\tWeight nearest neighbours by distance\n", "W"
                          , 0, "-W"));
  newVector.
    addElement(new Option("\tSpecify sigma value (used in an exp\n"
                          + "\tfunction to control how quickly\n"
                          + "\tweights for more distant instances\n"
                          + "\tdecrease. Use in conjunction with -W.\n"
                          + "\tSensible value=1/5 to 1/10 of the\n"
                          + "\tnumber of nearest neighbours.\n"
                          + "\t(Default = 2)", "A", 1, "-A <num>"));
  return newVector.elements();
}
/**
 * Parses a given list of options. All options are first reset through
 * resetOptions(); each recognised option is then applied through its
 * setter. <p>
 *
 * Valid options are: <p>
 *
 * -M &lt;number of instances&gt; <br>
 * Specify the number of instances to sample when estimating attributes. <br>
 * If not specified then all instances will be used. <p>
 *
 * -D &lt;seed&gt; <br>
 * Seed for randomly sampling instances. <p>
 *
 * -K &lt;number of neighbours&gt; <br>
 * Number of nearest neighbours to use for estimating attributes. <br>
 * (Default is 10). <p>
 *
 * -W <br>
 * Weight nearest neighbours by distance. <p>
 *
 * -A &lt;sigma&gt; <br>
 * Specify sigma value (used in an exp function to control how quickly <br>
 * weights decrease for more distant instances). Supplying -A also switches <br>
 * on weighting by distance, even when -W is absent. Sensible values = 1/5 <br>
 * to 1/10 the number of nearest neighbours. <p>
 *
 * @param options the list of options as an array of strings
 * @exception Exception if a numeric option value cannot be parsed as an
 * integer, if sigma is not positive, or if the option helpers in Utils
 * signal a problem
 *
 **/
public void setOptions (String[] options)
  throws Exception
{
  resetOptions();
  setWeightByDistance(Utils.getFlag('W', options));

  final String sampleSizeArg = Utils.getOption('M', options);
  if (sampleSizeArg.length() > 0) {
    setSampleSize(Integer.parseInt(sampleSizeArg));
  }

  final String seedArg = Utils.getOption('D', options);
  if (seedArg.length() > 0) {
    setSeed(Integer.parseInt(seedArg));
  }

  final String neighboursArg = Utils.getOption('K', options);
  if (neighboursArg.length() > 0) {
    setNumNeighbours(Integer.parseInt(neighboursArg));
  }

  final String sigmaArg = Utils.getOption('A', options);
  if (sigmaArg.length() > 0) {
    // a sigma value only matters when neighbours are weighted by distance,
    // so weighting is switched on before the value is applied
    setWeightByDistance(true);
    setSigma(Integer.parseInt(sigmaArg));
  }
}
/**
 * Supplies the tip text for the sigma property.
 *
 * @return tip text for this property, suitable for display in the
 * explorer/experimenter gui
 */
public String sigmaTipText() {
  final String purpose = "Set influence of nearest neighbours. Used in an exp function to ";
  final String effect = "control how quickly weights decrease for more distant instances. ";
  final String usage = "Use in conjunction with weightByDistance. Sensible values = 1/5 to ";
  final String range = "1/10 the number of nearest neighbours.";
  return purpose + effect + usage + range;
}
/**
 * Stores a new sigma value after checking that it is positive.
 *
 * @param s the value of sigma (must be greater than 0)
 * @exception Exception if s is zero or negative; the stored value is
 * left unchanged in that case
 */
public void setSigma (int s)
  throws Exception
{
  final boolean positive = s > 0;
  if (!positive) {
    throw new Exception("value of sigma must be > 0!");
  }
  this.m_sigma = s;
}
/**
 * Reports the sigma value currently held by this evaluator.
 *
 * @return the sigma value.
 */
public int getSigma () {
  return this.m_sigma;
}
/**
 * Supplies the tip text for the numNeighbours property.
 *
 * @return tip text for this property, suitable for display in the
 * explorer/experimenter gui
 */
public String numNeighboursTipText() {
  final String tip = "Number of nearest neighbours for attribute estimation.";
  return tip;
}
/**
 * Stores the number of nearest neighbours. The value is stored as given;
 * no range check is applied.
 *
 * @param n the number of nearest neighbours.
 */
public void setNumNeighbours (int n) {
  this.m_Knn = n;
}
/**
 * Reports the number of nearest neighbours currently configured.
 *
 * @return the number of nearest neighbours
 */
public int getNumNeighbours () {
  return this.m_Knn;
}
/**
 * Supplies the tip text for the seed property.
 *
 * @return tip text for this property, suitable for display in the
 * explorer/experimenter gui
 */
public String seedTipText() {
  final String tip = "Random seed for sampling instances.";
  return tip;
}
/**
 * Stores the random number seed used for randomly sampling instances.
 * The value is stored as given.
 *
 * @param s the random number seed.
 */
public void setSeed (int s) {
  this.m_seed = s;
}
/**
 * Reports the seed currently configured for randomly sampling instances.
 *
 * @return the random number seed.
 */
public int getSeed () {
  return this.m_seed;
}
/**
 * Supplies the tip text for the sampleSize property.
 *
 * @return tip text for this property, suitable for display in the
 * explorer/experimenter gui
 */
public String sampleSizeTipText() {
  final String meaning = "Number of instances to sample. Default (-1) indicates that all ";
  final String consequence = "instances will be used for attribute estimation.";
  return meaning + consequence;
}
/**
 * Stores the number of instances to sample for attribute estimation.
 * The value is stored as given; no range check is applied.
 *
 * @param s the number of instances to sample.
 */
public void setSampleSize (int s) {
  this.m_sampleM = s;
}
/**
* Get the number of instances used for estimating attributes
*
* @return the number of instances.
*/
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -