// CLS.java
/**
*
* AgentAcademy - an open source Data Mining framework for
* training intelligent agents
*
* Copyright (C) 2001-2003 AA Consortium.
*
* This library is open source software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.0 of the License, or (at your option) any later
* version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
package org.agentacademy.modules.dataminer.classifiers;
/**
* <p>Title: The Data Miner prototype</p>
* <p>Description: A prototype for the DataMiner (DM), the Agent Academy (AA) module responsible for performing data mining on the contents of the Agent Use Repository (AUR). The extracted knowledge is to be sent back to the AUR in the form of a PMML document.</p>
* <p>Copyright: Copyright (c) 2002</p>
* <p>Company: CERTH</p>
* @author asymeon
* @version 0.3
*/
import java.util.Enumeration;
import java.util.Vector;
import org.agentacademy.modules.dataminer.classifiers.evaluation.DistributionClassifier;
import org.agentacademy.modules.dataminer.core.AdditionalMeasureProducer;
import org.agentacademy.modules.dataminer.core.Drawable;
import org.agentacademy.modules.dataminer.core.Instance;
import org.agentacademy.modules.dataminer.core.Instances;
import org.agentacademy.modules.dataminer.core.Matchable;
import org.agentacademy.modules.dataminer.core.Option;
import org.agentacademy.modules.dataminer.core.OptionHandler;
import org.agentacademy.modules.dataminer.core.Summarizable;
import org.agentacademy.modules.dataminer.core.Utils;
import org.agentacademy.modules.dataminer.core.WeightedInstancesHandler;
/**
* Class for generating an unpruned or a pruned CLS decision tree.
*
* Valid options are: <p>
*
* -U <br>
* Use unpruned tree.<p>
*
* -C confidence <br>
* Set confidence threshold for pruning. (Default: 0.25) <p>
*
* -M number <br>
* Set minimum number of instances per leaf. (Default: 2) <p>
*
* -B <br>
* Use binary splits for nominal attributes. <p>
*
* -S <br>
* Don't perform subtree raising. <p>
*
* -L <br>
* Do not clean up after the tree has been built. <p>
*
* -A <br>
* If set, Laplace smoothing is used for predicted probabilities. <p>
*
*/
public class CLS extends DistributionClassifier implements OptionHandler,
Drawable, Matchable, Sourcable, WeightedInstancesHandler, Summarizable,
AdditionalMeasureProducer {
// Fixed serial version UID so serialized models remain compatible
// (kept the same after m_ClassAttribute was added, per original note).
static final long serialVersionUID = -217733168393644444L;
/** Root of the induced decision tree; assigned by buildClassifier(). */
private ClassifierTree m_root;
/** If true, the tree is left unpruned (-U). */
private boolean m_unpruned = false;
/** Confidence threshold for pruning (-C, default 0.25). */
private float m_CF = 0.25f;
/** Minimum number of instances per leaf (-M, default 2). */
private int m_minNumObj = 2;
/** Determines whether probabilities are smoothed using
Laplace correction when predictions are generated (-A). */
private boolean m_useLaplace = false;
/** Use reduced error pruning instead of confidence-based pruning? (-R) */
private boolean m_reducedErrorPruning = false;
/** Number of folds for reduced error pruning (-N, default 3). */
private int m_numFolds = 3;
/** Use binary splits on nominal attributes? (-B) */
private boolean m_binarySplits = false;
/** Attempt subtree raising while pruning? (disabled with -S) */
private boolean m_subtreeRaising = true;
/** If true, no cleanup is performed after the tree has been built (-L). */
boolean m_noCleanup = false;
/**
 * Induces the decision tree from the given training data.
 *
 * The split-model selection strategy depends on m_binarySplits, and the
 * pruning strategy on m_reducedErrorPruning; after building, the model
 * selection object is asked to release its references to the data.
 *
 * @param instances the training data
 * @exception Exception if classifier can't be built successfully
 */
public void buildClassifier(Instances instances)
throws Exception{
    // Choose between binary-only and standard C4.5 split selection.
    ModelSelection selection = m_binarySplits
        ? new BinC45ModelSelection(m_minNumObj, instances)
        : new C45ModelSelection(m_minNumObj, instances);
    // Choose the pruning scheme for the tree to be grown.
    if (m_reducedErrorPruning) {
        m_root = new PruneableClassifierTree0(selection, !m_unpruned,
            m_numFolds, !m_noCleanup);
    } else {
        m_root = new C45PruneableClassifierTree(selection, !m_unpruned,
            m_CF, m_subtreeRaising, !m_noCleanup);
    }
    m_root.buildClassifier(instances);
    // Let the model selection drop its hold on the training data.
    if (m_binarySplits) {
        ((BinC45ModelSelection) selection).cleanup();
    } else {
        ((C45ModelSelection) selection).cleanup();
    }
}
/**
 * Predicts the class of a single instance by delegating to the
 * previously built tree (m_root).
 *
 * @param instance the instance to classify
 * @return the predicted class value
 * @exception Exception if instance can't be classified successfully
 */
public double classifyInstance(Instance instance) throws Exception {
    final double prediction = m_root.classifyInstance(instance);
    return prediction;
}
/**
 * Computes the class membership probabilities for a single instance,
 * delegating to the tree; Laplace smoothing is applied when
 * m_useLaplace is set.
 *
 * @param instance the instance to compute the distribution for
 * @return the class probabilities
 * @exception Exception if distribution can't be computed successfully
 */
public final double[] distributionForInstance(Instance instance)
throws Exception {
    final double[] distribution =
        m_root.distributionForInstance(instance, m_useLaplace);
    return distribution;
}
/**
 * Returns a graph describing the built tree (delegated to m_root).
 *
 * @return the graph of the tree as a String
 * @exception Exception if graph can't be computed
 */
public String graph() throws Exception {
    final String treeGraph = m_root.graph();
    return treeGraph;
}
/**
 * Returns the built tree rendered in prefix order (delegated to m_root).
 *
 * @return the tree in prefix order as a String
 * @exception Exception if something goes wrong
 */
public String prefix() throws Exception {
    final String prefixForm = m_root.prefix();
    return prefixForm;
}
/**
 * Renders the built tree as Java source: a class with a static
 * classify(Object[]) method. The tree supplies two fragments —
 * the assignment code for the prediction variable and any support
 * code — which are wrapped in the class/method boilerplate here.
 *
 * @param className the name of the generated class
 * @return the tree as a Java if-then type statement
 * @exception Exception if something goes wrong
 */
public String toSource(String className) throws Exception {
    StringBuffer[] fragments = m_root.toSource(className);
    StringBuffer code = new StringBuffer();
    code.append("class ").append(className).append(" {\n\n");
    code.append(" public static double classify(Object [] i)\n");
    code.append(" throws Exception {\n\n");
    code.append(" double p = Double.NaN;\n");
    code.append(fragments[0]); // assignment code
    code.append(" return p;\n");
    code.append(" }\n");
    code.append(fragments[1]); // support code
    code.append("}\n");
    return code.toString();
}
/**
 * Returns an enumeration describing the available options.
 *
 * Valid options are: <p>
 *
 * -U <br>
 * Use unpruned tree.<p>
 *
 * -C confidence <br>
 * Set confidence threshold for pruning. (Default: 0.25) <p>
 *
 * -M number <br>
 * Set minimum number of instances per leaf. (Default: 2) <p>
 *
 * -R <br>
 * Use reduced error pruning. <p>
 *
 * -N number <br>
 * Set number of folds for reduced error pruning. (Default: 3) <p>
 *
 * -B <br>
 * Use binary splits for nominal attributes. <p>
 *
 * -S <br>
 * Don't perform subtree raising. <p>
 *
 * -L <br>
 * Do not clean up after the tree has been built. <p>
 *
 * -A <br>
 * If set, Laplace smoothing is used for predicted probabilities. <p>
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
    // 9 options in total. The original version advertised only 7:
    // -R and -N were parsed by setOptions() but never listed here,
    // even though the Vector was already sized for 9.
    Vector newVector = new Vector(9);
    newVector.
    addElement(new Option("\tUse unpruned tree.",
    "U", 0, "-U"));
    newVector.
    addElement(new Option("\tSet confidence threshold for pruning.\n" +
    "\t(default 0.25)",
    "C", 1, "-C <pruning confidence>"));
    newVector.
    addElement(new Option("\tSet minimum number of instances per leaf.\n" +
    "\t(default 2)",
    "M", 1, "-M <minimum number of instances>"));
    newVector.
    addElement(new Option("\tUse reduced error pruning.",
    "R", 0, "-R"));
    newVector.
    addElement(new Option("\tSet number of folds for reduced error\n" +
    "\tpruning.\n" +
    "\t(default 3)",
    "N", 1, "-N <number of folds>"));
    newVector.
    addElement(new Option("\tUse binary splits only.",
    "B", 0, "-B"));
    newVector.
    addElement(new Option("\tDon't perform subtree raising.",
    "S", 0, "-S"));
    newVector.
    addElement(new Option("\tDo not clean up after the tree has been built.",
    "L", 0, "-L"));
    newVector.
    addElement(new Option("\tLaplace smoothing for predicted probabilities.",
    "A", 0, "-A"));
    return newVector.elements();
}
/**
 * Parses a given list of options, validating mutually exclusive
 * combinations (e.g. -U with -R, or -C with -R/-U) and restoring
 * defaults for any option that is absent.
 *
 * NOTE(review): the closing braces of this method (and of the class)
 * were lost to page junk appended after this method in the scraped
 * source — confirm against the original file.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception{
// Other options
// -M: minimum instances per leaf; default 2 when unset.
String minNumString = Utils.getOption('M', options);
if (minNumString.length() != 0) {
m_minNumObj = Integer.parseInt(minNumString);
} else {
m_minNumObj = 2;
}
m_binarySplits = Utils.getFlag('B', options);
m_useLaplace = Utils.getFlag('A', options);
// Pruning options
m_unpruned = Utils.getFlag('U', options);
m_subtreeRaising = !Utils.getFlag('S', options);
m_noCleanup = Utils.getFlag('L', options);
// -S only affects pruning, so it is rejected alongside -U.
if ((m_unpruned) && (!m_subtreeRaising)) {
throw new Exception("Subtree raising doesn't need to be unset for unpruned tree!");
}
m_reducedErrorPruning = Utils.getFlag('R', options);
if ((m_unpruned) && (m_reducedErrorPruning)) {
throw new Exception("Unpruned tree and reduced error pruning can't be selected " +
"simultaneously!");
}
// -C: confidence threshold; only meaningful for confidence-based pruning.
String confidenceString = Utils.getOption('C', options);
if (confidenceString.length() != 0) {
if (m_reducedErrorPruning) {
throw new Exception("Setting the confidence doesn't make sense " +
"for reduced error pruning.");
} else if (m_unpruned) {
throw new Exception("Doesn't make sense to change confidence for unpruned "
+"tree!");
} else {
m_CF = (new Float(confidenceString)).floatValue();
// Confidence must lie strictly inside (0, 1).
if ((m_CF <= 0) || (m_CF >= 1)) {
throw new Exception("Confidence has to be greater than zero and smaller " +
"than one!");
}
}
} else {
m_CF = 0.25f;
}
// -N: number of folds; only meaningful with reduced error pruning (-R).
String numFoldsString = Utils.getOption('N', options);
if (numFoldsString.length() != 0) {
if (!m_reducedErrorPruning) {
throw new Exception("Setting the number of folds" +
" doesn't make sense if" +
" reduced error pruning is not selected.");
} else {
m_numFolds = Integer.parseInt(numFoldsString);
}
} else {
m_numFolds = 3;
}
}
}
// NOTE(review): the lines above replace non-code page junk ("hotkey help"
// text from the site this file was scraped from) that overwrote the end of
// the file. The three braces close the -N else-branch, setOptions(), and
// the CLS class. Any further methods the original file contained after
// setOptions() were lost — recover them from the original source.