lmt.java
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    LMT.java
 *    Copyright (C) 2003 Niels Landwehr
 *
 */

package weka.classifiers.trees;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.j48.C45ModelSelection;
import weka.classifiers.trees.j48.ModelSelection;
import weka.classifiers.trees.lmt.LMTNode;
import weka.classifiers.trees.lmt.ResidualModelSelection;
import weka.core.AdditionalMeasureProducer;
import weka.core.Capabilities;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.supervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

import java.util.Enumeration;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Classifier for building 'logistic model trees', which are classification
 * trees with logistic regression functions at the leaves. The algorithm can
 * deal with binary and multi-class target variables, numeric and nominal
 * attributes and missing values.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Niels Landwehr, Mark Hall, Eibe Frank (2005). Logistic Model Trees.<br/>
 * <br/>
 * Marc Sumner, Eibe Frank, Mark Hall: Speeding up Logistic Model Tree
 * Induction. In: 9th European Conference on Principles and Practice of
 * Knowledge Discovery in Databases, 675-683, 2005.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * @article{Landwehr2005,
 *    author = {Niels Landwehr and Mark Hall and Eibe Frank},
 *    journal = {Machine Learning},
 *    number = {1-2},
 *    pages = {161-205},
 *    title = {Logistic Model Trees},
 *    volume = {59},
 *    year = {2005}
 * }
 *
 * @inproceedings{Sumner2005,
 *    author = {Marc Sumner and Eibe Frank and Mark Hall},
 *    booktitle = {9th European Conference on Principles and Practice of Knowledge Discovery in Databases},
 *    pages = {675-683},
 *    publisher = {Springer},
 *    title = {Speeding up Logistic Model Tree Induction},
 *    year = {2005}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -B
 *  Binary splits (convert nominal attributes to binary ones)</pre>
 *
 * <pre> -R
 *  Split on residuals instead of class values</pre>
 *
 * <pre> -C
 *  Use cross-validation for boosting at all nodes (i.e., disable heuristic)</pre>
 *
 * <pre> -P
 *  Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.</pre>
 *
 * <pre> -I <numIterations>
 *  Set fixed number of iterations for LogitBoost (instead of using cross-validation)</pre>
 *
 * <pre> -M <numInstances>
 *  Set minimum number of instances at which a node can be split (default 15)</pre>
 *
 * <pre> -W <beta>
 *  Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.</pre>
 *
 * <pre> -A
 *  The AIC is used to choose the best iteration.</pre>
 *
 <!-- options-end -->
 *
 * @author Niels Landwehr
 * @author Marc Sumner
 * @version $Revision: 1.8 $
 */
public class LMT
  extends Classifier
  implements OptionHandler, AdditionalMeasureProducer, Drawable,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -1113212459618104943L;

  /** Filter to replace missing values */
  protected ReplaceMissingValues m_replaceMissing;

  /** Filter to replace nominal attributes */
  protected NominalToBinary m_nominalToBinary;

  /** root of the logistic model tree */
  protected LMTNode m_tree;

  /** use heuristic that determines the number of LogitBoost iterations only
      once in the beginning? */
  protected boolean m_fastRegression;

  /** convert nominal attributes to binary? */
  protected boolean m_convertNominal;

  /** split on residuals? */
  protected boolean m_splitOnResiduals;

  /** use error on probabilities instead of misclassification for stopping
      criterion of LogitBoost? */
  protected boolean m_errorOnProbabilities;

  /** minimum number of instances at which a node is considered for splitting */
  protected int m_minNumInstances;

  /** if non-zero, use fixed number of iterations for LogitBoost */
  protected int m_numBoostingIterations;

  /** Threshold for trimming weights. Instances with a weight lower than this
   *  (as a percentage of total weights) are not included in the regression fit. */
  protected double m_weightTrimBeta;

  /** If true, the AIC is used to choose the best LogitBoost iteration */
  private boolean m_useAIC = false;

  /**
   * Creates an instance of LMT with standard options
   */
  public LMT() {
    m_fastRegression = true;
    m_numBoostingIterations = -1;
    m_minNumInstances = 15;
    m_weightTrimBeta = 0;
    m_useAIC = false;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }

  /**
   * Builds the classifier.
   *
   * @param data the data to train with
   * @throws Exception if classifier can't be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {

    // can classifier handle the data?
    getCapabilities().testWithFail(data);

    // remove instances with missing class
    Instances filteredData = new Instances(data);
    filteredData.deleteWithMissingClass();

    // replace missing values
    m_replaceMissing = new ReplaceMissingValues();
    m_replaceMissing.setInputFormat(filteredData);
    filteredData = Filter.useFilter(filteredData, m_replaceMissing);

    // possibly convert nominal attributes globally
    if (m_convertNominal) {
      m_nominalToBinary = new NominalToBinary();
      m_nominalToBinary.setInputFormat(filteredData);
      filteredData = Filter.useFilter(filteredData, m_nominalToBinary);
    }

    int minNumInstances = 2;

    // create ModelSelection object, either for splits on the residuals
    // or for splits on the class value
    ModelSelection modSelection;
    if (m_splitOnResiduals) {
      modSelection = new ResidualModelSelection(minNumInstances);
    } else {
      modSelection = new C45ModelSelection(minNumInstances, filteredData);
    }

    // create tree root
    m_tree = new LMTNode(modSelection, m_numBoostingIterations, m_fastRegression,
                         m_errorOnProbabilities, m_minNumInstances,
                         m_weightTrimBeta, m_useAIC);
    // build tree
    m_tree.buildClassifier(filteredData);

    if (modSelection instanceof C45ModelSelection)
      ((C45ModelSelection)modSelection).cleanup();
  }

  /**
   * Returns class probabilities for an instance.
   *
   * @param instance the instance to compute the distribution for
   * @return the class probabilities
   * @throws Exception if distribution can't be computed successfully
   */
  public double[] distributionForInstance(Instance instance) throws Exception {

    // replace missing values
    m_replaceMissing.input(instance);
    instance = m_replaceMissing.output();

    // possibly convert nominal attributes
    if (m_convertNominal) {
      m_nominalToBinary.input(instance);
      instance = m_nominalToBinary.output();
    }

    return m_tree.distributionForInstance(instance);
  }

  /**
   * Classifies an instance.
   *
   * @param instance the instance to classify
   * @return the classification
   * @throws Exception if instance can't be classified successfully
   */
  public double classifyInstance(Instance instance) throws Exception {

    double maxProb = -1;
    int maxIndex = 0;

    // classify by maximum probability
    double[] probs = distributionForInstance(instance);
    for (int j = 0; j < instance.numClasses(); j++) {
      if (Utils.gr(probs[j], maxProb)) {
        maxIndex = j;
        maxProb = probs[j];
      }
    }
    return (double)maxIndex;
  }

  /**
   * Returns a description of the classifier.
   *
   * @return a string representation of the classifier
   */
  public String toString() {
    if (m_tree != null) {
      return "Logistic model tree \n------------------\n" + m_tree.toString();
    } else {
      return "No tree built";
    }
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(8);

    newVector.addElement(new Option("\tBinary splits (convert nominal attributes to binary ones)",
                                    "B", 0, "-B"));

    newVector.addElement(new Option("\tSplit on residuals instead of class values",
                                    "R", 0, "-R"));

    newVector.addElement(new Option("\tUse cross-validation for boosting at all nodes (i.e., disable heuristic)",
                                    "C", 0, "-C"));

    newVector.addElement(new Option("\tUse error on probabilities instead of misclassification error " +
                                    "for stopping criterion of LogitBoost.",
                                    "P", 0, "-P"));

    newVector.addElement(new Option("\tSet fixed number of iterations for LogitBoost (instead of using " +
                                    "cross-validation)",
                                    "I", 1, "-I <numIterations>"));

    newVector.addElement(new Option("\tSet minimum number of instances at which a node can be split (default 15)",
                                    "M", 1, "-M <numInstances>"));

    newVector.addElement(new Option("\tSet beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.",
                                    "W", 1, "-W <beta>"));

    newVector.addElement(new Option("\tThe AIC is used to choose the best iteration.",
                                    "A", 0, "-A"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -B
   *  Binary splits (convert nominal attributes to binary ones)</pre>
   *
   * <pre> -R
   *  Split on residuals instead of class values</pre>
   *
   * <pre> -C
   *  Use cross-validation for boosting at all nodes (i.e., disable heuristic)</pre>
   *
   * <pre> -P
   *  Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.</pre>
   *
   * <pre> -I <numIterations>
   *  Set fixed number of iterations for LogitBoost (instead of using cross-validation)</pre>
   *
   * <pre> -M <numInstances>
   *  Set minimum number of instances at which a node can be split (default 15)</pre>
   *
   * <pre> -W <beta>
   *  Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.</pre>
   *
   * <pre> -A
   *  The AIC is used to choose the best iteration.</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    setConvertNominal(Utils.getFlag('B', options));
    setSplitOnResiduals(Utils.getFlag('R', options));
    setFastRegression(!Utils.getFlag('C', options));
    setErrorOnProbabilities(Utils.getFlag('P', options));

    String optionString = Utils.getOption('I', options);
    if (optionString.length() != 0) {
      setNumBoostingIterations((new Integer(optionString)).intValue());
    }

    optionString = Utils.getOption('M', options);
    if (optionString.length() != 0) {
      setMinNumInstances((new Integer(optionString)).intValue());
    }

    optionString = Utils.getOption('W', options);
    if (optionString.length() != 0) {
      setWeightTrimBeta((new Double(optionString)).doubleValue());
    }

    setUseAIC(Utils.getFlag('A', options));

    Utils.checkForRemainingOptions(options);
  }

  /**
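
A minimal usage sketch for the class above (not part of the original file): it assumes a WEKA jar of the same vintage on the classpath and a placeholder ARFF file name, and drives the classifier through the setOptions, buildClassifier, classifyInstance, and distributionForInstance methods shown in the listing.

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.Arrays;

import weka.classifiers.trees.LMT;
import weka.core.Instances;

public class LMTExample {

  public static void main(String[] args) throws Exception {
    // Load a dataset; "weather.arff" is a placeholder file name.
    Instances data = new Instances(new BufferedReader(new FileReader("weather.arff")));
    // Tell WEKA which attribute is the class; the last attribute is assumed here.
    data.setClassIndex(data.numAttributes() - 1);

    LMT lmt = new LMT();
    // Same flags as documented in the Javadoc above, e.g. require at least
    // 30 instances at a node before it may be split (default is 15).
    lmt.setOptions(new String[] {"-M", "30"});
    lmt.buildClassifier(data);

    // Predicted class and class probabilities for the first instance.
    double pred = lmt.classifyInstance(data.instance(0));
    double[] dist = lmt.distributionForInstance(data.instance(0));
    System.out.println("Predicted class: " + data.classAttribute().value((int) pred));
    System.out.println("Class distribution: " + Arrays.toString(dist));

    // toString() prints the induced logistic model tree.
    System.out.println(lmt);
  }
}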