?? subspacecluster.java
字號:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * SubspaceCluster.java * Copyright (C) 2001 University of Waikato, Hamilton, New Zealand * */package weka.datagenerators.clusterers;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.Range;import weka.core.Tag;import weka.core.Utils;import weka.datagenerators.ClusterDefinition;import weka.datagenerators.ClusterGenerator;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * A data generator that produces data points in hyperrectangular subspace clusters. 
* <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -h
 *  Prints this help.</pre>
 *
 * <pre> -o &lt;file&gt;
 *  The name of the output file, otherwise the generated data is
 *  printed to stdout.</pre>
 *
 * <pre> -r &lt;name&gt;
 *  The name of the relation.</pre>
 *
 * <pre> -d
 *  Whether to print debug informations.</pre>
 *
 * <pre> -S
 *  The seed for random function (default 1)</pre>
 *
 * <pre> -a &lt;num&gt;
 *  The number of attributes (default 1).</pre>
 *
 * <pre> -c
 *  Class Flag, if set, the cluster is listed in extra attribute.</pre>
 *
 * <pre> -b &lt;range&gt;
 *  The indices for boolean attributes.</pre>
 *
 * <pre> -m &lt;range&gt;
 *  The indices for nominal attributes.</pre>
 *
 * <pre> -P &lt;num&gt;
 *  The noise rate in percent (default 0.0).
 *  Can be between 0% and 30%. (Remark: The original
 *  algorithm only allows noise up to 10%.)</pre>
 *
 * <pre> -C &lt;cluster-definition&gt;
 *  A cluster definition of class 'SubspaceClusterDefinition'
 *  (definition needs to be quoted to be recognized as
 *  a single argument).</pre>
 *
 * <pre>
 * Options specific to weka.datagenerators.clusterers.SubspaceClusterDefinition:
 * </pre>
 *
 * <pre> -A &lt;range&gt;
 *  Generates randomly distributed instances in the cluster.</pre>
 *
 * <pre> -U &lt;range&gt;
 *  Generates uniformly distributed instances in the cluster.</pre>
 *
 * <pre> -G &lt;range&gt;
 *  Generates gaussian distributed instances in the cluster.</pre>
 *
 * <pre> -D &lt;num&gt;,&lt;num&gt;
 *  The attribute min/max (-A and -U) or mean/stddev (-G) for
 *  the cluster.</pre>
 *
 * <pre> -N &lt;num&gt;..&lt;num&gt;
 *  The range of number of instances per cluster (default 1..50).</pre>
 *
 * <pre> -I
 *  Uses integer instead of continuous values (default continuous).</pre>
 *
 <!-- options-end -->
 *
 * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.4 $
 */
public class SubspaceCluster
  extends ClusterGenerator {

  /** for serialization */
  static final long serialVersionUID = -3454999858505621128L;

  /** noise rate in percent
(option P, between 0 and 30)*/ protected double m_NoiseRate; /** cluster list */ protected ClusterDefinition[] m_Clusters; /** if nominal, store number of values */ protected int[] m_numValues; /** store global min values */ protected double[] m_globalMinValue; /** store global max values */ protected double[] m_globalMaxValue; /** cluster type: uniform/random */ public static final int UNIFORM_RANDOM = 0; /** cluster type: total uniform */ public static final int TOTAL_UNIFORM = 1; /** cluster type: gaussian */ public static final int GAUSSIAN = 2; /** the tags for the cluster types */ public static final Tag[] TAGS_CLUSTERTYPE = { new Tag(UNIFORM_RANDOM, "uniform/random"), new Tag(TOTAL_UNIFORM, "total uniform"), new Tag(GAUSSIAN, "gaussian") }; /** cluster subtype: continuous */ public static final int CONTINUOUS = 0; /** cluster subtype: integer */ public static final int INTEGER = 1; /** the tags for the cluster types */ public static final Tag[] TAGS_CLUSTERSUBTYPE = { new Tag(CONTINUOUS, "continuous"), new Tag(INTEGER, "integer") }; /** * initializes the generator, sets the number of clusters to 0, since user * has to specify them explicitly */ public SubspaceCluster() { super(); setNoiseRate(defaultNoiseRate()); } /** * Returns a string describing this data generator. * * @return a description of the data generator suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "A data generator that produces data points in " + "hyperrectangular subspace clusters."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options */ public Enumeration listOptions() { Vector result = enumToVector(super.listOptions()); result.addElement(new Option( "\tThe noise rate in percent (default " + defaultNoiseRate() + ").\n" + "\tCan be between 0% and 30%. 
(Remark: The original \n" + "\talgorithm only allows noise up to 10%.)", "P", 1, "-P <num>")); result.addElement(new Option( "\tA cluster definition of class '" + SubspaceClusterDefinition.class.getName().replaceAll(".*\\.", "") + "'\n" + "\t(definition needs to be quoted to be recognized as \n" + "\ta single argument).", "C", 1, "-C <cluster-definition>")); result.addElement(new Option( "", "", 0, "\nOptions specific to " + SubspaceClusterDefinition.class.getName() + ":")); result.addAll( enumToVector(new SubspaceClusterDefinition(this).listOptions())); return result.elements(); } /** * Parses a list of options for this object. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -h * Prints this help.</pre> * * <pre> -o <file> * The name of the output file, otherwise the generated data is * printed to stdout.</pre> * * <pre> -r <name> * The name of the relation.</pre> * * <pre> -d * Whether to print debug informations.</pre> * * <pre> -S * The seed for random function (default 1)</pre> * * <pre> -a <num> * The number of attributes (default 1).</pre> * * <pre> -c * Class Flag, if set, the cluster is listed in extra attribute.</pre> * * <pre> -b <range> * The indices for boolean attributes.</pre> * * <pre> -m <range> * The indices for nominal attributes.</pre> * * <pre> -P <num> * The noise rate in percent (default 0.0). * Can be between 0% and 30%. 
(Remark: The original * algorithm only allows noise up to 10%.)</pre> * * <pre> -C <cluster-definition> * A cluster definition of class 'SubspaceClusterDefinition' * (definition needs to be quoted to be recognized as * a single argument).</pre> * * <pre> * Options specific to weka.datagenerators.clusterers.SubspaceClusterDefinition: * </pre> * * <pre> -A <range> * Generates randomly distributed instances in the cluster.</pre> * * <pre> -U <range> * Generates uniformly distributed instances in the cluster.</pre> * * <pre> -G <range> * Generates gaussian distributed instances in the cluster.</pre> * * <pre> -D <num>,<num> * The attribute min/max (-A and -U) or mean/stddev (-G) for * the cluster.</pre> * * <pre> -N <num>..<num> * The range of number of instances per cluster (default 1..50).</pre> * * <pre> -I * Uses integer instead of continuous values (default continuous).</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String tmpStr; SubspaceClusterDefinition cl; Vector list; int clCount; super.setOptions(options); m_numValues = new int[getNumAttributes()]; // numValues might be changed by a cluster definition // (only relevant for nominal data) for (int i = 0; i < getNumAttributes(); i++) m_numValues[i] = 1; tmpStr = Utils.getOption('P', options); if (tmpStr.length() != 0) setNoiseRate(Double.parseDouble(tmpStr)); else setNoiseRate(defaultNoiseRate()); // cluster definitions list = new Vector(); clCount = 0; do { tmpStr = Utils.getOption('C', options); if (tmpStr.length() != 0) { clCount++; cl = new SubspaceClusterDefinition(this); cl.setOptions(Utils.splitOptions(tmpStr)); list.add(cl); } } while (tmpStr.length() != 0); m_Clusters = (ClusterDefinition[]) list.toArray(new ClusterDefinition[list.size()]); // in case no cluster definition was provided, make sure that there's at // least one definition present -> 
see getClusters() getClusters(); } /** * Gets the current settings of the datagenerator. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; String[] options; int i; result = new Vector(); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); result.add("-P"); result.add("" + getNoiseRate()); for (i = 0; i < getClusters().length; i++) { result.add("-C"); result.add(Utils.joinOptions(getClusters()[i].getOptions())); } return (String[]) result.toArray(new String[result.size()]); } /** * returns the current cluster definitions, if necessary initializes them * * @return the current cluster definitions */ protected ClusterDefinition[] getClusters() { if ( (m_Clusters == null) || (m_Clusters.length == 0) ) { if (m_Clusters != null) System.out.println("NOTE: at least 1 cluster definition is necessary, " + "created default one."); m_Clusters = new ClusterDefinition[]{new SubspaceClusterDefinition(this)}; } return m_Clusters; } /** * returns the default number of attributes * * @return the default number of attributes */ protected int defaultNumAttributes() { return 1; } /** * Sets the number of attributes the dataset should have. * @param numAttributes the new number of attributes */ public void setNumAttributes(int numAttributes) { super.setNumAttributes(numAttributes); m_numValues = new int[getNumAttributes()]; } /** * Returns the tip text for this property * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String numAttributesTipText() { return "The number of attributes the generated data will contain (Note: they must be covered by the cluster definitions!)"; } /** * returns the default noise rate * * @return the default noise rate */ protected double defaultNoiseRate() { return 0.0; } /** * Gets the percentage of noise set. 
*
   * @return the percentage of noise set
   */
  public double getNoiseRate() {
    return m_NoiseRate;
  }

  /**
   * Sets the percentage of noise set. The value is stored as given; no range
   * check is performed here.
   *
   * @param newNoiseRate new percentage of noise
   */
  public void setNoiseRate(double newNoiseRate) {
    m_NoiseRate = newNoiseRate;
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String noiseRateTipText() {
    return "The noise rate to use.";
  }

  /**
   * Returns the currently set clusters (a default definition is created if
   * none are set).
   *
   * @return the currently set clusters
   */
  public ClusterDefinition[] getClusterDefinitions() {
    return getClusters();
  }

  /**
   * Sets the clusters to use. Every element must be a
   * SubspaceClusterDefinition; the array is checked completely before any
   * state is changed, so a rejected array neither replaces the current
   * definitions nor gets its elements re-parented. Accepted definitions get
   * this generator as parent and are re-initialized from their own options.
   *
   * @param value the clusters to use; null or an empty array results in a
   *              single default definition
   * @throws Exception if clusters are not the correct class (the message
   *         lists the 1-based indices of the offending elements, null
   *         elements included)
   */
  public void setClusterDefinitions(ClusterDefinition[] value)
    throws Exception {

    ClusterDefinition[] clusters;
    String indexStr = "";

    // any wrong classes encountered? (instanceof is false for null, so null
    // elements are reported here instead of failing later with a
    // NullPointerException)
    if (value != null) {
      for (int i = 0; i < value.length; i++) {
        if (!(value[i] instanceof SubspaceClusterDefinition)) {
          if (indexStr.length() != 0) {
            indexStr += ",";
          }
          indexStr += "" + (i+1);
        }
      }
    }

    if (indexStr.length() != 0) {
      throw new Exception(
          "These cluster definitions are not '"
          + SubspaceClusterDefinition.class.getName() + "': " + indexStr);
    }

    m_Clusters = value;

    // getClusters() substitutes the default definition for null/empty input
    clusters = getClusters();
    for (int i = 0; i < clusters.length; i++) {
      clusters[i].setParent(this);
      clusters[i].setOptions(clusters[i].getOptions()); // for initializing!
    }
  }

  /**
   * Returns the tip text for this property.
   *
   * @return tip text for this property suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String clusterDefinitionsTipText() {
    return "The clusters to use.";
  }

  /**
   * Checks, whether all attributes are covered by cluster definitions and
   * returns TRUE in that case.
   *
   * @return whether all attributes are covered
   */
  protected boolean checkCoverage() {
    int i;
    int n;
    int[] count;
    Range r;
    String attrIndex;
    SubspaceClusterDefinition cl;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -