?? subspaceclusterdefinition.java
字號:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * SubspaceClusterDefinition.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.datagenerators.clusterers;import weka.core.Option;import weka.core.Range;import weka.core.SelectedTag;import weka.core.Utils;import weka.datagenerators.ClusterDefinition;import weka.datagenerators.ClusterGenerator;import java.util.Enumeration;import java.util.Random;import java.util.StringTokenizer;import java.util.Vector;/** <!-- globalinfo-start --> * A single cluster for the SubspaceCluster datagenerator * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -A <range> * Generates randomly distributed instances in the cluster.</pre> * * <pre> -U <range> * Generates uniformly distributed instances in the cluster.</pre> * * <pre> -G <range> * Generates gaussian distributed instances in the cluster.</pre> * * <pre> -D <num>,<num> * The attribute min/max (-A and -U) or mean/stddev (-G) for * the cluster.</pre> * * <pre> -N <num>..<num> * The range of number of instances per cluster (default 1..50).</pre> * * <pre> -I * Uses integer instead of continuous values (default continuous).</pre> * <!-- options-end --> * * @author Gabi Schmidberger (gabi@cs.waikato.ac.nz) * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 
1.4 $
 * @see SubspaceCluster
 */
public class SubspaceClusterDefinition
  extends ClusterDefinition {

  /** for serialization */
  static final long serialVersionUID = 3135678125044007231L;

  /**
   * cluster type.
   * NOTE(review): presumably one of the SubspaceCluster.TAGS_CLUSTERTYPE ids
   * (UNIFORM_RANDOM, TOTAL_UNIFORM, GAUSSIAN) stored by setClusterType -
   * the setter is not visible in this part of the file, confirm there.
   */
  protected int m_clustertype;

  /**
   * cluster subtype; getOptions() compares it against
   * SubspaceCluster.INTEGER to decide whether to emit the -I flag
   */
  protected int m_clustersubtype;

  /**
   * number of attributes the cluster is defined for; reset and recounted
   * by setAttrIndexRange(String), forced to 1 by setDefaults()
   */
  protected int m_numClusterAttributes;

  /** number of instances for this cluster */
  protected int m_numInstances;

  /** minimal number of instances for this cluster */
  protected int m_MinInstNum;

  /** maximal number of instances for this cluster */
  protected int m_MaxInstNum;

  /** range of attributes */
  protected Range m_AttrIndexRange;

  /**
   * attributes of this cluster: one flag per attribute of the parent
   * generator, true if the attribute lies in the index range. Only
   * allocated by setAttrIndexRange(String) when a parent is set.
   */
  protected boolean[] m_attributes;

  /**
   * global indices of the attributes of the cluster (position within the
   * cluster -> index in the whole dataset); only allocated by
   * setAttrIndexRange(String) when a parent is set
   */
  protected int[] m_attrIndices;

  /** ranges of each attribute (min); not used if gaussian */
  protected double[] m_minValue;

  /** ranges of each attribute (max); not used if gaussian */
  protected double[] m_maxValue;

  /** mean; only used if gaussian */
  protected double[] m_meanValue;

  /** standard deviation; only used if gaussian */
  protected double[] m_stddevValue;

  /**
   * initializes the cluster, without a parent cluster (necessary for GOE)
   */
  public SubspaceClusterDefinition() {
    super();
  }

  /**
   * initializes the cluster with default values
   *
   * @param parent	the datagenerator this cluster belongs to
   */
  public SubspaceClusterDefinition(ClusterGenerator parent) {
    super(parent);
  }

  /**
   * sets the default values: cluster type and subtype, the min/max number
   * of instances, the attribute index range and the values list, in that
   * order.
   *
   * @throws Exception	if setting of defaults fails
   */
  protected void setDefaults() throws Exception {
    setClusterType(defaultClusterType());
    setClusterSubType(defaultClusterSubType());
    setMinInstNum(defaultMinInstNum());
    setMaxInstNum(defaultMaxInstNum());
    setAttrIndexRange(defaultAttrIndexRange());
    // setAttrIndexRange() zeroes the counter and only recounts it when a
    // parent is set, so it is forced to 1 here (the default range is "1").
    // NOTE(review): presumably setValuesList() below relies on this count -
    // confirm before reordering.
    m_numClusterAttributes = 1;
    setValuesList(defaultValuesList());
  }

  /**
   * Returns a string describing this data generator. 
*
   * @return a description of the data generator suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    final String description =
      "A single cluster for the SubspaceCluster datagenerator";
    return description;
  }

  /**
   * Lists every command-line option understood by this cluster definition:
   * the three cluster type switches (-A, -U, -G), the value list (-D), the
   * instance count range (-N) and the integer flag (-I).
   *
   * @return an enumeration of all the available options
   */
  public Enumeration listOptions() {
    Vector options = new Vector();

    // cluster type switches, each taking an attribute range
    options.add(new Option(
        "\tGenerates randomly distributed instances in the cluster.",
        "A", 1, "-A <range>"));
    options.add(new Option(
        "\tGenerates uniformly distributed instances in the cluster.",
        "U", 1, "-U <range>"));
    options.add(new Option(
        "\tGenerates gaussian distributed instances in the cluster.",
        "G", 1, "-G <range>"));

    // per-attribute values
    options.add(new Option(
        "\tThe attribute min/max (-A and -U) or mean/stddev (-G) for\n"
        + "\tthe cluster.",
        "D", 1, "-D <num>,<num>"));

    // instance count range; the help text embeds the current defaults
    String instNumHelp =
      "\tThe range of number of instances per cluster (default "
      + defaultMinInstNum() + ".." + defaultMaxInstNum() + ").";
    options.add(new Option(instNumHelp, "N", 1, "-N <num>..<num>"));

    // integer vs. continuous values
    options.add(new Option(
        "\tUses integer instead of continuous values (default continuous).",
        "I", 0, "-I"));

    return options.elements();
  }

  /**
   * Parses a list of options for this object. 
<p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -A <range> * Generates randomly distributed instances in the cluster.</pre> * * <pre> -U <range> * Generates uniformly distributed instances in the cluster.</pre> * * <pre> -G <range> * Generates gaussian distributed instances in the cluster.</pre> * * <pre> -D <num>,<num> * The attribute min/max (-A and -U) or mean/stddev (-G) for * the cluster.</pre> * * <pre> -N <num>..<num> * The range of number of instances per cluster (default 1..50).</pre> * * <pre> -I * Uses integer instead of continuous values (default continuous).</pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String tmpStr; String fromToStr; int typeCount; typeCount = 0; fromToStr = ""; tmpStr = Utils.getOption('A', options); if (tmpStr.length() != 0) { fromToStr = tmpStr; setClusterType( new SelectedTag( SubspaceCluster.UNIFORM_RANDOM, SubspaceCluster.TAGS_CLUSTERTYPE)); typeCount++; } tmpStr = Utils.getOption('U', options); if (tmpStr.length() != 0) { fromToStr = tmpStr; setClusterType( new SelectedTag( SubspaceCluster.TOTAL_UNIFORM, SubspaceCluster.TAGS_CLUSTERTYPE)); typeCount++; } tmpStr = Utils.getOption('G', options); if (tmpStr.length() != 0) { fromToStr = tmpStr; setClusterType( new SelectedTag( SubspaceCluster.GAUSSIAN, SubspaceCluster.TAGS_CLUSTERTYPE)); typeCount++; } // default is uniform/random if (typeCount == 0) setClusterType( new SelectedTag( SubspaceCluster.UNIFORM_RANDOM, SubspaceCluster.TAGS_CLUSTERTYPE)); else if (typeCount > 1) throw new Exception("Only one cluster type can be specified!"); setAttrIndexRange(fromToStr); tmpStr = Utils.getOption('D', options); if (isGaussian()) { if (tmpStr.length() != 0) setMeanStddev(tmpStr); else setMeanStddev(defaultMeanStddev()); } else { if (tmpStr.length() != 0) setValuesList(tmpStr); else 
setValuesList(defaultValuesList()); } tmpStr = Utils.getOption('N', options); if (tmpStr.length() != 0) setInstNums(tmpStr); else setInstNums(defaultMinInstNum() + ".." + defaultMaxInstNum()); if (Utils.getFlag('I', options)) setClusterSubType( new SelectedTag( SubspaceCluster.INTEGER, SubspaceCluster.TAGS_CLUSTERSUBTYPE)); else setClusterSubType( new SelectedTag( SubspaceCluster.CONTINUOUS, SubspaceCluster.TAGS_CLUSTERSUBTYPE)); } /** * Gets the current settings of the datagenerator BIRCHCluster. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; result = new Vector(); if (isRandom()) { result.add("-A"); result.add("" + getAttrIndexRange()); result.add("-D"); result.add("" + getValuesList()); } else if (isUniform()) { result.add("-U"); result.add("" + getAttrIndexRange()); result.add("-D"); result.add("" + getValuesList()); } else if (isGaussian()) { result.add("-G"); result.add("" + getAttrIndexRange()); result.add("-D"); result.add("" + getMeanStddev()); } result.add("-N"); result.add("" + getInstNums()); if (m_clustersubtype == SubspaceCluster.INTEGER) result.add("-I"); return (String[]) result.toArray(new String[result.size()]); } /** * Make a string from the attribues list. * * @return the attributes as string */ public String attributesToString() { StringBuffer text = new StringBuffer(); int j = 0; for (int i = 0; i < m_attributes.length; i++) { if (m_attributes[i]) { if (isGaussian()) { text.append(" Attribute: " + i); text.append(" Mean: "+ m_meanValue[j]); text.append(" StdDev: "+m_stddevValue[j]+"\n%"); } else { text.append(" Attribute: " + i); text.append(" Range: "+ m_minValue[j]); text.append(" - "+m_maxValue[j]+"\n%"); } j++; } } return text.toString(); } /** * Make a string from the cluster features. 
*
   * @return the cluster features as string
   */
  public String toString() {
    return "attributes " + attributesToString() + "\n"
      + "number of instances " + getInstNums();
  }

  /**
   * Assigns the parent datagenerator and updates the upper limit of the
   * attribute index range from the parent's number of attributes.
   *
   * @param parent	the parent datagenerator
   */
  public void setParent(SubspaceCluster parent) {
    super.setParent(parent);

    final int numAttributes = getParent().getNumAttributes();
    m_AttrIndexRange.setUpper(numAttributes);
  }

  /**
   * Supplies the attribute index range used when none is specified.
   *
   * @return the default attribute index range
   */
  protected String defaultAttrIndexRange() {
    final String firstAttributeOnly = "1";
    return firstAttributeOnly;
  }

  /**
   * Sets which attributes are used in the cluster.
   *
   * The range string is always stored. Only when a parent generator is
   * set are the per-attribute flags, the attribute count and the
   * cluster-to-dataset index translation rebuilt; without a parent the
   * attribute count is left at 0.
   *
   * @param rangeList 	a string representing the list of attributes. Since
   * the string will typically come from a user, attributes are indexed from
   * 1. <br/>
   * eg: first-3,5,6-last
   */
  public void setAttrIndexRange(String rangeList) {
    m_numClusterAttributes = 0;

    if (m_AttrIndexRange == null) {
      m_AttrIndexRange = new Range();
    }
    m_AttrIndexRange.setRanges(rangeList);

    // nothing more can be derived without knowing the parent's attributes
    if (getParent() == null) {
      return;
    }

    m_AttrIndexRange.setUpper(getParent().getNumAttributes());
    m_attributes = new boolean[getParent().getNumAttributes()];

    // flag and count every attribute covered by the range
    for (int attr = 0; attr < m_attributes.length; attr++) {
      boolean selected = m_AttrIndexRange.isInRange(attr);
      if (selected) {
        m_numClusterAttributes++;
      }
      m_attributes[attr] = selected;
    }

    // store translation from attr in cluster to attr in whole dataset
    m_attrIndices = new int[m_numClusterAttributes];
    int next = 0;
    for (int attr = 0; attr < m_attributes.length; attr++) {
      if (m_AttrIndexRange.isInRange(attr)) {
        m_attrIndices[next] = attr;
        next++;
      }
    }
  }

  /**
   * Reports the attribute range(s) currently held by the index range.
   *
   * @return the attribute range(s).
   */
  public String getAttrIndexRange() {
    String ranges = m_AttrIndexRange.getRanges();
    return ranges;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -