?? kmeans.java
字號:
* Returns the number of clusters.
*
* @return the number of clusters generated for a training dataset.
* @exception Exception if number of clusters could not be returned
* successfully
*/
public int numberOfClusters() throws Exception {
  // Hands back the cluster count currently stored in m_NumClusters.
  final int clusterCount = this.m_NumClusters;
  return clusterCount;
}
/**
 * Returns an enumeration describing the available options. <p>
 *
 * Two options are described:<p>
 *
 * -N &lt;num&gt; <br>
 * Number of clusters (the option text states a default of 2). <p>
 *
 * -S &lt;num&gt; <br>
 * Random number seed (the option text states a default of 10). <p>
 *
 * @return an enumeration over the two Option descriptors, -N first, then -S.
 */
public Enumeration listOptions () {
  Option clustersOption =
      new Option("\tnumber of clusters. (default = 2).", "N", 1, "-N <num>");
  Option seedOption =
      new Option("\trandom number seed.\n (default 10)", "S", 1, "-S <num>");

  Vector descriptors = new Vector(2);
  descriptors.addElement(clustersOption);
  descriptors.addElement(seedOption);
  return descriptors.elements();
}
/**
 * Supplies the tip text for the number-of-clusters property.
 *
 * @return a short description suitable for display in the
 * explorer/experimenter gui
 */
public String numClustersTipText() {
  final String tip = "set number of clusters";
  return tip;
}
/**
 * Stores the number of clusters to generate.
 * The value is stored as given; no range check is performed here.
 *
 * @param n the number of clusters to generate
 */
public void setNumClusters(int n) {
  this.m_NumClusters = n;
}
/**
 * Reads back the number of clusters to generate.
 *
 * @return the value currently held in m_NumClusters
 */
public int getNumClusters() {
  return this.m_NumClusters;
}
/**
 * Supplies the tip text for the seed property.
 *
 * @return a short description suitable for display in the
 * explorer/experimenter gui
 */
public String seedTipText() {
  final String tip = "random number seed";
  return tip;
}
/**
 * Stores the random number seed.
 *
 * @param s the seed
 */
public void setSeed (int s) {
  this.m_Seed = s;
}
/**
 * Reads back the random number seed.
 *
 * @return the value currently held in m_Seed
 */
public int getSeed () {
  return this.m_Seed;
}
/**
 * Parses a given list of options and applies the ones that are present.
 * <p>
 * -N sets the number of clusters and -S sets the seed. An option whose
 * value comes back empty from Utils.getOption leaves the corresponding
 * setting untouched.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if option lookup fails or a value is not a
 * parseable integer (Integer.parseInt throws NumberFormatException)
 */
public void setOptions (String[] options)
    throws Exception {
  // -N is handled before -S, matching the lookup order of the option list.
  final String clustersArg = Utils.getOption('N', options);
  if (clustersArg.length() > 0) {
    final int clusters = Integer.parseInt(clustersArg);
    setNumClusters(clusters);
  }

  final String seedArg = Utils.getOption('S', options);
  if (seedArg.length() > 0) {
    final int seed = Integer.parseInt(seedArg);
    setSeed(seed);
  }
}
/**
 * Gets the current settings of KMeans.
 * <p>
 * The result always has four entries: "-N", the cluster count, "-S" and
 * the seed, in that order.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions () {
  // All four slots are filled directly, so no padding with "" is needed.
  return new String[] {
    "-N", "" + getNumClusters(),
    "-S", "" + getSeed()
  };
}
/**
 * Builds a textual report of this clusterer: a title, the iteration
 * count, and one line of centroid values per cluster. Nominal attributes
 * are printed by label, all others by their numeric value.
 * <p>
 * As a side effect this method also calls pmmlDocument(0) before
 * returning.
 *
 * @return a description of the clusterer as a string
 */
public String toString() {
  StringBuffer report = new StringBuffer();
  report.append("\nkMeans\n======\n");
  report.append("\nNumber of iterations: " + m_Iterations+"\n");
  report.append("\nCluster centroids:\n");

  int cluster = 0;
  while (cluster < m_NumClusters) {
    report.append("\nCluster "+cluster+"\n\t");
    int attr = 0;
    while (attr < m_ClusterCentroids.numAttributes()) {
      String cell;
      if (!m_ClusterCentroids.attribute(attr).isNominal()) {
        // Non-nominal: print the stored numeric value as is.
        cell = " "+m_ClusterCentroids.instance(cluster).value(attr);
      } else {
        // Nominal: the stored value is an index into the attribute's labels.
        cell = " "+m_ClusterCentroids.attribute(attr).
            value((int)m_ClusterCentroids.instance(cluster).value(attr));
      }
      report.append(cell);
      attr++;
    }
    cluster++;
  }

  pmmlDocument (0);
  return report.toString();
}
/**
 * Main method for testing this class. The arguments are handed unchanged
 * to ClusterEvaluation.evaluateClusterer together with a new KMeans, and
 * the result is printed to standard output. Any exception is logged
 * (message first, then one line per stack frame) instead of propagating.
 *
 * @param argv should contain the following arguments: <p>
 * -t training file [-N number of clusters]
 */
public static void main (String[] argv) {
  try {
    System.out.println(ClusterEvaluation.
        evaluateClusterer(new KMeans(), argv));
  }
  catch (Exception e) {
    log.error (e.getMessage());
    // Calling toString() on the StackTraceElement[] only yields the array's
    // identity string, so the frames are formatted explicitly.
    StackTraceElement[] frames = e.getStackTrace();
    StringBuffer trace = new StringBuffer();
    for (int i = 0; i < frames.length; i++) {
      trace.append("\tat ").append(frames[i]).append('\n');
    }
    log.error(trace.toString());
  }
}
/**
 * Creates the PMML document for the current clustering model.
 * <p>
 * The tree is assembled as: PMML root containing Header, DataDictionary
 * and ClusteringModel; the ClusteringModel contains the MiningSchema, one
 * ClusteringField per attribute of m_instances, and one Cluster per
 * centroid. Clusters are named with their 1-based index. The tree is then
 * wrapped in a Document with DocType "pmml_2_0.dtd".
 * <p>
 * NOTE(review): the Document is neither returned nor stored by this
 * method, so the only visible effects are log entries on failure.
 * Exceptions are caught and logged, never propagated.
 *
 * @param depth not used by this method
 */
private void pmmlDocument (int depth) {
  try {
    Element versionElement = pmmlIntro();
    Element headerElement = header ();
    Element dataDictionaryElement = dataDictionary () ;
    Element clusteringModelElement = clusteringModel ();
    Element miningSchemaElement = miningSchema ();

    versionElement.addContent(headerElement);
    versionElement.addContent(dataDictionaryElement);
    versionElement.addContent(clusteringModelElement);
    clusteringModelElement.addContent(miningSchemaElement);

    // One ClusteringField per attribute in the dataset header.
    FastVector miningSchemaVector = m_instances.getVector();
    for (int i = 0; i < miningSchemaVector.size(); i++) {
      org.agentacademy.modules.dataminer.core.Attribute a =
          (org.agentacademy.modules.dataminer.core.Attribute) miningSchemaVector.elementAt(i);
      clusteringModelElement.addContent(clusteringField (a.name()));
    }

    // One Cluster per centroid; its text is the space-prefixed list of
    // centroid values (labels for nominal attributes, numbers otherwise).
    for (int cluster = 0; cluster < m_NumClusters; cluster++) {
      StringBuffer clusterText = new StringBuffer();
      for (int attr = 0; attr < m_ClusterCentroids.numAttributes(); attr++) {
        if (m_ClusterCentroids.attribute(attr).isNominal()) {
          clusterText.append(" " + m_ClusterCentroids.attribute(attr).
              value((int)m_ClusterCentroids.instance(cluster).value(attr)));
        }
        else {
          clusterText.append(" " + m_ClusterCentroids.instance(cluster).value(attr));
        }
      }
      Element clusterElement = clusterDescriptor (clusterText.toString());
      // clusterDescriptor() names every element "1"; replace that with the
      // 1-based cluster index so each Cluster has a distinct name.
      clusterElement.setAttribute("name", String.valueOf(cluster + 1));
      clusteringModelElement.addContent(clusterElement);
    }

    // Create the XML document
    DocType dtd = new DocType("pmml_2_0.dtd");
    Document document = new Document (versionElement,dtd);
  }
  catch (Exception e) {
    log.error("PMML Document Exception: " + e);
    // Calling toString() on the StackTraceElement[] only yields the array's
    // identity string, so the frames are formatted explicitly.
    StackTraceElement[] frames = e.getStackTrace();
    StringBuffer trace = new StringBuffer();
    for (int i = 0; i < frames.length; i++) {
      trace.append("\tat ").append(frames[i]).append('\n');
    }
    log.error(trace.toString());
  }
}
/**
 * Creates the root element of the PMML tree.
 *
 * @return a new, empty element named "PMML"
 * @exception Exception declared for callers; nothing in this body
 * throws it explicitly
 */
private Element pmmlIntro () throws Exception {
  return new Element ("PMML");
}
/**
 * Creates the Header element: copyright and description attributes plus
 * a nested Application element carrying the tool name and version.
 *
 * @return the populated "Header" element
 * @exception Exception declared for callers; nothing in this body
 * throws it explicitly
 */
private Element header () throws Exception {
  Element header = new Element ("Header");
  // The original first assigned " Clustering Model of " and immediately
  // overwrote it; only the final text was ever used.
  String headerString = "The Clustering Model of Data Mined Data";
  header.setAttribute("copyright", "issel.ee.auth.gr");
  header.setAttribute("description", headerString);

  Element applicationNameElement = new Element ("Application");
  applicationNameElement.setAttribute("name", "Agent Academy Data Miner");
  applicationNameElement.setAttribute("version", "0.3");
  header.addContent(applicationNameElement);
  return header;
}
/**
 * Creates the DataDictionary element for the PMML output.
 * <p>
 * The element carries a numberOfFields attribute taken from
 * m_instances.numAttributes() and one DataField child per entry of
 * m_instances.getVector(). Each DataField gets a name and an optype:
 * "categorical" for nominal attributes (with one Value child per label),
 * "continuous" for numeric ones, "ordinal" when isRegular() holds, and
 * "string" otherwise. The checks are made in that order.
 *
 * @return the populated "DataDictionary" element
 * @exception Exception declared for callers; nothing in this body
 * throws it explicitly
 */
private Element dataDictionary () throws Exception {
  Element dictionary = new Element ("DataDictionary");
  FastVector attributes = m_instances.getVector();
  dictionary.setAttribute("numberOfFields",
      String.valueOf(m_instances.numAttributes()));

  for (int index = 0; index < attributes.size(); index++) {
    Element field = new Element ("DataField");
    org.agentacademy.modules.dataminer.core.Attribute attribute =
        (org.agentacademy.modules.dataminer.core.Attribute) attributes.elementAt(index);
    field.setAttribute("name", attribute.name());

    if (attribute.isNominal()) {
      field.setAttribute("optype", "categorical");
      // List every label of the nominal attribute as a Value child.
      Enumeration labels = attribute.enumerateValues();
      while (labels.hasMoreElements()) {
        String label = (String) labels.nextElement();
        Element valueElement = new Element ("Value");
        valueElement.setAttribute("value", label);
        field.addContent(valueElement);
      }
    } else {
      String optype;
      if (attribute.isNumeric()) {
        optype = "continuous";
      } else if (attribute.isRegular()) {
        optype = "ordinal";
      } else {
        optype = "string";
      }
      field.setAttribute("optype", optype);
    }

    dictionary.addContent(field);
  }
  return dictionary;
}
/**
 * Creates the ClusteringModel element with its three attributes:
 * modelName (the relation name of m_instances), modelClass
 * ("centerBased") and numberOfClusters (m_NumClusters).
 *
 * @return the "ClusteringModel" element, without children
 * @exception Exception declared for callers; nothing in this body
 * throws it explicitly
 */
private Element clusteringModel () throws Exception {
  Element model = new Element ("ClusteringModel");

  String relation = m_instances.relationName();
  model.setAttribute("modelName", relation);
  model.setAttribute("modelClass", "centerBased");

  String clusterCount = String.valueOf(m_NumClusters);
  model.setAttribute("numberOfClusters", clusterCount);
  return model;
}
/**
 * Creates the MiningSchema element, holding one MiningField child
 * (with a name attribute) per entry of m_instances.getVector().
 *
 * @return the populated "MiningSchema" element
 * @exception Exception declared for callers; nothing in this body
 * throws it explicitly
 */
private Element miningSchema () throws Exception {
  Element schema = new Element ("MiningSchema");
  FastVector attributes = m_instances.getVector();

  int index = 0;
  while (index < attributes.size()) {
    Element field = new Element ("MiningField");
    org.agentacademy.modules.dataminer.core.Attribute attribute =
        (org.agentacademy.modules.dataminer.core.Attribute) attributes.elementAt(index);
    field.setAttribute("name", attribute.name());
    schema.addContent(field);
    index++;
  }
  return schema;
}
/**
 * Creates a ClusteringField element for one field, using
 * "squaredEuclidean" as its compareFunction.
 *
 * @param fieldString the value for the element's field attribute
 * @return the new "ClusteringField" element
 */
private Element clusteringField (String fieldString) {
  Element field = new Element ("ClusteringField");
  field.setAttribute("field", fieldString);
  field.setAttribute("compareFunction", "squaredEuclidean");
  return field;
}
/**
 * Creates a Cluster element wrapping an Array child. The Array's n
 * attribute is m_instances.numAttributes() and its text is the given
 * string.
 * <p>
 * NOTE(review): the name attribute is always "1". The original counter
 * was a local variable starting at 0 and incremented once per call, so it
 * never distinguished one cluster from another.
 *
 * @param string the text content for the Array element
 * @return the new "Cluster" element
 * @exception Exception declared for callers; nothing in this body
 * throws it explicitly
 */
private Element clusterDescriptor (String string) throws Exception {
  final int clusterNumber = 1;

  Element cluster = new Element ("Cluster");
  Element values = new Element ("Array");
  cluster.setAttribute("name", String.valueOf(clusterNumber));

  values.setAttribute("n", String.valueOf(m_instances.numAttributes()));
  values.setText(string);

  cluster.addContent(values);
  return cluster;
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -