// sun.java
// (Web code-viewer residue: "Font size:" control label; not part of the program.)
import java.io.*;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.StringTokenizer;
/**
 * Reads the ALL/AML training file into {@code DecisionTree.primal}, converts every
 * attribute column to a 0/1 value around an information-gain threshold, and exposes
 * the resulting table as nested lists in {@link #external}.
 *
 * <p>Layout used throughout this class: {@code DecisionTree.primal[sample][attribute]},
 * with 38 sample rows, 7129 attribute columns, and column 7129 holding the target
 * label (0 = ALL, 1 = AML).
 */
public class ReadTrain {
    // Number of samples processed (rows of DecisionTree.primal touched by this class).
    private static final int SAMPLE_COUNT = 38;
    // Number of ordinary attribute columns.
    private static final int ATTRIBUTE_COUNT = 7129;
    // Column index that stores the target label.
    private static final int TARGET_COLUMN = ATTRIBUTE_COUNT;
    // Number of samples labelled ALL (0); these are the first rows.
    private static final int ALL_COUNT = 27;
    // Number of samples labelled AML (1); these are the remaining rows.
    private static final int AML_COUNT = 11;

    // Path of the training file.
    private String filePath = "E:/data_set_ALL_AML_train.txt";
    // Holds the line most recently read from the file.
    private String dataString;
    // Scratch space for sorting one attribute column:
    // row 0 = attribute values, row 1 = the matching target labels.
    private int[][] temp = new int[2][SAMPLE_COUNT];
    // Entropy of the target attribute over all samples.
    public double last_entropy;
    // AML is encoded as 1 and ALL as 0 in the array; these hold the class sizes.
    public double AML = 11.0;
    public double ALL = 27.0;
    // Largest information gain seen over all attributes so far.
    public double max_max_value = 0;
    // Column index of the attribute with the largest information gain (the root).
    public int root;
    // Table built by Make_List(): one LinkedList per sample plus a final label row.
    public static ArrayList external = new ArrayList();

    ReadTrain() {
    }

    /**
     * Computes the two-class entropy (base 2) of a partition.
     *
     * @param p   number of members in the first class
     * @param m   number of members in the second class
     * @param num total used as the denominator of both proportions
     * @return the entropy, or 0.0 when either class count is zero
     */
    public double comput_entropy(int p, int m, double num) {
        if (p == 0 || m == 0) {
            // A pure partition has zero entropy; also avoids log(0).
            return 0.0;
        }
        double num1 = -(p / num) * (Math.log(p / num) / Math.log(2.0));
        double num2 = -(m / num) * (Math.log(m / num) / Math.log(2.0));
        return num1 + num2;
    }

    /**
     * Reads the training file into {@code DecisionTree.primal}, fills the target
     * column and computes {@link #last_entropy}.
     *
     * <p>The first line of the file and the first two space-separated tokens of each
     * following line are skipped. The k-th remaining token of data line j is stored
     * at {@code primal[k][j]}. Any exception while reading is printed to standard
     * output and reading stops; the target column and entropy are still set.
     */
    public void Read_in_train() {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(filePath)));
            int j = 0;
            // Data starts on the second line.
            br.readLine();
            while ((dataString = br.readLine()) != null) {
                StringTokenizer column = new StringTokenizer(dataString, " ");
                // Values start at the third column.
                column.nextToken();
                column.nextToken();
                int i = 0;
                while (column.hasMoreTokens()) {
                    DecisionTree.primal[i][j] = Integer.parseInt(column.nextToken());
                    i++;
                }
                j++;
            }
        } catch (Exception e) {
            // Best effort: report the problem and continue with whatever was loaded.
            System.out.println(e);
        } finally {
            // Always release the file handle, including after a read/parse failure.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException closeFailure) {
                    System.out.println(closeFailure);
                }
            }
        }
        // Build the target attribute: 0 stands for ALL, 1 stands for AML.
        for (int i = 0; i < SAMPLE_COUNT; i++) {
            DecisionTree.primal[i][TARGET_COLUMN] = (i < ALL_COUNT) ? 0 : 1;
        }
        last_entropy = comput_entropy(ALL_COUNT, AML_COUNT, 38.0);
    }

    /**
     * Converts every attribute column of {@code DecisionTree.primal} to 0/1.
     *
     * <p>For each attribute the samples are sorted by value, the candidate threshold
     * with the highest information gain is chosen, the threshold is stored in
     * {@code DecisionTree.middle[attribute]}, and each value is replaced by 0 when it
     * is below the threshold and 1 otherwise. {@link #max_max_value} and
     * {@link #root} are updated to track the best attribute overall.
     */
    public void Make_boolean() {
        for (int j = 0; j < ATTRIBUTE_COUNT; j++) {
            loadColumnIntoTemp(j);
            sortTempByValue();
            double yuzhi = findBestThreshold(j);
            DecisionTree.middle[j] = yuzhi;
            for (int i = 0; i < SAMPLE_COUNT; i++) {
                DecisionTree.primal[i][j] = (DecisionTree.primal[i][j] < yuzhi) ? 0 : 1;
            }
        }
    }

    // Copies attribute column j and the target labels of all samples into temp.
    private void loadColumnIntoTemp(int j) {
        for (int i = 0; i < SAMPLE_COUNT; i++) {
            temp[0][i] = DecisionTree.primal[i][j];
            temp[1][i] = DecisionTree.primal[i][TARGET_COLUMN];
        }
    }

    // Bubble-sorts temp by attribute value (row 0), moving each label (row 1) with it.
    private void sortTempByValue() {
        for (int pass = 0; pass < SAMPLE_COUNT - 1; pass++) {
            for (int k = 0; k < SAMPLE_COUNT - 1 - pass; k++) {
                if (temp[0][k] > temp[0][k + 1]) {
                    int value = temp[0][k + 1];
                    int label = temp[1][k + 1];
                    temp[0][k + 1] = temp[0][k];
                    temp[1][k + 1] = temp[1][k];
                    temp[0][k] = value;
                    temp[1][k] = label;
                }
            }
        }
    }

    // Returns the threshold with the highest information gain for the sorted column
    // in temp (0.0 if no candidate has positive gain); updates max_max_value/root.
    private double findBestThreshold(int attribute) {
        double max_value = 0.0;  // best gain found for this attribute
        double yuzhi = 0.0;      // threshold that produced max_value
        int positive = 0;        // samples with label 1 at or before position i
        int minus = 0;           // samples with label 0 at or before position i
        for (int i = 0; i < SAMPLE_COUNT - 1; i++) {
            if (temp[1][i] == 1) {
                positive++;
            } else {
                minus++;
            }
            // Only positions where the label changes are evaluated as split points.
            // NOTE(review): adjacent samples with equal values but different labels
            // are still evaluated, although no threshold can separate them - confirm
            // whether such candidates should be skipped.
            if (temp[1][i] != temp[1][i + 1]) {
                int leftSize = i + 1;
                int rightSize = SAMPLE_COUNT - leftSize;
                double entropy_1 = comput_entropy(positive, minus, (double) leftSize);
                double entropy_2 = comput_entropy(AML_COUNT - positive, ALL_COUNT - minus,
                        (double) rightSize);
                // Information gain Gain(S, A) for a split after position i.
                double current = last_entropy
                        - entropy_1 * leftSize / SAMPLE_COUNT
                        - entropy_2 * rightSize / SAMPLE_COUNT;
                if (current > max_value) {
                    max_value = current;
                    // Floating-point midpoint: integer division would truncate the
                    // threshold onto the lower value and misplace that sample.
                    yuzhi = ((double) temp[0][i] + temp[0][i + 1]) / 2.0;
                    if (max_value > max_max_value) {
                        // Remember the attribute with the best gain overall.
                        max_max_value = max_value;
                        root = attribute;
                    }
                }
            }
        }
        return yuzhi;
    }

    /**
     * Appends the contents of {@code DecisionTree.primal} to {@link #external}.
     *
     * <p>Adds one {@code LinkedList} of 7130 {@code Integer}s per sample (7129
     * attribute values followed by the target label), then a final label row holding
     * the attribute indices 0..7128. Entries are appended, so calling this method
     * again adds another copy.
     */
    public void Make_List() {
        for (int i = 0; i < SAMPLE_COUNT; i++) {
            LinkedList inner = new LinkedList();
            for (int j = 0; j <= TARGET_COLUMN; j++) {
                inner.add(Integer.valueOf(DecisionTree.primal[i][j]));
            }
            external.add(inner);
        }
        // The last row is the label row (attribute indices).
        LinkedList label = new LinkedList();
        for (int i = 0; i < ATTRIBUTE_COUNT; i++) {
            label.add(Integer.valueOf(i));
        }
        external.add(label);
    }
}
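/*
 * Web code-viewer residue (keyboard-shortcut help); not part of the program.
 *   Copy code:           Ctrl + C
 *   Search code:         Ctrl + F
 *   Full-screen mode:    F11
 *   Toggle theme:        Ctrl + Shift + D
 *   Show shortcuts:      ?
 *   Increase font size:  Ctrl + =
 *   Decrease font size:  Ctrl + -
 */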