亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關於我們
? 蟲蟲下載站

?? rlearner.java

?? 用java寫的一個強化學習程序
?? JAVA
字號:
import java.util.Vector;
import java.lang.*;
import java.lang.reflect.*;

/**
 * Tabular reinforcement-learning agent that trains an {@link RLPolicy}
 * against an {@link RLWorld}.
 *
 * <p>Three update rules are available ({@link #Q_LEARNING}, {@link #SARSA},
 * {@link #Q_LAMBDA}) and two action-selection schemes ({@link #E_GREEDY},
 * {@link #SOFTMAX}). A trial is a sequence of epochs; every epoch starts from
 * the state returned by {@code RLWorld.resetState()} and runs until
 * {@code RLWorld.endState()} reports true or {@link #running} is cleared.
 *
 * <p>NOTE(review): {@link #running} is a plain field that is polled inside the
 * learning loops. If it is toggled from another thread (the class keeps a
 * {@code Thread} reference), consider making it {@code volatile} - confirm
 * against the callers.
 */
public class RLearner {

    RLWorld thisWorld;
    RLPolicy policy;

    // Learning types
    public static final int Q_LEARNING = 1;
    public static final int SARSA = 2;
    public static final int Q_LAMBDA = 3; // Good parms were lambda=0.05, gamma=0.1, alpha=0.01, epsilon=0.1

    // Action selection types
    public static final int E_GREEDY = 1;
    public static final int SOFTMAX = 2;

    /** Maximum number of state-action pairs kept in the eligibility trace. */
    private static final int MAX_TRACE_LENGTH = 10;

    int learningMethod;
    int actionSelection;

    /** Probability of picking a random action under e-greedy selection. */
    double epsilon;
    /** Divisor applied to the Q-values before exponentiation in softmax selection. */
    double temp;

    /** Step size of the Q-value update. */
    double alpha;
    /** Factor applied to the successor value in the update target. */
    double gamma;
    /** Together with gamma, the per-step decay of the Q(lambda) trace update. */
    double lambda;

    int[] dimSize;
    int[] state;
    int[] newstate;
    int action;
    double reward;

    int epochs;
    // Never written by this class; NOTE(review): presumably maintained by the caller - confirm.
    public int epochsdone;

    Thread thisThread;
    /** Learning loops stop as soon as this is false. */
    public boolean running;

    /** Eligibility trace: int[] entries holding the state followed by the action. */
    Vector trace = new Vector();
    int[] saPair;

    long timer;

    /** Set by e-greedy selection: true when the last choice was an exploration step. */
    boolean random = false;
    Runnable a;

    /**
     * Creates a learner for the given world, with a fresh policy sized by the
     * world's dimensions and initialised with the world's initial values.
     * Defaults: Q-learning, e-greedy, epsilon 0.1, temp 1, alpha 1, gamma 0.1,
     * lambda 0.1.
     *
     * @param world the environment to learn in
     */
    public RLearner( RLWorld world ) {
        // Getting the world from the invoking method.
        thisWorld = world;

        // Get dimensions of the world.
        dimSize = thisWorld.getDimension();

        // Creating new policy with dimensions to suit the world.
        policy = new RLPolicy( dimSize );

        // Initializing the policy with the initial values defined by the world.
        policy.initValues( thisWorld.getInitValues() );

        learningMethod = Q_LEARNING;  //Q_LAMBDA;//SARSA;
        actionSelection = E_GREEDY;

        // set default values
        epsilon = 0.1;
        temp = 1;

        alpha = 1; // For CliffWorld alpha = 1 is good
        gamma = 0.1;
        lambda = 0.1;  // For CliffWorld gamma = 0.1, l = 0.5 (l*g=0.05)is a good choice.

        System.out.println( "RLearner initialised" );
    }

    /**
     * Executes one trial: up to {@code epochs} epochs, stopping early when
     * {@link #running} is false. Every 1000th epoch (starting with epoch 0)
     * the elapsed wall-clock milliseconds since the previous report are printed.
     */
    public void runTrial() {
        System.out.println( "Learning! ("+epochs+" epochs)\n" );

        // Start the stopwatch here so the first report shows an elapsed time
        // instead of the raw milliseconds-since-1970 value.
        timer = System.currentTimeMillis();

        for( int i = 0 ; i < epochs ; i++ ) {
            if( ! running ) {
                break;
            }

            runEpoch();

            if( i % 1000 == 0 ) {
                // give text output
                timer = ( System.currentTimeMillis() - timer );
                System.out.println("Epoch:" + i + " : " + timer);
                timer = System.currentTimeMillis();
            }
        }
    }

    /**
     * Executes one epoch with the configured learning method, starting from
     * the state returned by the world's {@code resetState()}. An unknown
     * learning method only resets the state.
     */
    public void runEpoch() {

        // Reset state to start position defined by the world.
        state = thisWorld.resetState();

        // Each case ends with break: exactly one update rule runs per epoch.
        switch( learningMethod ) {
        case Q_LEARNING :
            runQLearningEpoch();
            break;
        case SARSA :
            runSarsaEpoch();
            break;
        case Q_LAMBDA :
            runQLambdaEpoch();
            break;
        default :
            break;
        }
    }

    /** One epoch using Q <- Q + alpha * (reward + gamma * max Q(next) - Q). */
    private void runQLearningEpoch() {

        while( ! thisWorld.endState() ) {

            if( ! running ) {
                break;
            }

            action = selectAction( state );
            newstate = thisWorld.getNextState( action );
            reward = thisWorld.getReward();

            double thisQ = policy.getQValue( state, action );
            double maxQ = policy.getMaxQValue( newstate );

            // Calculate new Value for Q
            double newQ = thisQ + alpha * ( reward + gamma * maxQ - thisQ );
            policy.setQValue( state, action, newQ );

            // Set state to the new state.
            state = newstate;
        }
    }

    /** One epoch using Q <- Q + alpha * (reward + gamma * Q(next, nextAction) - Q). */
    private void runSarsaEpoch() {

        action = selectAction( state );

        while( ! thisWorld.endState() ) {

            if( ! running ) {
                break;
            }

            newstate = thisWorld.getNextState( action );
            reward = thisWorld.getReward();

            // The action for the next step is chosen before the update and then reused.
            int newaction = selectAction( newstate );

            double thisQ = policy.getQValue( state, action );
            double nextQ = policy.getQValue( newstate, newaction );

            double newQ = thisQ + alpha * ( reward + gamma * nextQ - thisQ );
            policy.setQValue( state, action, newQ );

            // Set state to the new state and action to the new action.
            state = newstate;
            action = newaction;
        }
    }

    /**
     * One epoch of the Q-learning update extended with a bounded eligibility
     * trace: after updating the current pair, the same error term is applied
     * to the previously visited pairs, scaled by (gamma * lambda)^age. The
     * trace is cleared after an exploratory (random) action.
     */
    private void runQLambdaEpoch() {

        // Remove all eligibility traces.
        trace.removeAllElements();

        while( ! thisWorld.endState() ) {

            if( ! running ) {
                break;
            }

            action = selectAction( state );

            // Store state-action pair in eligibility trace. Sized from the
            // state itself so the action slot at index state.length always exists.
            saPair = new int[state.length + 1];
            System.arraycopy( state, 0, saPair, 0, state.length );
            saPair[state.length] = action;
            trace.add( saPair );

            // Keep only the most recent pairs.
            if( trace.size() > MAX_TRACE_LENGTH ) {
                trace.removeElementAt( 0 );
            }

            newstate = thisWorld.getNextState( action );
            reward = thisWorld.getReward();

            double maxQ = policy.getMaxQValue( newstate );
            double thisQ = policy.getQValue( state, action );

            // Calculate new Value for Q
            double delta = reward + gamma * maxQ - thisQ;
            policy.setQValue( state, action, thisQ + alpha * delta );

            // Update values for the older trace entries (the newest one was
            // just updated above). Work on copies: writing into the live
            // state array or the action field would corrupt the current step
            // if the world handed out its own array.
            int newest = trace.size() - 1;
            for( int e = newest - 1 ; e >= 0 ; e-- ) {

                int[] pair = (int[]) trace.get( e );
                int[] tracedState = new int[pair.length - 1];
                System.arraycopy( pair, 0, tracedState, 0, tracedState.length );
                int tracedAction = pair[pair.length - 1];

                double tracedQ = policy.getQValue( tracedState, tracedAction );
                double decay = Math.pow( gamma * lambda, newest - e );

                policy.setQValue( tracedState, tracedAction, tracedQ + alpha * delta * decay );
            }

            if( random ) {
                trace.removeAllElements();
            }

            // Set state to the new state.
            state = newstate;
        }
    }

    /**
     * Picks an action for the given state with the configured selection
     * scheme. Only actions accepted by the world's {@code validAction} are
     * returned by the known schemes.
     *
     * @param state the state to act in
     * @return the chosen action index, or -1 for an unknown selection scheme
     */
    private int selectAction( int[] state ) {

        double[] qValues = policy.getQValuesAt( state );

        switch( actionSelection ) {
        case E_GREEDY :
            return selectEpsilonGreedy( qValues );
        case SOFTMAX :
            return selectSoftmax( qValues );
        default :
            return -1;
        }
    }

    /**
     * With probability epsilon picks a uniformly random action, otherwise the
     * action with the largest Q-value (ties broken uniformly at random).
     * Invalid picks are replaced by uniformly random ones until valid.
     */
    private int selectEpsilonGreedy( double[] qValues ) {

        int selectedAction = -1;

        // Explore?
        random = Math.random() < epsilon;

        if( ! random ) {
            double maxQ = -Double.MAX_VALUE;
            int[] best = new int[qValues.length];
            int bestCount = 0;

            for( int candidate = 0 ; candidate < qValues.length ; candidate++ ) {
                if( qValues[candidate] > maxQ ) {
                    // New maximum: restart the list of tied actions.
                    maxQ = qValues[candidate];
                    bestCount = 0;
                    best[bestCount++] = candidate;
                }
                else if( qValues[candidate] == maxQ ) {
                    best[bestCount++] = candidate;
                }
            }

            if( bestCount == 1 ) {
                selectedAction = best[0];
            }
            else if( bestCount > 1 ) {
                selectedAction = best[ (int) ( Math.random() * bestCount ) ];
            }
        }

        // Select random action when exploring or when no maximum was found.
        if( selectedAction == -1 ) {
            selectedAction = (int) ( Math.random() * qValues.length );
        }

        // Choose new action if not valid.
        while( ! thisWorld.validAction( selectedAction ) ) {
            selectedAction = (int) ( Math.random() * qValues.length );
        }

        return selectedAction;
    }

    /**
     * Samples an action with probability proportional to exp(Q / temp),
     * redrawing until the world accepts the action as valid.
     */
    private int selectSoftmax( double[] qValues ) {

        int count = qValues.length;

        // Shift all exponents by their maximum before exponentiating: the
        // resulting distribution is unchanged, but exp() cannot overflow.
        double[] exponent = new double[count];
        double maxExponent = Double.NEGATIVE_INFINITY;
        for( int i = 0 ; i < count ; i++ ) {
            exponent[i] = qValues[i] / temp;
            if( exponent[i] > maxExponent ) {
                maxExponent = exponent[i];
            }
        }

        double[] weight = new double[count];
        double sumWeight = 0;
        for( int i = 0 ; i < count ; i++ ) {
            weight[i] = Math.exp( exponent[i] - maxExponent );
            sumWeight += weight[i];
        }

        int selectedAction;
        do {
            double threshold = Math.random() * sumWeight;
            double cumulative = 0;

            // Falling back to the last action covers rounding in the running
            // sum, so every draw (including 0.0 and exact boundaries) maps to
            // an action.
            selectedAction = count - 1;
            for( int i = 0 ; i < count ; i++ ) {
                cumulative += weight[i];
                if( threshold < cumulative ) {
                    selectedAction = i;
                    break;
                }
            }
        } while( ! thisWorld.validAction( selectedAction ) );

        return selectedAction;
    }

    /** @return the policy currently being trained */
    public RLPolicy getPolicy() {
        return policy;
    }

    /**
     * Sets the step size. Values outside [0, 1] are ignored. The upper bound
     * is inclusive because 1 is the constructor's own default.
     *
     * @param a new step size
     */
    public void setAlpha( double a ) {
        if( a >= 0 && a <= 1 ) {
            alpha = a;
        }
    }

    /** @return the step size */
    public double getAlpha() {
        return alpha;
    }

    /**
     * Sets gamma. Values outside the open interval (0, 1) are ignored.
     *
     * @param g new gamma
     */
    public void setGamma( double g ) {
        if( g > 0 && g < 1 ) {
            gamma = g;
        }
    }

    /** @return gamma */
    public double getGamma() {
        return gamma;
    }

    /**
     * Sets the exploration probability. Values outside the open interval
     * (0, 1) are ignored.
     *
     * @param e new epsilon
     */
    public void setEpsilon( double e ) {
        if( e > 0 && e < 1 ) {
            epsilon = e;
        }
    }

    /** @return the exploration probability */
    public double getEpsilon() {
        return epsilon;
    }

    /**
     * Sets the number of epochs per trial. Non-positive values are ignored.
     *
     * @param e number of epochs
     */
    public void setEpisodes( int e ) {
        if( e > 0 ) {
            epochs = e;
        }
    }

    /** @return the number of epochs per trial */
    public int getEpisodes() {
        return epochs;
    }

    /**
     * Chooses the action-selection scheme. Anything other than
     * {@link #SOFTMAX} selects {@link #E_GREEDY}.
     *
     * @param as one of the action-selection constants
     */
    public void setActionSelection( int as ) {
        switch( as ) {
        case SOFTMAX :
            actionSelection = SOFTMAX;
            break;
        case E_GREEDY :
        default :
            actionSelection = E_GREEDY;
            break;
        }
    }

    /** @return the active action-selection constant */
    public int getActionSelection() {
        return actionSelection;
    }

    /**
     * Chooses the learning method. Anything other than {@link #SARSA} or
     * {@link #Q_LAMBDA} selects {@link #Q_LEARNING}.
     *
     * @param lm one of the learning-method constants
     */
    public void setLearningMethod( int lm ) {
        switch( lm ) {
        case SARSA :
            learningMethod = SARSA;
            break;
        case Q_LAMBDA :
            learningMethod = Q_LAMBDA;
            break;
        case Q_LEARNING :
        default :
            learningMethod = Q_LEARNING;
            break;
        }
    }

    /** @return the active learning-method constant */
    public int getLearningMethod() {
        return learningMethod;
    }

    /**
     * Discards the current policy and replaces it with a fresh one,
     * initialised with the world's initial values.
     *
     * @return the new policy
     */
    //AK: let us clear the policy
    public RLPolicy newPolicy() {
        policy = new RLPolicy( dimSize );
        // Initializing the policy with the initial values defined by the world.
        policy.initValues( thisWorld.getInitValues() );
        return policy;
    }
}
	

?? 快捷鍵說明

複製代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
免费人成黄页网站在线一区二区| 亚洲精选视频免费看| 美女一区二区视频| 日韩美女在线视频| 国产在线视视频有精品| 国产日韩欧美综合在线| 99re成人精品视频| 夜夜爽夜夜爽精品视频| 91精品国产综合久久福利软件 | 蜜臀久久99精品久久久久久9 | 日韩欧美另类在线| 国产精品综合二区| 综合在线观看色| 欧美亚洲国产一区二区三区| 日韩成人免费在线| 国产欧美一区二区三区鸳鸯浴| av不卡免费在线观看| 日韩在线a电影| 中文字幕欧美三区| 欧美日韩欧美一区二区| 黑人精品欧美一区二区蜜桃 | 最新国产の精品合集bt伙计| 欧美影院精品一区| 激情文学综合网| 亚洲精品伦理在线| 精品国产精品一区二区夜夜嗨| 成人aa视频在线观看| 亚洲成人精品在线观看| 国产人久久人人人人爽| 欧美在线观看一二区| 国产精品99久久久久久似苏梦涵 | 色哟哟欧美精品| 九九热在线视频观看这里只有精品| 国产精品久久久久aaaa| 欧美日韩国产乱码电影| www.色精品| 美女脱光内衣内裤视频久久网站 | 国产精品视频一区二区三区不卡| 欧美三级视频在线播放| 高清久久久久久| 日韩av在线播放中文字幕| 亚洲女性喷水在线观看一区| 精品国产a毛片| 欧美日韩黄视频| 成人美女在线观看| 久久se精品一区二区| 尤物在线观看一区| 国产精品入口麻豆九色| 日韩欧美另类在线| 91精品免费在线观看| 色婷婷一区二区| 成人动漫中文字幕| 国产成人在线色| 久久99久久精品| 日韩成人一级大片| 首页国产欧美日韩丝袜| 亚洲在线成人精品| 亚洲狼人国产精品| 国产精品久久久久久久久晋中| 精品对白一区国产伦| 91精品在线观看入口| 欧美天天综合网| 色香蕉久久蜜桃| hitomi一区二区三区精品| 国产伦精品一区二区三区在线观看 | 精品国产乱码91久久久久久网站| 在线不卡的av| 51精品视频一区二区三区| 欧美日韩精品高清| 欧美福利电影网| 91精品婷婷国产综合久久竹菊| 在线不卡免费欧美| 欧美一区二区三区视频| 69久久99精品久久久久婷婷| 欧美日韩亚洲高清一区二区| 欧美性猛交一区二区三区精品| 91色porny在线视频| 91丨国产丨九色丨pron| 色婷婷一区二区| 精品视频一区二区三区免费| 欧美丝袜自拍制服另类| 欧美日韩精品久久久| 91精品国产综合久久久蜜臀粉嫩| 欧美日韩一区高清| 在线综合+亚洲+欧美中文字幕| 欧美精品v日韩精品v韩国精品v| 91精品国产高清一区二区三区蜜臀| 欧美军同video69gay| 日韩免费在线观看| 久久亚洲精华国产精华液| 久久久.com| 亚洲色图色小说| 国内精品在线播放| 国产成人在线色| 色视频成人在线观看免| 在线观看91av| 久久久久久久久久久久久女国产乱| 久久精品人人做人人综合| 日本一区免费视频| 亚洲色图自拍偷拍美腿丝袜制服诱惑麻豆| 亚洲色图都市小说| 午夜电影网一区| 国产制服丝袜一区| 99久久免费国产| 91精品国产91久久久久久最新毛片| 日韩女优电影在线观看| 国产精品国产自产拍在线| 亚洲自拍与偷拍| 久久成人免费网| 91香蕉视频黄| 日韩一区二区三区视频在线 | 自拍偷拍亚洲综合| 日韩电影在线免费看| 国产成人在线视频网址| 欧美亚洲国产一区二区三区va | av电影在线不卡| 欧美精品一二三| 国产精品色一区二区三区| 亚洲大片精品永久免费| 高清在线成人网| 91精品国产手机| 亚洲免费观看高清完整版在线观看熊 | 中文字幕+乱码+中文字幕一区| 亚洲永久免费av| 丰满放荡岳乱妇91ww| 欧美色手机在线观看| 久久久久久久久99精品| 亚洲h在线观看| 成人av资源网站| 久久综合视频网| 亚洲午夜精品网| 9色porny自拍视频一区二区| 欧美一区二区视频网站| 一区二区三区精品久久久| 成人黄色软件下载| 欧美成人精品3d动漫h| 亚洲影院久久精品| 99久久精品费精品国产一区二区| 精品日韩一区二区三区免费视频| 亚洲美女屁股眼交| 成人午夜av电影| 久久影音资源网| 奇米精品一区二区三区在线观看一| 91啦中文在线观看| 国产精品色在线| 国内成人精品2018免费看| 欧美肥妇bbw| 亚洲二区在线视频| 91丨九色porny丨蝌蚪| 国产精品久久久久9999吃药| 国产福利精品一区二区| 久久老女人爱爱| 久久国产夜色精品鲁鲁99| 欧美精品久久久久久久久老牛影院| 亚洲最大成人综合| 色综合久久久网| 一区二区在线观看免费 | 欧美精品三级在线观看| 亚洲视频小说图片| 成人免费视频国产在线观看| 精品国产一区二区三区四区四 | 亚洲午夜羞羞片| 91看片淫黄大片一级在线观看| 
国产目拍亚洲精品99久久精品 | 91香蕉视频mp4| 亚洲色图欧洲色图婷婷| 99re8在线精品视频免费播放| 最近中文字幕一区二区三区| eeuss鲁片一区二区三区在线观看 eeuss鲁片一区二区三区在线看 | 欧美96一区二区免费视频| 91精品国产免费久久综合| 日本亚洲欧美天堂免费| 日韩欧美中文字幕制服| 极品销魂美女一区二区三区| 亚洲男人天堂av| 欧美午夜理伦三级在线观看| 亚洲一区二区三区中文字幕| 欧美性xxxxx极品少妇| 午夜久久久久久久久久一区二区| 欧美日韩中文一区| 免费人成在线不卡| 久久久午夜精品理论片中文字幕| 国产综合久久久久久久久久久久 | 97久久超碰精品国产| 亚洲一级二级三级| 91麻豆精品国产91久久久久久久久| 婷婷国产在线综合| 欧美精品一区二区三区蜜桃视频| 国产一区二区伦理| 亚洲欧洲另类国产综合| 欧美又粗又大又爽| 精品亚洲国产成人av制服丝袜| 国产日韩成人精品| 91搞黄在线观看| 日韩**一区毛片| 日本一区二区在线不卡| 欧美日韩国产美| 国产成人在线视频播放| 亚洲一区二区欧美日韩| 精品久久久久久久久久久久久久久 |