亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? qlearn-cmac-xwin.cpp

?? cmac實現
?? CPP
字號:
// Q-learning for mountain car problem /// Q-learning for mountain car problem // uses CMAC as a function approximator// follows approach described in Sutton/Singh papers// implements TD(lambda) sarsa// Code written by Sridhar Mahadevan// Department of Computer Science and Engineering// University of South Florida// 4202 East Fowler Avenue, ENG 118// Tampa, Florida 33620-5399// mahadeva@csee.usf.edu// http://www.csee.usf.edu/~mahadeva/mypage.html#include "cmac.h" // definition of CMAC class and functions #include "display.h" // Xwindows graphics #include "string.h"#include <unistd.h>#define RUNS 1#define MAX_TRIALS 1000#define VALUE_PLOT_STEP_SIZE 100 // output value function once in N trials#define Q0 0 // for initializing weights to Q0/TILES #define GRID_RES 50double POS_STEP = (POS_RANGE[1] - POS_RANGE[0])/GRID_RES; double VEL_STEP = (VEL_RANGE[1] - VEL_RANGE[0])/GRID_RES; double weight[POS_BINS+1][VEL_BINS+1][TILES][ACTIONS];  // qvalue representation over tiles double eligibility[POS_BINS+1][VEL_BINS+1][TILES][ACTIONS]; // eligibility of a tile static int trial_data[RUNS][MAX_TRIALS]; // keep track of solution time for each trial and run // main routine for running trialsvoid run_trials();  void output_trial_std_dev_data(); // define Q-learning with CMAC as a derived class of cmacclass mcar_qlearn_cmac : public cmac {   // inherit from cmac and mcar public: 	  mcar_qlearn_cmac(double pos, double vel); // constructor initializes state and cmac tiles 				ACTION choose_action(); // choose highest Q-value action, but explore sometimes 				void initialize_weights_eligibilities(); 				void update_eligibilities(ACTION a); // mark all active tiles for a particular action 				double qvalue(ACTION a); //  qvalue  computed as weighted sum over active tiles 				double qvalue(ACTION a, double pos, double vel); // at any desired point				double best_qvalue(double pos, double vel); 				void generate_qvalue_plot(int run, int trial); 				// Given previously active tiles and newly active tiles, update Q values 		void update_weights(double reward, double old_qval, double new_qval); 			private:  // data structures for Q-learning			double exploration_rate; 				double GAMMA;  // discount factor 		double BETA;   // learning rate  		double LAMBDA; // recency parameter		}; // constructor function calls cmac and mcar constructor functions mcar_qlearn_cmac::mcar_qlearn_cmac(double pos, double vel) : cmac(pos, vel) {// initialize Q-values  to 0  for (int pbin=0; pbin <= POS_BINS; pbin++)    for (int vbin=0; vbin <= VEL_BINS; vbin++)      for (int tile=0; tile<TILES; tile++)	for (int act=0; act<ACTIONS; act++)	  {	    weight[pbin][vbin][tile][act] = Q0/TILES; 	    eligibility[pbin][vbin][tile][act] = 0.0; 	  } 													GAMMA = 1.0; // discount factorBETA = 0.5;  // learning rate LAMBDA = 0.9;  // recency parameter exploration_rate = 0.0;  // percentage of randomness}// initialize weight and eligibilitiesvoid mcar_qlearn_cmac::initialize_weights_eligibilities(){  for (int pbin=0; pbin <= POS_BINS; pbin++)    for (int vbin=0; vbin <= VEL_BINS; vbin++)      for (int tile=0; tile<TILES; tile++)		for (int act=0; act<ACTIONS; act++)		  {		    weight[pbin][vbin][tile][act] = Q0/TILES; 		    eligibility[pbin][vbin][tile][act] = 0.0; 		  } }		// compute Q value of current state as weighted sum over active tiles double mcar_qlearn_cmac::qvalue(ACTION a){	double value = 0; 		for (int tile=0; tile<TILES; tile++)		value += weight[atiles[tile].pos_bin][atiles[tile].vel_bin][tile][a]; 			return(value); 	}// compute qvalue at any desired statedouble mcar_qlearn_cmac::qvalue(ACTION a, double pos, double vel){	double value =0; 		// compute active tiles   for (int tile =0; tile<TILES; tile++)    {      int pos_tile = (pos - POS_RANGE[0] + offset[0][tile])/pos_interval; // add offset!!      int vel_tile  = (vel - VEL_RANGE[0] + offset[1][tile])/vel_interval;             value += weight[pos_tile][vel_tile][tile][a];    }        return(value);     }double mcar_qlearn_cmac::best_qvalue(double pos, double vel){	double bvalue = qvalue(coast,pos,vel); 		for (int a=0; a<ACTIONS; a++)      if (qvalue(int_to_act(a), pos, vel) > bvalue)		bvalue = qvalue(int_to_act(a),pos,vel); 		return(bvalue);}// given a position and velocity bin, pick the highest Q-value action with high probability ACTION mcar_qlearn_cmac::choose_action(){  double rvalue;   int bact = choose_random_int_value(2);     rvalue = choose_random_value(); 	  if (rvalue < exploration_rate)  // do a random action     return(choose_random_act());   else     for (int a=0; a<ACTIONS; a++)      if (qvalue(int_to_act(a)) > qvalue(int_to_act(bact)))		bact = a;   return(int_to_act(bact)); }// update eligibilities void mcar_qlearn_cmac::update_eligibilities(ACTION a){	for (int pbin=0; pbin <= POS_BINS; pbin++)		for (int vbin=0; vbin <= VEL_BINS; vbin++)			for (int tile=0; tile<TILES; tile++)				for (int act=0; act<ACTIONS; act++)					eligibility[pbin][vbin][tile][act] *= LAMBDA; // decay eligibilites												for (int tile=0; tile<TILES; tile++)		for (int act = 0; act<ACTIONS; act++)			if (act == a)				eligibility[atiles[tile].pos_bin][atiles[tile].vel_bin][tile][act] = 1;			else 				eligibility[atiles[tile].pos_bin][atiles[tile].vel_bin][tile][act] = 0; 		}// update weights void mcar_qlearn_cmac::update_weights(double reward, double old_qval, double new_qval){// TD(lambda) sarsa update rule for (int pbin=0; pbin <= POS_BINS; pbin++)		for (int vbin=0; vbin <= VEL_BINS; vbin++)			for (int tile=0; tile<TILES; tile++)				for (int act=0; act<ACTIONS; act++)					weight[pbin][vbin][tile][act] +=  		 			 (BETA/TILES)*(reward + new_qval - old_qval) * eligibility[pbin][vbin][tile][act]; }// collect statistics on each trial over runs void output_trial_std_dev_data(){  ofstream out("mcar-sarsa-soln-data");    double sum,sum_sq,std_dev,mean;    for (int j=0; j<MAX_TRIALS; j++)      {   	   sum=0.0;   	   sum_sq=0.0;	   for (int i=0; i<RUNS; i++)	   {	     sum += trial_data[i][j];	     sum_sq += trial_data[i][j]*trial_data[i][j];	    }	   mean = sum/RUNS;//	   std_dev = sqrt((sum_sq - (sum*sum/RUNS))/(RUNS-1));	   	   out <<  j <<  " " << mean << endl; //	     <<  " " << mean - std_dev << " " << mean + std_dev << endl;      }  out.close(); } void mcar_qlearn_cmac::generate_qvalue_plot(int run, int trial){  char name[20];	  sprintf(name,"qvalue-%d-%d",run, trial);	  ofstream output(name); 	  for (int pos=0; pos<= GRID_RES; pos++)		{		  double pvalue = POS_RANGE[0] + POS_STEP*pos; //  + POS_STEP/2; // midpoint of bin					  output << endl; 				  for (int vel=0; vel <= GRID_RES; vel++)		    {		      double vvalue = VEL_RANGE[0] + VEL_STEP*vel;  // + VEL_STEP/2; // midpoint of bin							      double value = best_qvalue(pvalue,vvalue); 							      if (value < 0) value = -value; 							      output << value << "\t"; 							    }		}  output.close(); 	}	void run_trials(){  int count = 0;   char data[30]; // data string to print out    mcar_qlearn_cmac mcq(-0.5,0.0); // initialize state to bottom rest and cmac tiles  for (int run = 0; run<RUNS; run++)    {      int best_changed = 1;       int best_so_far=10000;  // shortest distance to goal so far       mcq.initialize_weights_eligibilities();       // initialize weights and eligibilities to 0.       if (run > 0)  // on subsequent runs, keep the same CMAC tiling 	{	  mcq.set_curr_pos(-0.5);  // restart at bottom resting 	  mcq.set_curr_vel(0.0); 	}	      for (int trial=0; trial< MAX_TRIALS; trial++)	{	  if ((trial+1)%VALUE_PLOT_STEP_SIZE==0)	    mcq.generate_qvalue_plot(run, trial+1);        	  best_changed = 1; 	  double old_qvalue, new_qvalue;  // for TD(lambda) sarsa update rule      	  int done  = 0, i = 0; 	  double r; 	  ACTION a, na; 	  mcq.active_tiles();  // recompute set of active CMAC tiles 			  a = mcq.choose_action(); // pick highest Q-value action with high probability 	  	  old_qvalue = mcq.qvalue(a); // compute q value as weighted sum over active tiles 		  while (!done) // not yet reached goal		{				  i++; 		  sprintf(data, "Run: %d Trial: %d Shortest solution: %d", run, trial, best_so_far); 		  if (best_changed)		    {		      display_data(data,1); 		      best_changed=0;		    }		  else display_data(data,0); 		  //		  usleep(1000);  // microseconds 		  display_mcar(mcq.curr_pos(),mcq.curr_vel()); 		 		  mcq.update_eligibilities(a); // decay and update eligibilities 		 		  mcq.update_position_velocity(a);  // move the car 	 	 		  if (!mcq.reached_goal())		    {	 	 		      mcq.active_tiles();  // recompute set of active tiles 	 		 		      na = mcq.choose_action();  // choose highest Q-value action in new state		 	 		      new_qvalue = mcq.qvalue(na);		 	 		      r = mcq.reward();  // -1 always 		      mcq.update_weights(r,old_qvalue, new_qvalue);  // TD(0) sarsa update 	  // RECOMPUTE QVALUE AFTER WEIGHTS CHANGED! (rich's correction)		      old_qvalue = mcq.qvalue(na); 		      a = na; 		    }		  else 		    {		      // TD(lambda) sarsa update at goal		      mcq.update_weights(-1,old_qvalue, 0);  		      if (i<best_so_far) 			{			  best_so_far = i; 			  best_changed = 1; 			}		      trial_data[run][trial] = i; // record number of steps 		      done=1;		    }		}			  mcq.set_curr_pos(-0.5);  // restart at bottom resting 	  mcq.set_curr_vel(0.0); 		}    }	}// do a bunch of learning runsint main(void)  {  int rinit = -time(NULL)%100;   int rseed = -1000 + rinit*rinit*rinit;   WINDOW = start_display();   WINDOW2 = start_display2();   initialize_random_number_generator(rseed); // set up large negative number as random seed; 	  cout << "Learning run started at " << __TIME__ << " with seed " << rseed << endl;   run_trials();   output_trial_std_dev_data(); }											

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
日韩精品一二三区| 日本aⅴ免费视频一区二区三区| 亚洲6080在线| 精品粉嫩aⅴ一区二区三区四区| 国产精品综合网| 亚洲免费观看高清完整版在线观看 | 欧美性一区二区| 九色综合国产一区二区三区| 大白屁股一区二区视频| 欧美成人bangbros| av高清久久久| 欧美一区二区三区四区视频| 欧美美女直播网站| 成人午夜电影网站| 欧洲精品在线观看| 日韩欧美视频一区| 国产精品毛片久久久久久久| 悠悠色在线精品| 视频一区在线播放| 91在线视频播放地址| 91麻豆精品久久久久蜜臀| 久久久久久亚洲综合影院红桃| 欧美成人欧美edvon| 国产日韩视频一区二区三区| 一区二区三区精品在线观看| 成人ar影院免费观看视频| 欧美三级在线播放| 国产精品沙发午睡系列990531| 亚洲精品免费看| 欧美亚洲国产怡红院影院| 天天色 色综合| 亚洲欧美激情一区二区| 精品国偷自产国产一区| 日韩精品中文字幕一区二区三区 | 日韩一级精品视频在线观看| 国产一区二区主播在线| 国产一区不卡在线| 天天影视色香欲综合网老头| 中文字幕高清一区| 国产一区二三区好的| 日韩理论片一区二区| 国产精品一区三区| 亚洲美女屁股眼交3| eeuss鲁一区二区三区| 美洲天堂一区二卡三卡四卡视频| 国产欧美日韩亚州综合 | 国产日韩欧美一区二区三区乱码 | 欧美日韩精品是欧美日韩精品| 国产精品自在欧美一区| 国产最新精品免费| 中文字幕亚洲视频| 26uuu另类欧美| 欧美国产一区在线| 久久久精品tv| 欧美日本一道本| 亚洲成精国产精品女| 欧美zozozo| 91精品国产麻豆| 国产黄色成人av| 亚洲国产va精品久久久不卡综合| 日韩一区二区精品| 国产麻豆精品在线观看| 国产精品国产馆在线真实露脸 | 亚洲午夜国产一区99re久久| 9191久久久久久久久久久| 国产精品传媒在线| 538在线一区二区精品国产| 91麻豆免费看片| 国产黄人亚洲片| 国产成人自拍在线| 青娱乐精品视频| 亚洲免费观看高清在线观看| 国产高清精品久久久久| 日韩福利视频导航| 极品尤物av久久免费看| 亚洲尤物在线视频观看| 亚洲欧美电影一区二区| 亚洲一区二区三区中文字幕在线 | 成人av在线资源| 日韩欧美成人一区二区| 日韩精品免费专区| 日韩一二三四区| 美女免费视频一区| 亚洲欧洲av另类| 免费高清在线一区| 欧美丝袜丝nylons| 最新高清无码专区| 韩国毛片一区二区三区| 欧美在线免费观看亚洲| 亚洲天堂2014| 99re这里只有精品6| 欧美日韩一区二区电影| a亚洲天堂av| 色婷婷综合五月| 成人高清免费观看| 精品国产一区二区三区av性色 | 亚洲激情自拍偷拍| 麻豆国产一区二区| av成人免费在线观看| 欧美一卡二卡三卡| 国产精品妹子av| 激情av综合网| 欧美国产日韩精品免费观看| 一区二区在线观看不卡| 久久99精品国产.久久久久久 | 亚洲二区在线观看| 日韩成人av影视| 56国语精品自产拍在线观看| 亚洲小说欧美激情另类| 成人免费精品视频| 欧洲另类一二三四区| 久久香蕉国产线看观看99| 婷婷综合在线观看| 欧美精品日韩一区| 免费在线成人网| 欧美精品乱码久久久久久| 欧美精品一区二区在线播放| 亚洲一区中文在线| 国产在线不卡视频| 久久毛片高清国产| 免费看黄色91| 欧美午夜片在线观看| 无吗不卡中文字幕| 制服丝袜亚洲网站| 亚洲欧洲日韩av| 91久久久免费一区二区| 亚洲美女视频在线| 欧美午夜精品理论片a级按摩| 国产精品丝袜黑色高跟| 91丨porny丨户外露出| 樱花影视一区二区| 天堂成人免费av电影一区| 成人av高清在线| 亚洲国产日韩a在线播放| 欧美一区二区三区色| 免费成人结看片| 国产精品久久久久久久久搜平片| 色欲综合视频天天天| 午夜精品视频一区| 国产精品丝袜黑色高跟| 国产精品视频一二三区| 国产精品不卡视频| 日韩一区二区精品在线观看| 日韩一区二区三区电影 | 91精品国产品国语在线不卡| 三级久久三级久久久| 一区二区三区精品视频| 午夜精品久久一牛影视| 天堂一区二区在线免费观看| 午夜精品免费在线观看| 日韩电影免费在线观看网站| 亚洲图片自拍偷拍| 亚洲第一精品在线| 视频一区二区中文字幕| 蜜臀va亚洲va欧美va天堂| 欧美日韩一区国产| 一区二区三区四区av| 日韩欧美亚洲国产另类| 欧美久久一区二区| 色成年激情久久综合| 色婷婷精品久久二区二区蜜臀av| 免费在线观看成人| 美女高潮久久久| 豆国产96在线|亚洲| 国产精品99久久久| 色综合天天性综合| 成人网页在线观看| 日本精品视频一区二区| 91精品国产综合久久精品图片| 91美女视频网站| 日韩欧美色电影| 欧美一区二区三区视频免费播放| 久久久青草青青国产亚洲免观| 欧美日韩精品免费| 亚洲精品在线免费观看视频| 欧美成人女星排行榜| 国产精品久久久爽爽爽麻豆色哟哟 | 欧美大片一区二区| 欧美xxxxx牲另类人与| 最新国产成人在线观看| 日韩中文字幕一区二区三区| 免费观看在线综合色| 美腿丝袜亚洲综合| 99re热视频这里只精品| 欧美日韩一区视频| 亚洲精品在线一区二区| 男男gaygay亚洲| 欧美亚州韩日在线看免费版国语版| 日韩一区二区免费在线电影| 26uuu久久综合| 国产精品丝袜黑色高跟| 美女视频网站黄色亚洲| 日韩精品视频网| 一本色道久久综合亚洲aⅴ蜜桃| 色国产精品一区在线观看| 精品国产凹凸成av人网站| 国产精品久久久一区麻豆最新章节| 亚洲精品视频在线观看网站| 国产乱码精品一区二区三区忘忧草 | 久久精品一区二区三区不卡牛牛|