亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? qlearn-cmac-xwin.cpp

?? CMAC神經網絡機械臂控制的設計matlab源碼
?? CPP
字號:
// Q-learning for mountain car problem /// Q-learning for mountain car problem // uses CMAC as a function approximator// follows approach described in Sutton/Singh papers// implements TD(lambda) sarsa// Code written by Sridhar Mahadevan// Department of Computer Science and Engineering// University of South Florida// 4202 East Fowler Avenue, ENG 118// Tampa, Florida 33620-5399// mahadeva@csee.usf.edu// http://www.csee.usf.edu/~mahadeva/mypage.html#include "cmac.h" // definition of CMAC class and functions #include "display.h" // Xwindows graphics #include "string.h"#include <unistd.h>#define RUNS 1#define MAX_TRIALS 1000#define VALUE_PLOT_STEP_SIZE 100 // output value function once in N trials#define Q0 0 // for initializing weights to Q0/TILES #define GRID_RES 50double POS_STEP = (POS_RANGE[1] - POS_RANGE[0])/GRID_RES; double VEL_STEP = (VEL_RANGE[1] - VEL_RANGE[0])/GRID_RES; double weight[POS_BINS+1][VEL_BINS+1][TILES][ACTIONS];  // qvalue representation over tiles double eligibility[POS_BINS+1][VEL_BINS+1][TILES][ACTIONS]; // eligibility of a tile static int trial_data[RUNS][MAX_TRIALS]; // keep track of solution time for each trial and run // main routine for running trialsvoid run_trials();  void output_trial_std_dev_data(); // define Q-learning with CMAC as a derived class of cmacclass mcar_qlearn_cmac : public cmac {   // inherit from cmac and mcar public: 	  mcar_qlearn_cmac(double pos, double vel); // constructor initializes state and cmac tiles 				ACTION choose_action(); // choose highest Q-value action, but explore sometimes 				void initialize_weights_eligibilities(); 				void update_eligibilities(ACTION a); // mark all active tiles for a particular action 				double qvalue(ACTION a); //  qvalue  computed as weighted sum over active tiles 				double qvalue(ACTION a, double pos, double vel); // at any desired point				double best_qvalue(double pos, double vel); 				void generate_qvalue_plot(int run, int trial); 				// Given previously active tiles and 
newly active tiles, update Q values 		void update_weights(double reward, double old_qval, double new_qval); 			private:  // data structures for Q-learning			double exploration_rate; 				double GAMMA;  // discount factor 		double BETA;   // learning rate  		double LAMBDA; // recency parameter		}; // constructor function calls cmac and mcar constructor functions mcar_qlearn_cmac::mcar_qlearn_cmac(double pos, double vel) : cmac(pos, vel) {// initialize Q-values  to 0  for (int pbin=0; pbin <= POS_BINS; pbin++)    for (int vbin=0; vbin <= VEL_BINS; vbin++)      for (int tile=0; tile<TILES; tile++)	for (int act=0; act<ACTIONS; act++)	  {	    weight[pbin][vbin][tile][act] = Q0/TILES; 	    eligibility[pbin][vbin][tile][act] = 0.0; 	  } 													GAMMA = 1.0; // discount factorBETA = 0.5;  // learning rate LAMBDA = 0.9;  // recency parameter exploration_rate = 0.0;  // percentage of randomness}// initialize weight and eligibilitiesvoid mcar_qlearn_cmac::initialize_weights_eligibilities(){  for (int pbin=0; pbin <= POS_BINS; pbin++)    for (int vbin=0; vbin <= VEL_BINS; vbin++)      for (int tile=0; tile<TILES; tile++)		for (int act=0; act<ACTIONS; act++)		  {		    weight[pbin][vbin][tile][act] = Q0/TILES; 		    eligibility[pbin][vbin][tile][act] = 0.0; 		  } }		// compute Q value of current state as weighted sum over active tiles double mcar_qlearn_cmac::qvalue(ACTION a){	double value = 0; 		for (int tile=0; tile<TILES; tile++)		value += weight[atiles[tile].pos_bin][atiles[tile].vel_bin][tile][a]; 			return(value); 	}// compute qvalue at any desired statedouble mcar_qlearn_cmac::qvalue(ACTION a, double pos, double vel){	double value =0; 		// compute active tiles   for (int tile =0; tile<TILES; tile++)    {      int pos_tile = (pos - POS_RANGE[0] + offset[0][tile])/pos_interval; // add offset!!      
int vel_tile  = (vel - VEL_RANGE[0] + offset[1][tile])/vel_interval;             value += weight[pos_tile][vel_tile][tile][a];    }        return(value);     }double mcar_qlearn_cmac::best_qvalue(double pos, double vel){	double bvalue = qvalue(coast,pos,vel); 		for (int a=0; a<ACTIONS; a++)      if (qvalue(int_to_act(a), pos, vel) > bvalue)		bvalue = qvalue(int_to_act(a),pos,vel); 		return(bvalue);}// given a position and velocity bin, pick the highest Q-value action with high probability ACTION mcar_qlearn_cmac::choose_action(){  double rvalue;   int bact = choose_random_int_value(2);     rvalue = choose_random_value(); 	  if (rvalue < exploration_rate)  // do a random action     return(choose_random_act());   else     for (int a=0; a<ACTIONS; a++)      if (qvalue(int_to_act(a)) > qvalue(int_to_act(bact)))		bact = a;   return(int_to_act(bact)); }// update eligibilities void mcar_qlearn_cmac::update_eligibilities(ACTION a){	for (int pbin=0; pbin <= POS_BINS; pbin++)		for (int vbin=0; vbin <= VEL_BINS; vbin++)			for (int tile=0; tile<TILES; tile++)				for (int act=0; act<ACTIONS; act++)					eligibility[pbin][vbin][tile][act] *= LAMBDA; // decay eligibilites												for (int tile=0; tile<TILES; tile++)		for (int act = 0; act<ACTIONS; act++)			if (act == a)				eligibility[atiles[tile].pos_bin][atiles[tile].vel_bin][tile][act] = 1;			else 				eligibility[atiles[tile].pos_bin][atiles[tile].vel_bin][tile][act] = 0; 		}// update weights void mcar_qlearn_cmac::update_weights(double reward, double old_qval, double new_qval){// TD(lambda) sarsa update rule for (int pbin=0; pbin <= POS_BINS; pbin++)		for (int vbin=0; vbin <= VEL_BINS; vbin++)			for (int tile=0; tile<TILES; tile++)				for (int act=0; act<ACTIONS; act++)					weight[pbin][vbin][tile][act] +=  		 			 (BETA/TILES)*(reward + new_qval - old_qval) * eligibility[pbin][vbin][tile][act]; }// collect statistics on each trial over runs void output_trial_std_dev_data(){  ofstream out("mcar-sarsa-soln-data");    double 
sum,sum_sq,std_dev,mean;    for (int j=0; j<MAX_TRIALS; j++)      {   	   sum=0.0;   	   sum_sq=0.0;	   for (int i=0; i<RUNS; i++)	   {	     sum += trial_data[i][j];	     sum_sq += trial_data[i][j]*trial_data[i][j];	    }	   mean = sum/RUNS;//	   std_dev = sqrt((sum_sq - (sum*sum/RUNS))/(RUNS-1));	   	   out <<  j <<  " " << mean << endl; //	     <<  " " << mean - std_dev << " " << mean + std_dev << endl;      }  out.close(); } void mcar_qlearn_cmac::generate_qvalue_plot(int run, int trial){  char name[20];	  sprintf(name,"qvalue-%d-%d",run, trial);	  ofstream output(name); 	  for (int pos=0; pos<= GRID_RES; pos++)		{		  double pvalue = POS_RANGE[0] + POS_STEP*pos; //  + POS_STEP/2; // midpoint of bin					  output << endl; 				  for (int vel=0; vel <= GRID_RES; vel++)		    {		      double vvalue = VEL_RANGE[0] + VEL_STEP*vel;  // + VEL_STEP/2; // midpoint of bin							      double value = best_qvalue(pvalue,vvalue); 							      if (value < 0) value = -value; 							      output << value << "\t"; 							    }		}  output.close(); 	}	void run_trials(){  int count = 0;   char data[30]; // data string to print out    mcar_qlearn_cmac mcq(-0.5,0.0); // initialize state to bottom rest and cmac tiles  for (int run = 0; run<RUNS; run++)    {      int best_changed = 1;       int best_so_far=10000;  // shortest distance to goal so far       mcq.initialize_weights_eligibilities();       // initialize weights and eligibilities to 0.       
if (run > 0)  // on subsequent runs, keep the same CMAC tiling 	{	  mcq.set_curr_pos(-0.5);  // restart at bottom resting 	  mcq.set_curr_vel(0.0); 	}	      for (int trial=0; trial< MAX_TRIALS; trial++)	{	  if ((trial+1)%VALUE_PLOT_STEP_SIZE==0)	    mcq.generate_qvalue_plot(run, trial+1);        	  best_changed = 1; 	  double old_qvalue, new_qvalue;  // for TD(lambda) sarsa update rule      	  int done  = 0, i = 0; 	  double r; 	  ACTION a, na; 	  mcq.active_tiles();  // recompute set of active CMAC tiles 			  a = mcq.choose_action(); // pick highest Q-value action with high probability 	  	  old_qvalue = mcq.qvalue(a); // compute q value as weighted sum over active tiles 		  while (!done) // not yet reached goal		{				  i++; 		  sprintf(data, "Run: %d Trial: %d Shortest solution: %d", run, trial, best_so_far); 		  if (best_changed)		    {		      display_data(data,1); 		      best_changed=0;		    }		  else display_data(data,0); 		  //		  usleep(1000);  // microseconds 		  display_mcar(mcq.curr_pos(),mcq.curr_vel()); 		 		  mcq.update_eligibilities(a); // decay and update eligibilities 		 		  mcq.update_position_velocity(a);  // move the car 	 	 		  if (!mcq.reached_goal())		    {	 	 		      mcq.active_tiles();  // recompute set of active tiles 	 		 		      na = mcq.choose_action();  // choose highest Q-value action in new state		 	 		      new_qvalue = mcq.qvalue(na);		 	 		      r = mcq.reward();  // -1 always 		      mcq.update_weights(r,old_qvalue, new_qvalue);  // TD(0) sarsa update 	  // RECOMPUTE QVALUE AFTER WEIGHTS CHANGED! 
(rich's correction)		      old_qvalue = mcq.qvalue(na); 		      a = na; 		    }		  else 		    {		      // TD(lambda) sarsa update at goal		      mcq.update_weights(-1,old_qvalue, 0);  		      if (i<best_so_far) 			{			  best_so_far = i; 			  best_changed = 1; 			}		      trial_data[run][trial] = i; // record number of steps 		      done=1;		    }		}			  mcq.set_curr_pos(-0.5);  // restart at bottom resting 	  mcq.set_curr_vel(0.0); 		}    }	}// do a bunch of learning runsint main(void)  {  int rinit = -time(NULL)%100;   int rseed = -1000 + rinit*rinit*rinit;   WINDOW = start_display();   WINDOW2 = start_display2();   initialize_random_number_generator(rseed); // set up large negative number as random seed; 	  cout << "Learning run started at " << __TIME__ << " with seed " << rseed << endl;   run_trials();   output_trial_std_dev_data(); }											

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
亚洲另类在线视频| 成人国产精品免费观看视频| 国产麻豆精品视频| 色综合天天天天做夜夜夜夜做| 欧美一级高清片| 亚洲免费观看视频| 粉嫩在线一区二区三区视频| 欧美一二三区在线| 亚洲福中文字幕伊人影院| 99久久er热在这里只有精品15| 精品乱码亚洲一区二区不卡| 亚洲国产成人91porn| 99re成人精品视频| 国产精品伦理在线| 国产高清不卡一区二区| 精品久久久久久久一区二区蜜臀| 亚洲一二三级电影| 91福利在线看| 亚洲美女淫视频| 成人夜色视频网站在线观看| 精品福利视频一区二区三区| 日本欧美韩国一区三区| 欧美日韩国产小视频在线观看| 亚洲乱码国产乱码精品精的特点| 91在线视频播放地址| 中文字幕国产一区二区| 国产成人午夜高潮毛片| 欧美国产日韩在线观看| 粗大黑人巨茎大战欧美成人| 亚洲国产精品成人综合色在线婷婷 | 国产成人综合在线播放| 久久久久久久综合| 韩日欧美一区二区三区| 久久久久久久久久久久电影| 国产精品原创巨作av| 国产清纯在线一区二区www| 国产成人免费视频网站 | 国产精品久久久久毛片软件| 大胆亚洲人体视频| 亚洲天堂精品视频| 欧美亚洲综合另类| 免费国产亚洲视频| 久久综合丝袜日本网| 粉嫩蜜臀av国产精品网站| 国产精品美女久久久久aⅴ| 91在线免费播放| 午夜精品久久久久久久久| 日韩一区二区电影网| av成人免费在线| 亚洲视频免费看| 欧美午夜在线一二页| 伦理电影国产精品| 国产欧美日本一区二区三区| bt7086福利一区国产| 午夜免费欧美电影| 国产亚洲一二三区| 在线一区二区三区做爰视频网站| 视频在线在亚洲| 国产亚洲欧美日韩俺去了| 色综合久久中文字幕综合网| 日韩高清在线一区| 国产视频一区二区在线| 在线观看免费成人| 国内偷窥港台综合视频在线播放| 亚洲视频一二三区| 日韩精品专区在线| 91一区二区三区在线观看| 污片在线观看一区二区| 国产三级精品三级| 欧美精品123区| 波波电影院一区二区三区| 香港成人在线视频| 日本一区二区视频在线| 欧美高清视频一二三区| 成人sese在线| 美洲天堂一区二卡三卡四卡视频| 国产精品久久久久桃色tv| 欧美一级久久久久久久大片| 91天堂素人约啪| 久久99精品国产.久久久久| 一区二区三区波多野结衣在线观看| 精品国产一区二区亚洲人成毛片| 色婷婷av一区二区三区软件 | 日韩在线观看一区二区| 国产精品欧美一区二区三区| 日韩精品在线一区二区| 精品视频资源站| 日本丶国产丶欧美色综合| 国产成人亚洲综合a∨婷婷| 免费高清在线一区| 天堂在线亚洲视频| 亚洲精品免费播放| 国产精品全国免费观看高清| 精品久久久久久最新网址| 欧美一区二区三区成人| 欧美偷拍一区二区| 一本大道久久a久久综合婷婷 | 亚洲欧美另类综合偷拍| 国产欧美精品区一区二区三区| 精品捆绑美女sm三区| 91精品麻豆日日躁夜夜躁| 欧美综合久久久| 色妹子一区二区| 色综合视频在线观看| 99麻豆久久久国产精品免费| 成人涩涩免费视频| 国产91高潮流白浆在线麻豆| 国产美女在线精品| 国产美女娇喘av呻吟久久| 精品综合久久久久久8888| 老司机免费视频一区二区三区| 日av在线不卡| 美腿丝袜亚洲色图| 韩日av一区二区| 国产精品一二一区| 欧美在线小视频| 91麻豆福利精品推荐| 日本国产一区二区| 欧美调教femdomvk| 欧美老人xxxx18| 欧美一区二区久久久| 欧美xxxx在线观看| 久久精品视频免费| 亚洲少妇最新在线视频| 亚洲自拍偷拍欧美| 天堂久久久久va久久久久| 免费xxxx性欧美18vr| 国内成人精品2018免费看| 国产成人a级片| 色婷婷综合久久久中文字幕| 欧美蜜桃一区二区三区| 91精品国产入口| 亚洲精品一区二区三区香蕉| 久久久亚洲午夜电影| 成人欧美一区二区三区小说| 亚洲大片在线观看| 老司机免费视频一区二区| 成人a区在线观看| 欧美三电影在线| 久久麻豆一区二区| 亚洲精品久久久蜜桃| 亚洲v中文字幕| 国产在线精品不卡| 日本精品裸体写真集在线观看 | 久久国产免费看| www.亚洲色图.com| 91超碰这里只有精品国产| 亚洲精品一区二区三区四区高清| 亚洲视频你懂的| 久久97超碰国产精品超碰| 91蜜桃视频在线| 日韩三级高清在线| 亚洲婷婷综合色高清在线| 麻豆免费看一区二区三区| av亚洲精华国产精华| 91精品国产91综合久久蜜臀| 国产精品久久久久久久久图文区| 亚洲成人动漫在线观看| 成人黄色av网站在线| 日韩天堂在线观看| 亚洲美女视频一区| 国产精品一级在线| 欧美一区二区视频观看视频 | 欧美日韩中文另类| 
国产亚洲成年网址在线观看| 视频一区国产视频| 91色porny在线视频| 久久精品人人爽人人爽| 日本不卡视频在线观看| 99v久久综合狠狠综合久久| 日韩精品一区二区三区蜜臀 | 亚洲一区二区偷拍精品| 国产91在线观看丝袜| 日韩免费看网站| 性欧美大战久久久久久久久| 日日夜夜精品视频免费| av电影一区二区| 精品国产免费一区二区三区四区| 亚洲成人av资源| 成人免费电影视频| 欧美va亚洲va国产综合| 亚洲成年人网站在线观看| 色爱区综合激月婷婷| 欧美国产1区2区| 国产精品2024| 久久久精品黄色| 久久机这里只有精品| 欧美一区二区三区公司| 日韩1区2区日韩1区2区| 欧美日韩一区二区不卡| 亚洲18女电影在线观看| 欧美午夜精品理论片a级按摩| 亚洲码国产岛国毛片在线| av午夜精品一区二区三区| 国产精品美女久久久久aⅴ国产馆 国产精品美女久久久久av爽李琼 国产精品美女久久久久高潮 | 久久九九久精品国产免费直播| 久久av资源站| 精品国产百合女同互慰| 国产美女娇喘av呻吟久久| 国产视频亚洲色图|