亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? lrn_directq.c

這個代碼是關於強化學習的 policy iteration 算法。請用 WinZip 解壓縮。
?? C
字號:


#ifdef PGRL_NO_FA

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>


#include "lrn_DirectQ.h"
#include "gaussian.h"
#include "misc.h"

// Read from PGRL_DirectQ.ini by Initialize_PGRL_DirectQ(); later overwritten
// with (steps - 1) by PGRL_DirectQ() when it records a new reference trajectory.
int Episodes_Per_Parameter_Update;
// Read from PGRL_DirectQ.ini; PGRL_DirectQ() calls exit() once
// Num_of_Grad_Calculations exceeds this value.
int Max_Num_Grad_Calc;
// Progress counter of PGRL_DirectQ(): 0 means "record a new reference
// trajectory on the next call"; a gradient update is performed once it
// reaches Episodes_Per_Parameter_Update.
int Update_Policy_Parameters = 0;

// Defined elsewhere. PGRL_DirectQ() allocates modes_visited[step][mode] as
// 0/1 flags and publishes through Mode_Execute / Step_To_Execute_Mode which
// mode should be tried at which step next (both are reset to -1 after an update).
extern int **modes_visited;
extern int Mode_Execute;
extern int Step_To_Execute_Mode;

// Scratch buffers allocated in Initialize_PGRL_DirectQ():
// dpdc_t / dpdv_t are dim x num_of_gaussians, wrk has dim entries.
// They are handed to avaluate_total_gradient() as output/work arguments.
double **dpdc_t, **dpdv_t, *wrk;
// Accumulated gradient, num_of_gaussians x dim; added (scaled by alpha) to
// cen / var respectively when UPDATE_POLICY_PARAMETERS is defined.
double **drhodc, **drhodv;

// Per-trajectory storage allocated and freed inside PGRL_DirectQ():
// Q is steps x num_of_gaussians,
// dpdc / dpdv are steps x num_of_gaussians x dim x num_of_gaussians.
double **Q,****dpdc, ****dpdv;

// Set to Q[1][mode[1]] of the most recent reference trajectory and logged.
double V_for_Policy = 0.0;

// Defined elsewhere; used here as the array extents for all buffers above.
extern int dim;
extern int num_of_gaussians;

#ifdef GRAPHICS
// Defined elsewhere; set to 1 after cen / var have been modified.
extern int Update_Boundaries;
#endif

// Number of completed gradient computations (incremented at the end of each update).
int Num_of_Grad_Calculations = 0;

/*
 * Initialize_PGRL_DirectQ
 *
 * One-time setup for PGRL_DirectQ():
 *   1. reads two integers from "PGRL_DirectQ.ini"
 *      (Episodes_Per_Parameter_Update, then Max_Num_Grad_Calc; skiptoend()
 *      is called after each value),
 *   2. truncates the two log files ("rb.txt"/"gb.txt" when BIAS is defined,
 *      "r.txt"/"g.txt" otherwise),
 *   3. allocates the scratch buffers dpdc_t, dpdv_t (dim x num_of_gaussians),
 *      wrk (dim) and the zero-initialised gradient accumulators
 *      drhodc, drhodv (num_of_gaussians x dim).
 *
 * Requires dim and num_of_gaussians to be set before the call.
 * Failures are reported through My_Error().
 * NOTE(review): the code after each My_Error() call uses fp / i_tmp
 * unconditionally, so My_Error() is presumably expected not to return - confirm.
 */
void Initialize_PGRL_DirectQ(void)
{
	char error_text[256];
	FILE *fp;
	int i_tmp,i,j;

	if ((fp = fopen("PGRL_DirectQ.ini", "r")) == NULL)
	{
		sprintf(error_text, "Couldn't open \"%s\"\n", "PGRL_DirectQ.ini");
		My_Error(error_text);
	}

	// first value: Episodes_Per_Parameter_Update
	if (fscanf(fp, "%d", &(i_tmp) ) != 1)
	{
		sprintf(error_text,
			"Initialize_Learning_parameters: cannot read Episodes_Per_Parameter_Update\n");
		My_Error(error_text);
	}
	skiptoend(fp);
	Episodes_Per_Parameter_Update = i_tmp;

	// second value: Max_Num_Grad_Calc
	if (fscanf(fp, "%d", &(i_tmp) ) != 1)
	{
		sprintf(error_text,
			"Initialize_Learning_parameters: cannot read Max_Num_Grad_Calc\n");
		My_Error(error_text);
	}
	skiptoend(fp);
	Max_Num_Grad_Calc = i_tmp;

	fclose(fp);

	// Truncate the value ("r") and gradient ("g") log files; PGRL_DirectQ()
	// re-opens them in append mode.
#ifdef BIAS
	if ((fp = fopen("rb.txt", "w")) == NULL)
	{
		sprintf(error_text, "Couldn't open \"%s\"\n", "rb.txt");
		My_Error(error_text);
	}
	fclose(fp);
	if ((fp = fopen("gb.txt", "w")) == NULL)
	{
		// report the file that actually failed (was "g.txt")
		sprintf(error_text, "Couldn't open \"%s\"\n", "gb.txt");
		My_Error(error_text);
	}
	fclose(fp);
#else
	if ((fp = fopen("r.txt", "w")) == NULL)
	{
		sprintf(error_text, "Couldn't open \"%s\"\n", "r.txt");
		My_Error(error_text);
	}
	fclose(fp);
	if ((fp = fopen("g.txt", "w")) == NULL)
	{
		sprintf(error_text, "Couldn't open \"%s\"\n", "g.txt");
		My_Error(error_text);
	}
	fclose(fp);
#endif

	// scratch matrices filled by avaluate_total_gradient(): dim x num_of_gaussians
	dpdc_t = (double **)My_Malloc((long)dim  * sizeof(double*));
	dpdv_t = (double **)My_Malloc((long)dim  * sizeof(double*));
	for ( i = 0; i < dim; i++ )
	{
		dpdc_t[i] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
		dpdv_t[i] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
	}

	wrk = (double *)My_Malloc((long)dim  * sizeof(double));

	// gradient accumulators: num_of_gaussians x dim, start at zero
	drhodc = (double **)My_Malloc((long)num_of_gaussians  * sizeof(double*));
	drhodv = (double **)My_Malloc((long)num_of_gaussians  * sizeof(double*));
	for ( i = 0; i < num_of_gaussians; i++ )
	{
		drhodc[i] = (double *)My_Malloc((long)dim  * sizeof(double));
		drhodv[i] = (double *)My_Malloc((long)dim  * sizeof(double));
		for ( j = 0; j < dim; j++ )
		{
			drhodc[i][j] = 0.0;
			drhodv[i][j] = 0.0;
		}
	}

}


/*
 * PGRL_DirectQ
 *
 * Processes one finished trajectory and, once enough data has been
 * collected, computes (and optionally applies) a gradient for the gaussian
 * centres and variances.
 *
 * The function is a small state machine driven by the globals
 * Update_Policy_Parameters, Step_To_Execute_Mode and Mode_Execute:
 *
 *   - Update_Policy_Parameters == 0: the trajectory is taken as the new
 *     reference. Q, dpdc, dpdv and modes_visited are allocated, the
 *     discounted return of every step i >= 1 is stored in Q[i][mode[i]],
 *     the gradients of all modes are stored for every step, and the first
 *     not-yet-visited mode of step 1 is published in Mode_Execute.
 *   - steps <= Step_To_Execute_Mode: the trajectory ended before the step
 *     that was supposed to be explored; the update is forced.
 *   - otherwise: the discounted return from Step_To_Execute_Mode on is
 *     stored for the mode that was taken there, that mode is marked visited
 *     and the next (step, mode) pair to explore is published.
 *
 * When Update_Policy_Parameters reaches Episodes_Per_Parameter_Update the
 * gradient is accumulated into drhodc / drhodv, logged, applied to cen / var
 * if UPDATE_POLICY_PARAMETERS is defined, all per-trajectory storage is
 * freed and the state machine is reset.
 *
 * Parameters
 *   steps  number of entries in s, g, mode and r; index 0 is never read.
 *   s      s[i] is passed to avaluate_total_gradient() for step i.
 *   g      g[i][j], j < num_of_gaussians; summed per step into g_tot and
 *          passed to avaluate_total_gradient().
 *   cen    num_of_gaussians x dim centres (modified when updating).
 *   var    num_of_gaussians x dim variances (modified when updating,
 *          clamped to at least 0.01).
 *   mode   mode[i] is the index of the mode taken at step i.
 *   alpha  step size of the parameter update.
 *   gam    discount factor applied to r.
 *   r      r[j] is weighted by gam^(j-i) and summed into the return of step i.
 *
 * Side effects: appends to r.txt / g.txt (rb.txt / gb.txt with BIAS),
 * prints to stdout, and calls exit(1) once Num_of_Grad_Calculations exceeds
 * Max_Num_Grad_Calc.
 */
void PGRL_DirectQ(int steps,
				  double **s, double **g,
				  double **cen, double **var,
				  int *mode, double alpha, double gam,
				  double *r)
{
	double g_tot;
	int i,j,k,q;

	if ( Update_Policy_Parameters == 0 )
	{
		// Everything below indexes step 1 (Q[1], mode[1]); a trajectory
		// without a step 1 carries no information and would be read out of
		// bounds, so it is ignored and the next call starts over.
		if ( steps < 2 )
		{
			return;
		}

		// per-trajectory storage, all entries zeroed
		Q = (double **)My_Malloc((long)steps  * sizeof(double*));
		dpdc = (double ****)My_Malloc((long)steps  * sizeof(double***));
		dpdv = (double ****)My_Malloc((long)steps  * sizeof(double***));
		for ( k = 0; k < steps; k++ )
		{
			Q[k] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));

			dpdc[k] = (double ***)My_Malloc((long)num_of_gaussians  * sizeof(double**));
			dpdv[k] = (double ***)My_Malloc((long)num_of_gaussians  * sizeof(double**));
			for ( i = 0; i < num_of_gaussians; i++ )
			{
				Q[k][i] = 0.0;
				dpdc[k][i] = (double **)My_Malloc((long)dim  * sizeof(double*));
				dpdv[k][i] = (double **)My_Malloc((long)dim  * sizeof(double*));
				for ( j = 0; j < dim; j++ )
				{
					dpdc[k][i][j] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
					dpdv[k][i][j] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
					for ( q = 0; q < num_of_gaussians; q++ )
					{
						dpdc[k][i][j][q] = 0.0;
						dpdv[k][i][j][q] = 0.0;
					}
				}
			}
		}

		// discounted return of the mode actually taken at every step i >= 1
		for ( i = 1; i < steps; i++ )
		{
			Q[i][mode[i]] = 0.0;
			for ( j = i; j < steps; j++ )
			{
				Q[i][mode[i]] = Q[i][mode[i]] + r[j] * pow(gam,(double)(j-i));
			}
		}

		V_for_Policy = Q[1][mode[1]];

		{ // log the value of the current policy
			char error_text[256];
			FILE *fp;

#ifdef BIAS
			if ((fp = fopen("rb.txt", "a")) == NULL)
			{
				sprintf(error_text, "Couldn't open \"%s\"\n", "rb.txt");
				My_Error(error_text);
			}
#else
			if ((fp = fopen("r.txt", "a")) == NULL)
			{
				sprintf(error_text, "Couldn't open \"%s\"\n", "r.txt");
				My_Error(error_text);
			}
#endif
			fprintf(fp,"%g\n",V_for_Policy);
			fclose(fp);
			printf("%d: %g\n",Num_of_Grad_Calculations,V_for_Policy);
		}

		// store the gradient of every mode k at every step i >= 1
		for ( i = 1; i < steps; i++ )
		{
			g_tot = 0.0;
			for ( j = 0; j < num_of_gaussians; j++ )
			{
				g_tot = g_tot + g[i][j];
			}


			for ( k = 0; k < num_of_gaussians; k++ )
			{

				avaluate_total_gradient(dim, s[i], cen[k], var[k],
					dpdc_t, dpdv_t, wrk, k, g[i], num_of_gaussians, g_tot);

				for ( j = 0; j < dim; j++ )
				{
					for ( q = 0; q < num_of_gaussians; q++ )
					{
						dpdc[i][k][j][q] = dpdc_t[j][q];
						dpdv[i][k][j][q] = dpdv_t[j][q];
					}
				}
			}
		}

		// the value read from the .ini file is replaced by the trajectory length
		Episodes_Per_Parameter_Update = steps - 1;

		// modes_visited[i][j] == 1 once mode j has been evaluated at step i;
		// row 0 is never allocated or used
		modes_visited = (int **)My_Malloc((long)Episodes_Per_Parameter_Update  * sizeof(int*));
		for ( i = 1; i < Episodes_Per_Parameter_Update; i++ )
		{
			modes_visited[i] = (int *)My_Malloc((long)num_of_gaussians  * sizeof(int));
			for ( j = 0; j < num_of_gaussians; j++ )
			{
				modes_visited[i][j] = 0;
			}
			modes_visited[i][mode[i]] = 1;
		}

		Update_Policy_Parameters++;
		Step_To_Execute_Mode = 1;
		// With steps == 2 there is no row 1 in modes_visited (the table has
		// rows 1 .. Episodes_Per_Parameter_Update-1 only); the update below
		// is triggered immediately in that case, so skip the lookup.
		if ( Step_To_Execute_Mode < Episodes_Per_Parameter_Update )
		{
			for ( i = 0; i < num_of_gaussians; i++ )
			{
				if ( modes_visited[Step_To_Execute_Mode][i] == 0 )
				{
					Mode_Execute = i;
					break;
				}
			}
		}
	}
	else if ( steps <= Step_To_Execute_Mode )
	{ // trajectory ended before the step to explore: force the gradient update
		Update_Policy_Parameters = Episodes_Per_Parameter_Update;
	}
	else
	{
		double Q_tmp;
		int inr_step;

		// discounted return from the explored step onwards
		Q_tmp = 0.0;
		for ( j = Step_To_Execute_Mode; j < steps; j++ )
		{
			Q_tmp = Q_tmp + r[j] * pow(gam,(double)(j-Step_To_Execute_Mode));
		}

		i = Step_To_Execute_Mode;
		Q[i][mode[i]] = Q_tmp;

		if ( Q[i][mode[i]] == 0.0 )
		{
			printf("Q_tmp[%d][%d] = %g\n",i,mode[i],Q[i][mode[i]]);
		}

		g_tot = 0.0;
		for ( j = 0; j < num_of_gaussians; j++ )
		{
			g_tot = g_tot + g[i][j];
		}


		// refresh the stored gradient of the mode that was taken at step i
		avaluate_total_gradient(dim, s[i], cen[mode[i]], var[mode[i]],
			dpdc_t, dpdv_t, wrk, mode[i], g[i], num_of_gaussians, g_tot);

		for ( j = 0; j < dim; j++ )
		{
			for ( q = 0; q < num_of_gaussians; q++ )
			{
				dpdc[i][mode[i]][j][q] = dpdc_t[j][q];
				dpdv[i][mode[i]][j][q] = dpdv_t[j][q];
			}
		}

		modes_visited[Step_To_Execute_Mode][mode[i]] = 1;

		// inr_step stays 1 when every mode of the current step has been visited
		inr_step = 1;
		for ( i = 0; i < num_of_gaussians; i++ )
		{
			if ( modes_visited[Step_To_Execute_Mode][i] == 0 )
			{
				inr_step = 0;
				Mode_Execute = i;
				break;
			}
		}
		if ( inr_step == 1)
		{
			// move on to the next step and pick its first unvisited mode
			Step_To_Execute_Mode++;
			Update_Policy_Parameters++;
			if ( Update_Policy_Parameters < Episodes_Per_Parameter_Update )
			{
				for ( i = 0; i < num_of_gaussians; i++ )
				{
					if ( modes_visited[Step_To_Execute_Mode][i] == 0 )
					{
						inr_step = 0;
						Mode_Execute = i;
						break;
					}
				}
			}
		}
	}


	if ( Episodes_Per_Parameter_Update <= Update_Policy_Parameters )
	{
		double tdc=0.0, tdv=0.0;
		int cont_grad,n,cnt;

		for ( j = 0; j < num_of_gaussians; j++ )
		{
			for ( i = 0; i < dim; i++ )
			{
				drhodc[j][i] = 0.0;
				drhodv[j][i] = 0.0;
			}
		}


#ifdef BIAS
		{
			// subtract the per-step mean over all modes from Q
			double t1;
			for ( k = 1; k < Episodes_Per_Parameter_Update; k++ )
			{
				t1 = 0.0;
				for ( n = 0; n < num_of_gaussians; n++ )
				{
					t1 = t1 + Q[k][n];
				}
				t1 = t1 / (double)num_of_gaussians;
				for ( n = 0; n < num_of_gaussians; n++ )
				{
					Q[k][n] = Q[k][n] - t1;
				}
			}
		}
#endif


		// Accumulate over the leading steps whose modes have all been
		// visited; stop at the first step with an unvisited mode.
		// NOTE(review): cnt is incremented for that first incomplete step as
		// well although it contributes nothing to the sum - confirm whether
		// the average is meant to include it.
		cnt = 0;
		cont_grad = 1;
		for ( k = 1; k < Episodes_Per_Parameter_Update && (cont_grad == 1); k++ )
		{
			for ( j = 0; j < num_of_gaussians && (cont_grad == 1); j++ )
			{
				cont_grad = modes_visited[k][j];
			}
			cnt++;
			for ( i = 0; i < dim && (cont_grad == 1); i++ )
			{
				for ( j = 0; j < num_of_gaussians && (cont_grad == 1); j++ )
				{
					for ( n = 0; n < num_of_gaussians; n++ )
					{
						drhodc[j][i] = drhodc[j][i] + Q[k][n] * dpdc[k][j][i][n];
						drhodv[j][i] = drhodv[j][i] + Q[k][n] * dpdv[k][j][i][n];
					}
				}
			}
		}

		// cnt is 0 when no step was available (Episodes_Per_Parameter_Update <= 1);
		// the accumulators are all zero then and 0/0 would turn them into NaN.
		if ( cnt > 0 )
		{
			for ( j = 0; j < num_of_gaussians; j++ )
			{
				for ( i = 0; i < dim; i++ )
				{
					drhodc[j][i] = drhodc[j][i] / cnt;
					drhodv[j][i] = drhodv[j][i] / cnt;
				}
			}
		}


#ifdef UPDATE_POLICY_PARAMETERS
		{
			double tmax;
		{ // append the averaged (not yet normalised) gradient to the log
			char error_text[256];
			FILE *fp;

#ifdef BIAS
			if ((fp = fopen("gb.txt", "a")) == NULL)
			{
				sprintf(error_text, "Couldn't open \"%s\"\n", "gb.txt");
				My_Error(error_text);
			}
#else
			if ((fp = fopen("g.txt", "a")) == NULL)
			{
				sprintf(error_text, "Couldn't open \"%s\"\n", "g.txt");
				My_Error(error_text);
			}
#endif
			for ( j = 0; j < num_of_gaussians; j++ )
			{
				for ( i = 0; i < dim; i++ )
				{
					fprintf(fp,"%g\n",drhodc[j][i]);
					fprintf(fp,"%g\n",drhodv[j][i]);
				}
			}
			fclose(fp);
		}
			// largest absolute component of either gradient
			tdc = 0.0;
			tdv = 0.0;
			for ( j = 0; j < num_of_gaussians; j++ )
			{
				for ( i = 0; i < dim; i++ )
				{
					if ( _isnan(drhodc[j][i]))
					{
						My_Error("Not a Number!");
					}
					if ( _isnan(drhodv[j][i]))
					{
						My_Error("Not a Number!");
					}
					if ( tdc < fabs(drhodc[j][i]) )
					{
						tdc = fabs(drhodc[j][i]);
					}
					if ( tdv < fabs(drhodv[j][i]) )
					{
						tdv = fabs(drhodv[j][i]);
					}
				}
			}

			if ( tdc > tdv )
			{
				tmax = tdc;
			}
			else
			{
				tmax = tdv;
			}

			if ( _isnan(tmax))
			{
				My_Error("Not a Number!");
			}

			// Scale so that the largest component has magnitude 1.
			// An all-zero gradient (tmax == 0) is left untouched: dividing
			// would yield 0/0 = NaN and abort through My_Error().
			if ( tmax > 0.0 )
			{
				for ( j = 0; j < num_of_gaussians; j++ )
				{
					for ( i = 0; i < dim; i++ )
					{
						if ( _isnan(drhodc[j][i]))
						{
							My_Error("Not a Number!");
						}
						if ( _isnan(drhodv[j][i]))
						{
							My_Error("Not a Number!");
						}
						drhodc[j][i] = drhodc[j][i] / tmax;
						if ( _isnan(drhodc[j][i]))
						{
							My_Error("Not a Number!");
						}
						drhodv[j][i] = drhodv[j][i] / tmax;
						if ( _isnan(drhodv[j][i]))
						{
							My_Error("Not a Number!");
						}
					}
				}
			}
		}

#ifdef GRAPHICS
		Update_Boundaries = 1;
#endif
		// gradient step on centres and variances; variances are kept >= 0.01
		for ( j = 0; j < num_of_gaussians; j++ )
		{
			for ( i = 0; i < dim; i++ )
			{
				cen[j][i] = cen[j][i] + alpha * drhodc[j][i];
				var[j][i] = var[j][i] + alpha * drhodv[j][i];
				if ( var[j][i] < 0.01)
					var[j][i] = 0.01;
			}
		}
#else
		{ // parameters are not updated: only log the gradient, one line per update
			char error_text[256];
			FILE *fp;

#ifdef BIAS
			if ((fp = fopen("gb.txt", "a")) == NULL)
			{
				sprintf(error_text, "Couldn't open \"%s\"\n", "gb.txt");
				My_Error(error_text);
			}
#else
			if ((fp = fopen("g.txt", "a")) == NULL)
			{
				sprintf(error_text, "Couldn't open \"%s\"\n", "g.txt");
				My_Error(error_text);
			}
#endif
			for ( j = 0; j < num_of_gaussians; j++ )
			{
				for ( i = 0; i < dim; i++ )
				{
					fprintf(fp,"%g %g ",drhodc[j][i],drhodv[j][i]);
				}
			}
			fprintf(fp,"\n");
			fclose(fp);
		}
#endif


		// Release the per-trajectory storage. Episodes_Per_Parameter_Update+1
		// equals the "steps" value the arrays were allocated with.
		for ( k = 0; k < Episodes_Per_Parameter_Update+1; k++ )
		{
			free(Q[k]);

			for ( i = 0; i < num_of_gaussians; i++ )
			{
				for ( j = 0; j < dim; j++ )
				{
					free(dpdc[k][i][j]);
					free(dpdv[k][i][j]);
				}
				free(dpdc[k][i]);
				free(dpdv[k][i]);
			}
			free(dpdc[k]);
			free(dpdv[k]);
		}
		free(Q);
		free(dpdc);
		free(dpdv);

		// rows start at 1, matching the allocation
		for ( i = 1; i < Episodes_Per_Parameter_Update; i++ )
		{
			free(modes_visited[i]);
		}
		free(modes_visited);

		// reset the state machine: the next call records a new trajectory
		Update_Policy_Parameters = 0;
		Step_To_Execute_Mode = -1;
		Mode_Execute = -1;

#ifdef UPDATE_POLICY_PARAMETERS
		Reset_Random_Seed_For_Paths();
#endif
		Num_of_Grad_Calculations++;
		if ( Num_of_Grad_Calculations > Max_Num_Grad_Calc )
		{
			exit(1);
		}
	}

}

#endif

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
日本一道高清亚洲日美韩| 最新不卡av在线| 麻豆一区二区三| 91精品国产欧美一区二区18| 日本美女一区二区三区视频| 欧美不卡123| 国产精品一品二品| 亚洲柠檬福利资源导航| 精品1区2区3区| 三级在线观看一区二区| 26uuu亚洲婷婷狠狠天堂| www.色精品| 亚洲一级在线观看| www精品美女久久久tv| 97久久人人超碰| 日韩福利视频网| 国产午夜精品美女毛片视频| 91一区一区三区| 日本在线播放一区二区三区| 久久久高清一区二区三区| 一本大道久久a久久综合婷婷| 午夜精品在线视频一区| 日本一区二区三级电影在线观看| 91免费观看视频在线| 青青草国产成人99久久| 综合激情网...| 日韩一级片在线观看| youjizz久久| 久国产精品韩国三级视频| 亚洲同性gay激情无套| 欧美一级欧美三级在线观看| 成人午夜激情影院| 久久精品国内一区二区三区| 亚洲男人的天堂一区二区 | 精品一区二区三区在线播放| 国产精品色哟哟网站| 91麻豆精品久久久久蜜臀 | 91麻豆精品视频| 免费av成人在线| 亚洲激情一二三区| 精品久久久久一区| 欧美性生活大片视频| 成人小视频免费在线观看| 美女视频一区二区| 一区二区三区高清不卡| 国产日韩欧美精品综合| 日韩欧美亚洲国产另类| 欧美日韩小视频| 一本久久综合亚洲鲁鲁五月天 | 欧美国产一区在线| 日韩欧美一级片| 欧美午夜精品一区二区蜜桃| 成人av电影免费观看| 韩国理伦片一区二区三区在线播放| 一级日本不卡的影视| 国产精品毛片高清在线完整版| 日韩美女一区二区三区四区| 在线91免费看| 欧美人成免费网站| 欧美日韩在线三级| 欧美亚男人的天堂| 欧美在线观看18| 91免费国产视频网站| 99久久精品99国产精品| 丰满亚洲少妇av| 国产成人午夜电影网| 国产在线一区观看| 精品亚洲aⅴ乱码一区二区三区| 天天影视涩香欲综合网| 亚洲自拍都市欧美小说| 亚洲最新在线观看| 亚洲制服丝袜一区| 亚洲成人777| 日韩福利电影在线| 奇米影视7777精品一区二区| 日韩精品视频网| 久久精品噜噜噜成人88aⅴ| 美女久久久精品| 久久精品国产网站| 捆绑调教一区二区三区| 经典三级在线一区| 国产乱码精品一区二区三 | 国产女主播一区| 中文字幕第一区综合| 国产精品久久久久一区二区三区共| 国产精品三级久久久久三级| **性色生活片久久毛片| 一区二区在线观看不卡| 亚洲福利视频一区| 日产欧产美韩系列久久99| 久久99精品国产麻豆不卡| 国产一区二区在线影院| 丁香激情综合国产| 在线区一区二视频| 欧美精品在欧美一区二区少妇| 欧美一区二区三区视频在线观看| 日韩网站在线看片你懂的| 精品粉嫩超白一线天av| 国产欧美综合在线| 一区二区三区在线观看欧美| 日本在线播放一区二区三区| 国产精品综合网| 一本高清dvd不卡在线观看| 91精品午夜视频| 精品日韩欧美一区二区| 中文字幕日韩精品一区| 午夜精品一区二区三区免费视频 | 日本中文字幕一区二区有限公司| 久久成人免费网| gogo大胆日本视频一区| 在线综合+亚洲+欧美中文字幕| 亚洲精品一区二区三区精华液| 亚洲欧洲成人精品av97| 日韩高清一区二区| 不卡的av电影| 日韩欧美www| 亚洲欧美日韩国产综合| 久久精品999| 色成年激情久久综合| 26uuu国产电影一区二区| 亚洲免费色视频| 紧缚捆绑精品一区二区| 色偷偷一区二区三区| 精品国产制服丝袜高跟| 亚洲国产欧美日韩另类综合| 国产精品66部| 欧美军同video69gay| 国产精品国产馆在线真实露脸| 奇米在线7777在线精品| 日本黄色一区二区| 久久先锋影音av| 免费视频一区二区| 欧洲另类一二三四区| 国产精品美女久久久久av爽李琼| 七七婷婷婷婷精品国产| 欧美性极品少妇| 国产精品久久久久久福利一牛影视| 丝袜脚交一区二区| 一本大道av伊人久久综合| 久久久久久免费| 美女视频黄免费的久久| 欧美日韩aaaaa| 一区二区三区四区在线播放| 国产ts人妖一区二区| 日韩免费高清视频| 免费高清在线一区| 3atv一区二区三区| 亚洲综合在线电影| 91在线视频观看| 欧美国产精品久久| 国产一区二区成人久久免费影院| 欧美一区二区三区系列电影| 亚洲国产精品嫩草影院| 91蜜桃婷婷狠狠久久综合9色| 国产拍揄自揄精品视频麻豆| 国产一区欧美日韩| 精品国产一区二区三区忘忧草 | 精品美女一区二区| 三级不卡在线观看| 欧美怡红院视频| 亚洲综合999| 欧美日韩国产综合视频在线观看| 亚洲九九爱视频| 色八戒一区二区三区| 亚洲色图丝袜美腿| 91社区在线播放| 日韩美女视频19| 
91福利视频网站| 亚洲综合色在线| 欧美日韩一区二区三区免费看| 亚洲国产综合视频在线观看| 色狠狠一区二区三区香蕉| 一区二区三区日韩精品视频| www.欧美日韩国产在线| 亚洲欧洲色图综合| 91浏览器入口在线观看| 亚洲激情男女视频| 欧美日韩激情一区| 蜜桃在线一区二区三区| 精品久久久久香蕉网| 国产精品一区三区| 国产精品美日韩| 91豆麻精品91久久久久久| 一区二区欧美国产| 91精品国产手机| 国产成人在线视频网站| 亚洲免费电影在线| 欧美老肥妇做.爰bbww视频| 久久精品999| 国产精品福利一区| 欧美午夜片在线观看| 久久国产综合精品| 国产精品高潮呻吟| 91麻豆精品久久久久蜜臀| 免播放器亚洲一区| 国产精品久久久久久久久晋中 | 丁香啪啪综合成人亚洲小说| 亚洲另类在线制服丝袜| 777xxx欧美| 国产99精品在线观看|