亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? lrn_pifa.c

?? 這個代碼是關於強化學習的 policy iteration 算法。請用 WinZip 解壓縮。
?? C
?? 第 1 頁 / 共 2 頁
字號:

#ifdef PGRL_FA

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>


#include "lrn_PIFA.h"
#include "gaussian.h"
#include "misc.h"

/* NOTE(review): not referenced in the visible part of this file - confirm where it is used. */
#define MAX_SV_RATIO 1e22
/* Floor applied to p_pi[i][mode[i]] in PGRL_PIFA. */
#define MIN_PI  0.0001

/* ---- Symbols defined in other translation units ---- */

/* Row pointers are (re)allocated by PGRL_PIFA; indexed [step][gaussian]. */
extern int **modes_visited;
extern int Mode_Execute;
extern int Step_To_Execute_Mode;
extern int dim;
extern int num_of_gaussians;
#ifdef GRAPHICS
extern int Update_Boundaries;
#endif

/* ---- Settings ---- */

/*
 * Both values are read from PGRL_DirectQ.ini by Initialize_PGRL_PIFA.
 * PGRL_PIFA later overwrites Episodes_Per_Parameter_Update with steps - 1.
 */
int Episodes_Per_Parameter_Update;
int Max_Num_Grad_Calc;

/* ---- Run-time counters ---- */

/* Progress counter maintained by PGRL_PIFA; 0 selects its first (allocating) branch. */
int Update_Policy_Parameters = 0;
/* Printed next to V_for_Policy by PGRL_PIFA. */
int Num_of_Grad_Calculations = 0;
/* Set to Q[1][mode[1]] by PGRL_PIFA and appended to rf.txt. */
double V_for_Policy = 0.0;

/* ---- Scratch buffers allocated by Initialize_PGRL_PIFA ---- */

/* dim x num_of_gaussians outputs handed to avaluate_total_gradient. */
double **dpdc_t;
double **dpdv_t;
/* dim-element work array handed to avaluate_total_gradient. */
double *wrk;

/* num_of_gaussians x dim, zeroed at allocation and again before each update. */
double **drhodc;
double **drhodv;
/* num_of_gaussians x dim x num_of_gaussians, zeroed at allocation. */
double ***drhodc_coeff;
double ***drhodv_coeff;

/* ---- Per-trajectory tables allocated by PGRL_PIFA ---- */

/* steps x num_of_gaussians. */
double **Q;
double **p_pi;
/* steps x num_of_gaussians x dim x num_of_gaussians. */
double ****dpdc;
double ****dpdv;

/*
 * total_states_visited rows. Each row is allocated with num_of_gaussians
 * doubles but filled for indices 0..dim-1.
 * NOTE(review): this looks like it assumes dim <= num_of_gaussians - confirm.
 */
double **states_visited;
int total_states_visited;

/* Reports a failed fopen() of `path` through My_Error. */
static void pifa_init_report_open_failure(const char *path)
{
	char error_text[256];

	snprintf(error_text, sizeof error_text, "Couldn't open \"%s\"\n", path);
	My_Error(error_text);
}

/*
 * Reads one integer setting from fp into *out, then calls skiptoend(fp)
 * (presumably discards the rest of the line - confirm against misc.h).
 *
 * Returns 1 on success. On a parse failure the problem is reported through
 * My_Error, *out is left untouched and 0 is returned.
 */
static int pifa_init_read_int(FILE *fp, const char *setting_name, int *out)
{
	char error_text[256];
	int value;

	if (fscanf(fp, "%d", &value) != 1)
	{
		snprintf(error_text, sizeof error_text,
			"Initialize_PGRL_PIFA: cannot read %s\n", setting_name);
		My_Error(error_text);
		return 0;
	}
	skiptoend(fp);
	*out = value;
	return 1;
}

/* Creates `path` as an empty file, discarding any previous contents. */
static void pifa_init_truncate_file(const char *path)
{
	FILE *fp = fopen(path, "w");

	if (fp == NULL)
	{
		pifa_init_report_open_failure(path);
		return;
	}
	fclose(fp);
}

/* Allocates `count` doubles with My_Malloc and sets each of them to 0.0. */
static double *pifa_init_zero_vector(int count)
{
	double *vec = (double *)My_Malloc((long)count  * sizeof(double));
	int i;

	for ( i = 0; i < count; i++ )
	{
		vec[i] = 0.0;
	}
	return vec;
}

/* Allocates a rows x cols table of doubles (one row pointer per row), all 0.0. */
static double **pifa_init_zero_matrix(int rows, int cols)
{
	double **mat = (double **)My_Malloc((long)rows  * sizeof(double*));
	int i;

	for ( i = 0; i < rows; i++ )
	{
		mat[i] = pifa_init_zero_vector(cols);
	}
	return mat;
}

/*
 * One-time set-up for the PGRL PIFA learner.
 *
 *  1. Reads Episodes_Per_Parameter_Update and Max_Num_Grad_Calc (in that
 *     order) from "PGRL_DirectQ.ini".
 *  2. Truncates the output files "rf.txt" and "gf.txt" (PGRL_PIFA appends
 *     to rf.txt).
 *  3. Allocates the file-scope buffers sized from the globals dim and
 *     num_of_gaussians:
 *        dpdc_t, dpdv_t              dim x num_of_gaussians
 *        wrk                         dim
 *        drhodc, drhodv              num_of_gaussians x dim
 *        drhodc_coeff, drhodv_coeff  num_of_gaussians x dim x num_of_gaussians
 *     All of them are zero-filled.
 *
 * Errors are reported through My_Error. My_Error is expected not to return;
 * should it return anyway, set-up stops at a configuration failure instead
 * of continuing with a NULL stream or an unread value.
 */
void Initialize_PGRL_PIFA(void)
{
	static const char ini_path[] = "PGRL_DirectQ.ini";
	FILE *fp;
	int i;

	fp = fopen(ini_path, "r");
	if (fp == NULL)
	{
		pifa_init_report_open_failure(ini_path);
		return;
	}

	if (!pifa_init_read_int(fp, "Episodes_Per_Parameter_Update",
			&Episodes_Per_Parameter_Update)
		|| !pifa_init_read_int(fp, "Max_Num_Grad_Calc", &Max_Num_Grad_Calc))
	{
		fclose(fp);	/* do not leak the stream on a bad ini file */
		return;
	}
	fclose(fp);

	// the reward file
	pifa_init_truncate_file("rf.txt");
	pifa_init_truncate_file("gf.txt");

	/* scratch outputs for avaluate_total_gradient */
	dpdc_t = pifa_init_zero_matrix(dim, num_of_gaussians);
	dpdv_t = pifa_init_zero_matrix(dim, num_of_gaussians);
	wrk = pifa_init_zero_vector(dim);

	drhodc = pifa_init_zero_matrix(num_of_gaussians, dim);
	drhodv = pifa_init_zero_matrix(num_of_gaussians, dim);

	drhodc_coeff = (double ***)My_Malloc((long)num_of_gaussians  * sizeof(double**));
	drhodv_coeff = (double ***)My_Malloc((long)num_of_gaussians  * sizeof(double**));
	for ( i = 0; i < num_of_gaussians; i++ )
	{
		drhodc_coeff[i] = pifa_init_zero_matrix(dim, num_of_gaussians);
		drhodv_coeff[i] = pifa_init_zero_matrix(dim, num_of_gaussians);
	}
}


void PGRL_PIFA(int steps,
			   double **s, double **g, 
			   double **cen, double **var,
			   int *mode, double alpha, double gam,
			   double *r)
{
	double g_tot;
	int i,j,k,q;

	if ( Update_Policy_Parameters == 0 )
	{
		total_states_visited = steps - 1;
		states_visited = (double **)My_Malloc((long)total_states_visited  * sizeof(double*));
		for ( k = 0; k < total_states_visited; k++ )
		{
			states_visited[k] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
		}

		Q = (double **)My_Malloc((long)steps  * sizeof(double*));
		p_pi = (double **)My_Malloc((long)steps  * sizeof(double*));
		dpdc = (double ****)My_Malloc((long)steps  * sizeof(double***));
		dpdv = (double ****)My_Malloc((long)steps  * sizeof(double***));
		for ( k = 0; k < steps; k++ )
		{
			Q[k] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
			p_pi[k] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));

			dpdc[k] = (double ***)My_Malloc((long)num_of_gaussians  * sizeof(double**));
			dpdv[k] = (double ***)My_Malloc((long)num_of_gaussians  * sizeof(double**));
			for ( i = 0; i < num_of_gaussians; i++ )
			{
				Q[k][i] = 0.0;
				p_pi[k][i] = 0.0;
				dpdc[k][i] = (double **)My_Malloc((long)dim  * sizeof(double*));
				dpdv[k][i] = (double **)My_Malloc((long)dim  * sizeof(double*));
				for ( j = 0; j < dim; j++ )
				{
					dpdc[k][i][j] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
					dpdv[k][i][j] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
					for ( q = 0; q < num_of_gaussians; q++ )
					{
						dpdc[k][i][j][q] = 0.0;
						dpdv[k][i][j][q] = 0.0;
					}
				}
			}
		}

		
		
		for ( i = 1; i < steps; i++ )
		{
			Q[i][mode[i]] = 0.0;
			for ( j = i; j < steps; j++ )
			{
				Q[i][mode[i]] = Q[i][mode[i]] + r[j] * pow(gam,(double)(j-i));
			}
		}

		V_for_Policy = Q[1][mode[1]];
		
		{ // 
			char error_text[256];
			FILE *fp;
			
			if ((fp = fopen("rf.txt", "a")) == NULL)
			{
				sprintf(error_text, "Couldn't open \"%s\"\n", "rf.txt");
				My_Error(error_text);
			}
			fprintf(fp,"%g\n",V_for_Policy);
			fclose(fp);
			printf("%d: %g\n",Num_of_Grad_Calculations,V_for_Policy);
		}
		
		for ( i = 1; i < steps; i++ )
		{
			g_tot = 0.0;
			for ( j = 0; j < num_of_gaussians; j++ )
			{
				g_tot = g_tot + g[i][j];
			}

			for ( j = 0; j < dim; j++ )
			{
				states_visited[i-1][j] = s[i][j];
			}

			p_pi[i][mode[i]] = g[i][mode[i]]/g_tot;
			if ( p_pi[i][mode[i]] < MIN_PI )
			{
				p_pi[i][mode[i]] = MIN_PI;
			}

			
			avaluate_total_gradient(dim, s[i], cen[mode[i]], var[mode[i]], 
				dpdc_t, dpdv_t, wrk, mode[i], g[i], num_of_gaussians, g_tot);
			
			for ( j = 0; j < dim; j++ )
			{
				for ( q = 0; q < num_of_gaussians; q++ )
				{
					dpdc[i][mode[i]][j][q] = dpdc_t[j][q];
					dpdv[i][mode[i]][j][q] = dpdv_t[j][q];
				}
			}
		}
				
		Episodes_Per_Parameter_Update = steps - 1;

		modes_visited = (int **)My_Malloc((long)Episodes_Per_Parameter_Update  * sizeof(int*));
		for ( i = 1; i < Episodes_Per_Parameter_Update; i++ )
		{
			modes_visited[i] = (int *)My_Malloc((long)num_of_gaussians  * sizeof(int));
			for ( j = 0; j < num_of_gaussians; j++ )
			{
				modes_visited[i][j] = 0;
			}
			modes_visited[i][mode[i]] = 1;
		}
		
		Update_Policy_Parameters++;
		Step_To_Execute_Mode = 1;
		for ( i = 0; i < num_of_gaussians; i++ )
		{
			if ( modes_visited[Step_To_Execute_Mode][i] == 0 )
			{
				Mode_Execute = i;
				//modes_visited[Step_To_Execute_Mode][i] = 1;
				break;
			}
		}
	}
	else if ( steps <= Step_To_Execute_Mode )
	{ // update the gradient
		Update_Policy_Parameters = Episodes_Per_Parameter_Update;
	}
	else
	{
		double Q_tmp;
		int inr_step;

#ifdef TMP777777
		int prev_total_states_visited;
		double **tmp_states_visited;

		prev_total_states_visited = total_states_visited;
		tmp_states_visited = (double **)My_Malloc((long)total_states_visited  * sizeof(double*));
		for ( k = 0; k < total_states_visited; k++ )
		{
			tmp_states_visited[k] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
			for ( j = 0; j < dim; j++ )
			{
				tmp_states_visited[k][j] = states_visited[k][j];
			}
		}


		for ( k = 0; k < total_states_visited; k++ )
		{
			free(states_visited[k]);
		}
		free(states_visited);
		total_states_visited = total_states_visited + steps - 1;
		states_visited = (double **)My_Malloc((long)total_states_visited  * sizeof(double*));
		for ( k = 0; k < total_states_visited; k++ )
		{
			states_visited[k] = (double *)My_Malloc((long)num_of_gaussians  * sizeof(double));
			for ( j = 0; j < dim; j++ )
			{
				if ( k < prev_total_states_visited )
				{
					states_visited[k][j] = tmp_states_visited[k][j];
				}
				else
				{
					states_visited[k][j] = s[k+1-prev_total_states_visited][j];
				}
			}
		}

		for ( k = 0; k < prev_total_states_visited; k++ )
		{
			free(tmp_states_visited[k]);
		}
		free(tmp_states_visited);
#endif



		Q_tmp = 0.0;
		for ( j = Step_To_Execute_Mode; j < steps; j++ )
		{
			Q_tmp = Q_tmp + r[j] * pow(gam,(double)(j-Step_To_Execute_Mode));
		}
		
		i = Step_To_Execute_Mode;
		Q[i][mode[i]] = Q_tmp;

		if ( Q[i][mode[i]] == 0.0 )
		{
			printf("Q_tmp[%d][%d] = %g\n",i,mode[i],Q[i][mode[i]]);
		}

		g_tot = 0.0;
		for ( j = 0; j < num_of_gaussians; j++ )
		{
			g_tot = g_tot + g[i][j];
		}
		
		p_pi[i][mode[i]] = g[i][mode[i]]/g_tot;
		if ( p_pi[i][mode[i]] < MIN_PI )
		{
			p_pi[i][mode[i]] = MIN_PI;
		}

		avaluate_total_gradient(dim, s[i], cen[mode[i]], var[mode[i]], 
			dpdc_t, dpdv_t, wrk, mode[i], g[i], num_of_gaussians, g_tot);
		
		for ( j = 0; j < dim; j++ )
		{
			for ( q = 0; q < num_of_gaussians; q++ )
			{
				dpdc[i][mode[i]][j][q] = dpdc_t[j][q];
				dpdv[i][mode[i]][j][q] = dpdv_t[j][q];
			}
		}
		
		modes_visited[Step_To_Execute_Mode][mode[i]] = 1;
		
		inr_step = 1;
		for ( i = 0; i < num_of_gaussians; i++ )
		{
			if ( modes_visited[Step_To_Execute_Mode][i] == 0 )
			{
				inr_step = 0;
				Mode_Execute = i;
				//modes_visited[Step_To_Execute_Mode][i] = 1;
				break;
			}
		}
		if ( inr_step == 1)
		{
			Step_To_Execute_Mode++;
			Update_Policy_Parameters++;
			if ( Update_Policy_Parameters < Episodes_Per_Parameter_Update )
			{
				for ( i = 0; i < num_of_gaussians; i++ )
				{
					if ( modes_visited[Step_To_Execute_Mode][i] == 0 )
					{
						inr_step = 0;
						Mode_Execute = i;
						//modes_visited[Step_To_Execute_Mode][i] = 1;
						break;
					}
				}
			}
		}
	}
	
	
	if ( Episodes_Per_Parameter_Update <= Update_Policy_Parameters )
	{
		double tdc=0.0, tdv=0.0, **Q_a;
		int cont_grad = 1,cnt;

		for ( j = 0; j < num_of_gaussians; j++ )
		{
			for ( i = 0; i < dim; i++ )
			{
				drhodc[j][i] = 0.0;
				drhodv[j][i] = 0.0;
			}
		}


#ifdef BIAS_FA
		{
			double t1;
			for ( k = 1; k < Episodes_Per_Parameter_Update && (cont_grad == 1); k++ )
			{
				t1 = 0.0;
				for ( n = 0; n < num_of_gaussians; n++ )
				{
					t1 = t1 + Q[k][n];
				}
				t1 = t1 / (double)num_of_gaussians;
				for ( n = 0; n < num_of_gaussians; n++ )
				{
					Q[k][n] = Q[k][n] - t1;
				}
			}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
国产欧美一区二区精品忘忧草 | 成人18精品视频| 不卡的av电影| 色猫猫国产区一区二在线视频| 欧美精品一二三| 中文字幕一区二区三区不卡在线 | 国产一区二三区好的| 国产91对白在线观看九色| 成人精品免费视频| 欧美日韩你懂得| 久久久久国产精品人| 五月天一区二区三区| 色一情一伦一子一伦一区| 欧美一区日本一区韩国一区| 亚洲国产精品精华液ab| 天天色综合成人网| jizzjizzjizz欧美| 欧美精品一区二区三区四区| 一区二区三区精品视频| 高清免费成人av| 日韩手机在线导航| 一区二区三区欧美日| 成人永久看片免费视频天堂| 国产一区二区网址| 色激情天天射综合网| 久久精品视频网| 亚洲精品一二三| 国产成人在线视频免费播放| 日本精品免费观看高清观看| 精品成人一区二区三区四区| 天堂在线一区二区| 在线免费观看日本一区| 亚洲欧洲av一区二区三区久久| 精品亚洲porn| 91精品国产综合久久精品| 一区二区三区欧美久久| 色综合天天在线| 亚洲视频你懂的| 91在线视频在线| 国产精品久久久久久久久免费樱桃 | 欧美视频日韩视频| 中文字幕一区视频| thepron国产精品| 国产精品久久看| 成人黄色在线网站| 国产精品萝li| 大桥未久av一区二区三区中文| 精品日韩在线观看| 精品制服美女久久| 久久久噜噜噜久久中文字幕色伊伊| 麻豆国产欧美一区二区三区| 欧美一区二区成人6969| 久久成人免费日本黄色| 久久久久国产精品人| 国产福利不卡视频| 中文字幕免费一区| 成人av在线播放网址| 一区在线观看免费| 在线观看视频一区二区 | 石原莉奈在线亚洲二区| 6080日韩午夜伦伦午夜伦| 亚洲大片免费看| 日韩一区二区三区在线| 美日韩黄色大片| 久久精品欧美日韩精品| 不卡的av电影| 亚洲制服丝袜在线| 91精品国产美女浴室洗澡无遮挡| 蜜臀久久99精品久久久久宅男| 欧美mv和日韩mv的网站| 成人国产亚洲欧美成人综合网| 亚洲女同女同女同女同女同69| 欧美理论在线播放| 国产一区视频导航| 亚洲精品成a人| 日韩一区二区三区在线观看| 国产99精品国产| 一二三区精品视频| 日韩精品一区二区三区视频播放| 国产精品一级片在线观看| 亚洲同性gay激情无套| 777久久久精品| 风间由美性色一区二区三区| 一区二区三区小说| 精品日本一线二线三线不卡| bt欧美亚洲午夜电影天堂| 国产精品福利在线播放| 91精品免费在线观看| 懂色av一区二区夜夜嗨| 香蕉久久一区二区不卡无毒影院 | 国产日韩欧美综合一区| 色94色欧美sute亚洲线路二| 九九九久久久精品| 一区二区三区中文在线| 久久综合久久鬼色| 欧美浪妇xxxx高跟鞋交| youjizz国产精品| 久久不见久久见中文字幕免费| 中文字幕一区二区三区色视频| 欧美一区二区在线免费观看| 国产**成人网毛片九色 | 精品国产三级a在线观看| 国产麻豆午夜三级精品| 亚洲福利视频一区二区| 欧美成人猛片aaaaaaa| 成人免费看片app下载| 午夜精品免费在线观看| 国产欧美一区视频| 日韩一级二级三级精品视频| av在线综合网| 久久成人免费网站| 国产精品另类一区| 精品电影一区二区三区| 欧美日韩在线不卡| 东方aⅴ免费观看久久av| 亚洲一区二区五区| 中日韩免费视频中文字幕| 色婷婷精品大在线视频| 日本成人超碰在线观看| 综合激情成人伊人| 国产婷婷色一区二区三区在线| 91精品国产综合久久小美女| 欧美在线一区二区| 欧美午夜视频网站| 欧美日韩性生活| 在线免费亚洲电影| 成人激情电影免费在线观看| 国产精品一区在线观看乱码| 亚洲影院在线观看| 亚洲午夜激情av| 亚洲大片一区二区三区| 亚洲国产精品嫩草影院| 亚洲一线二线三线视频| 亚洲自拍偷拍网站| 亚洲一二三四在线观看| 性久久久久久久久| 美女视频黄免费的久久| 蜜桃av一区二区在线观看| 亚洲二区在线视频| 亚洲第一综合色| 成人小视频在线| 国产精品99久久久久| 丁香六月久久综合狠狠色| 丁香网亚洲国际| 91玉足脚交白嫩脚丫在线播放| 91视频精品在这里| 91久久香蕉国产日韩欧美9色| 在线观看日韩av先锋影音电影院| 欧美主播一区二区三区| 4438亚洲最大| 久久久午夜精品理论片中文字幕| 国产欧美一区二区三区在线看蜜臀| 欧美激情一区在线观看| 亚洲靠逼com| 爽爽淫人综合网网站| 国产一区二区0| 91蝌蚪porny| 欧美伦理电影网| 久久亚洲一级片| 亚洲欧洲精品一区二区三区| 亚洲精品亚洲人成人网在线播放| 亚洲国产va精品久久久不卡综合| 欧美a一区二区| 不卡大黄网站免费看| 欧美伦理视频网站| 
国产日韩欧美麻豆| 亚洲午夜一二三区视频| 国产综合色视频| 色婷婷久久久亚洲一区二区三区| 欧美嫩在线观看| 欧美国产综合色视频| 亚洲成人精品一区| 国产成人在线看| 欧美久久久一区| 中文字幕av一区二区三区| 亚洲18女电影在线观看| 国产成人综合在线观看| 欧美色区777第一页| 国产欧美精品一区二区色综合| 亚洲国产一区二区三区青草影视| 欧美日韩国产高清一区二区| 久久久久久久久久久久久久久99| 久久精品一区二区三区不卡 | 在线观看免费亚洲| 精品久久国产老人久久综合| 亚洲男女毛片无遮挡| 精品无码三级在线观看视频| 91美女片黄在线观看91美女| 欧美精品一区二区三区蜜臀| 亚洲大片一区二区三区| www.欧美色图| 久久久精品影视| 免费人成黄页网站在线一区二区| 91麻豆蜜桃一区二区三区| 久久久久国产精品人| 美女网站在线免费欧美精品| jlzzjlzz亚洲女人18| 国产欧美日韩麻豆91| 国产一区二区在线影院| 欧美大片国产精品|