亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關於我們
? 蟲蟲下載站

?? cpolicygradient.cpp

?? 強化學習算法(R-Learning)難得的珍貴資料
?? CPP
?? 第 1 頁 / 共 2 頁
字號:
// Fills workParameters with the weights obtained by moving stepSize along the
// gradient, starting from startParameters.
//
// gradient        - list of entries; each entry contributes its factor to the
//                   weight at its featureIndex
// stepSize        - multiplier applied to every gradient factor
// startParameters - reference weights, read only; must hold
//                   updateFunction->getNumWeights() values
// workParameters  - output buffer of the same size, completely overwritten
void CLineSearchPolicyGradientUpdater::setWorkingParamters(CFeatureList *gradient, rlt_real stepSize, rlt_real *startParameters, rlt_real *workParameters)
{
	DebugPrint('l', "Applying StepSize: %f\n", stepSize);

	// Begin with an exact copy of the reference weights ...
	memcpy(workParameters, startParameters, sizeof(rlt_real) * updateFunction->getNumWeights());

	// ... and displace only the weights the gradient list mentions.
	CFeatureList::iterator entry = gradient->begin();
	while (entry != gradient->end())
	{
		workParameters[(*entry)->featureIndex] += stepSize * (*entry)->factor;
		++entry;
	}
}

// Sets up a line-search updater.
//
// updateFunction   - function whose weights are searched; also forwarded to
//                    the CPolicyGradientUpdater base
// policyEvaluator  - scores the policy for each candidate weight vector
// l_startStepSizes - l_numStepSizes initial step sizes; the values are copied,
//                    so the caller keeps ownership of its array
// l_numStepSizes   - number of entries in l_startStepSizes
// maxSteps         - stored in the maxSteps member
//
// Registers the parameter "LineSearchStepSizeScale" with a default of 1.0.
CLineSearchPolicyGradientUpdater::CLineSearchPolicyGradientUpdater(CGradientUpdateFunction *updateFunction, CPolicySameStateEvaluator *policyEvaluator, rlt_real *l_startStepSizes, int l_numStepSizes, int maxSteps) : CPolicyGradientUpdater(updateFunction)
{
	evaluator = policyEvaluator;
	numStepSizes = l_numStepSizes;
	this->maxSteps = maxSteps;

	// Two weight-sized scratch buffers: the reference weights and the
	// candidate weights built from them.
	startParameters = new rlt_real[updateFunction->getNumWeights()];
	workParameters = new rlt_real[updateFunction->getNumWeights()];

	// Private copy of the caller's step sizes.
	startStepSizes = new rlt_real[numStepSizes];
	for (int k = 0; k < numStepSizes; k++)
	{
		startStepSizes[k] = l_startStepSizes[k];
	}

	addParameter("LineSearchStepSizeScale", 1.0);
}

// Releases the three buffers owned by the updater.
CLineSearchPolicyGradientUpdater::~CLineSearchPolicyGradientUpdater()
{
	// All three buffers are created with new[] in the constructor, so they
	// must be released with the array form; scalar delete on them is
	// undefined behaviour.
	delete [] startParameters;
	delete [] workParameters;
	delete [] startStepSizes;
}

// Performs a line search along the given gradient and installs the weights
// belonging to the best step size found.
//
// Phase 1 evaluates the policy once for every start step size (each multiplied
// by the "LineSearchStepSizeScale" parameter) and remembers the best one.
//
// Phase 2 runs only if fewer than maxSteps evaluations were used in phase 1
// and the best start point is neither the first nor the last one. It keeps a
// bracket of three step sizes around the best start point and, until maxSteps
// evaluations have been made in total, evaluates the midpoint of one half of
// the bracket and shrinks the bracket accordingly.
//
// An evaluation that throws CMyException is scored with a large negative
// value. All three evaluation sites are guarded the same way, so a failing
// evaluation can neither leak the temporary value buffer nor leave trial
// weights installed in the update function.
//
// gradient - search direction; passed unchanged to setWorkingParamters
void CLineSearchPolicyGradientUpdater::updateWeights(CFeatureList *gradient)
{
	// Score assigned to a step size whose evaluation threw CMyException.
	const rlt_real failedEvaluationValue = - 100000000;

	int maxIndex = 0;
	rlt_real maxValue = 0.0;
	rlt_real maxLearnRate = 0.0;

	rlt_real *values = new rlt_real[numStepSizes];
	rlt_real searchValues[3];
	rlt_real searchStepSizes[3];
	
	// Every candidate weight vector is built relative to the current weights.
	updateFunction->getWeights(startParameters);

	printf("Searching in Gradient Direction, %d start points\n", numStepSizes);

	//evaluator->getNewStartStates();

	// i counts policy evaluations across both phases.
	int i = 0;

	DebugPrint('l', "Beginning Line Search\n");
	DebugPrint('l', "Gradient: ");

	if (DebugIsEnabled('l'))
	{
		gradient->saveASCII(DebugGetFileHandle('l'));
		DebugPrint('l', "Gradient Norm: %f\n", gradient->multFeatureList(gradient));
		DebugPrint('l', "\n");
	}

	// Phase 1: score every start step size.
	for (i = 0; i < numStepSizes; i++)
	{
		const rlt_real stepSize = startStepSizes[i] * getParameter("LineSearchStepSizeScale");

		setWorkingParamters(gradient, stepSize, startParameters, workParameters);
		updateFunction->setWeights(workParameters);
		
		try
		{
			values[i] = evaluator->evaluatePolicy();
		}
		// NOTE(review): the exception is caught by pointer and not deleted;
		// confirm who owns the thrown object.
		catch (CMyException *) 
		{
			values[i] = failedEvaluationValue;
		}
		printf("StepSize %f : %f\n", stepSize, values[i]);
		// Logs the step size that was actually applied (scaled), as the
		// printf above and the phase-2 log lines do.
		DebugPrint('l', "Finished Evaluation of StepSize %f : Value %f\n", stepSize, values[i]);

		// A later candidate replaces the current best only if it is better
		// by more than 0.01% of the best value's magnitude.
		if (i == 0 || values[i] > maxValue + (fabs(maxValue) * 0.0001))
		{
			maxIndex = i;
			maxValue = values[i];
			maxLearnRate = stepSize;
			printf("Found New Maximum\n");
		}
	}

	// Phase 2: refine around the best start point while budget remains.
	if (i < maxSteps)
	{
		if (maxIndex == 0 || maxIndex == numStepSizes - 1)
		{
			// No neighbour on one side, so no bracket can be formed; the
			// best start step size is used as is.
			printf("Maximum outside the start step intervall, not searching further\n");
		}
		else
		{
			// Bracket = best start point (index 1) and its two neighbours.
			for (int j = 0; j < 3; j ++)
			{
				searchValues[j] = values[maxIndex + j - 1];
				searchStepSizes[j] = startStepSizes[maxIndex + j - 1] * getParameter("LineSearchStepSizeScale");
			}
			while (i < maxSteps)
			{
				i ++;

				// NOTE(review): the half to bisect is chosen by comparing the
				// outer values divided by the width of their half; kept
				// exactly as in the original - confirm the intended criterion.
				const bool searchLowerHalf = searchValues[0] / (searchStepSizes[1] - searchStepSizes[0]) > searchValues[2] / (searchStepSizes[2] - searchStepSizes[1]);

				// nearEnd: bracket end of the half being bisected;
				// farEnd: the opposite bracket end.
				const int nearEnd = searchLowerHalf ? 0 : 2;
				const int farEnd = 2 - nearEnd;

				rlt_real newStepSize = (searchStepSizes[nearEnd] + searchStepSizes[1]) * 0.5;

				setWorkingParamters(gradient, newStepSize, startParameters, workParameters);
				updateFunction->setWeights(workParameters);

				rlt_real newValue = 0.0;
				try
				{
					newValue = evaluator->evaluatePolicy();
				}
				catch (CMyException *) 
				{
					newValue = failedEvaluationValue;
				}

				printf("StepSize %f : %f\n", newStepSize, newValue);
				DebugPrint('l', "Finished Evaluation of StepSize %f : Value %f\n", newStepSize, newValue);

				if (newValue > searchValues[1])
				{
					// The midpoint is the new best: the old best becomes the
					// far end of the bracket.
					searchValues[farEnd] = searchValues[1];
					searchValues[1] = newValue;

					searchStepSizes[farEnd] = searchStepSizes[1];
					searchStepSizes[1] = newStepSize;
					printf("Found New Maximum\n");
				}
				else
				{
					// The best point stays: pull the near end in to the
					// midpoint.
					searchValues[nearEnd] = newValue;
					searchStepSizes[nearEnd] = newStepSize;
				}
			}
			maxLearnRate = searchStepSizes[1];
		}
	}
	delete [] values;

	DebugPrint('l', "End Line Search, applying step Size %f\n", maxLearnRate);

	printf("Applying maximum stepsize %f\n", maxLearnRate);
	setWorkingParamters(gradient, maxLearnRate, startParameters, workParameters);
	updateFunction->setWeights(workParameters);
}


// Creates a learner that obtains gradients from gradientCalculator and applies
// them through gradientUpdater.
//
// The parameters of both collaborators are added to this object's own
// parameter set, followed by "GradientResolution" (initialised with epsilon)
// and "PolicyGradientWeightDecay" (initialised with 0.0). The three feature
// lists created here are owned by the learner.
CPolicyGradientLearner::CPolicyGradientLearner(CPolicyGradientCalculator *gradientCalculator, CPolicyGradientUpdater *gradientUpdater, rlt_real epsilon)
{
	// Remember the collaborators (the arguments shadow the members).
	this->gradientCalculator = gradientCalculator;
	this->gradientUpdater = gradientUpdater;

	// Working lists used by learnPolicy.
	gradient = new CFeatureList();
	hGradient = new CFeatureList();
	gGradient = new CFeatureList();

	// Parameter registration, in the original order.
	addParameters(gradientCalculator);
	addParameters(gradientUpdater);

	addParameter("GradientResolution", epsilon);
	addParameter("PolicyGradientWeightDecay", 0.0);
}

// Destroys the three feature lists created in the constructor. The gradient
// calculator and updater are not deleted here.
CPolicyGradientLearner::~CPolicyGradientLearner()
{
	delete gGradient;
	delete hGradient;
	delete gradient;
}

// Applies one update along the given gradient via the gradient updater and,
// if the "PolicyGradientWeightDecay" parameter is positive, afterwards
// subtracts decay * (weight before the update) from every updated weight.
//
// gradient - direction handed to gradientUpdater->updateWeights(); note that
//            the argument shadows the member of the same name
void CPolicyGradientLearner::doUpdate(CFeatureList *gradient)
{
	const rlt_real decay = getParameter("PolicyGradientWeightDecay");
	const int weightCount = gradientUpdater->getUpdateFunction()->getNumWeights();

	rlt_real *weightsBefore = new rlt_real[weightCount];
	rlt_real *weightsAfter = new rlt_real[weightCount];

	// Snapshot the weights, then let the updater move them.
	gradientUpdater->getUpdateFunction()->getWeights(weightsBefore);
	gradientUpdater->updateWeights(gradient);

	if (decay > 0.0)
	{
		gradientUpdater->getUpdateFunction()->getWeights(weightsAfter);

		printf("Updating Gradient with weight decay %f\n", decay);
		for (int w = 0; w < weightCount; ++w)
		{
			weightsAfter[w] -= decay * weightsBefore[w];
		}
		gradientUpdater->getUpdateFunction()->setWeights(weightsAfter);
	}

	delete [] weightsAfter;
	delete [] weightsBefore;
}

// Runs up to maxGradientUpdates gradient updates.
//
// Each iteration optionally evaluates the policy, asks the gradient calculator
// for a fresh gradient, combines it with the previous search direction kept in
// hGradient, and applies the result with doUpdate(). The loop body always runs
// at least once and repeats while the computed norm value exceeds the
// "GradientResolution" parameter and the update budget is not used up. If the
// loop stops with budget left, one more update along the last direction is
// applied and the policy is evaluated once more.
//
// maxGradientUpdates - upper bound on the number of doUpdate() calls (the
//                      first iteration is executed regardless)
// evaluator          - optional; when non-null the policy value is evaluated
//                      and printed
// useOldGradient     - when false, hGradient and gGradient are cleared first;
//                      when true their current contents are kept
//
// Returns the value of the most recent evaluator->evaluatePolicy() call, or
// 0.0 if evaluator is null.
rlt_real CPolicyGradientLearner::learnPolicy(int maxGradientUpdates, CPolicyEvaluator *evaluator, bool useOldGradient)
{
	rlt_real epsilon = getParameter("GradientResolution");
	gradient->clear();

	if (!useOldGradient)
	{
		hGradient->clear();
		gGradient->clear();
	}

	rlt_real normG = gGradient->multFeatureList(gGradient);
	DebugPrint('g', "Gradient-Norm: %f\n", normG);

	printf("Gradient-Norm: %f\n", normG);

	int gradientUpdates = 0;

	rlt_real value = 0.0;

	do
	{
		if (evaluator)
		{
			value = evaluator->evaluatePolicy();
			printf("Value after %d Gradient Update: %f\n", gradientUpdates, value);
		}
		
		gradient->clear();
		gradientCalculator->getGradient(gradient);

		if (gGradient->size() > 0)
		{
			// gGradient is non-empty, i.e. it still holds what the previous
			// iteration (or a previous call with useOldGradient) left in it.
			// NOTE(review): the sequence below first adds the new gradient to
			// gGradient, takes the squared norm of that sum, then negates the
			// sum and adds the new gradient again. This looks like a
			// conjugate-gradient mixing factor but does not obviously match
			// the usual formula; kept unchanged - confirm against the
			// intended algorithm. normG == 0 would divide by zero here.
			gGradient->add(gradient);
			normG = gGradient->multFeatureList(gGradient);

			gGradient->multFactor(-1.0);
			gGradient->add(gradient, 1.0);

			rlt_real gamma = gGradient->multFeatureList(gradient) / normG;
			DebugPrint('g', "Calculated Gradient :\n");
			if (DebugIsEnabled('g'))
			{
				gradient->saveASCII(DebugGetFileHandle('g'));
			}
			DebugPrint('g', "PGLearner: Gamma %f", gamma);

			// New search direction: scaled old direction plus new gradient.
			hGradient->multFactor(gamma);
			hGradient->add(gradient);

			// If the combined direction has a negative product with the
			// gradient, restart from the plain gradient.
			if (hGradient->multFeatureList(gradient) < 0)
			{
				hGradient->clear();
				hGradient->add(gradient);
			}

			DebugPrint('g', "Update-Gradient :\n");
			if (DebugIsEnabled('g'))
			{
				hGradient->saveASCII(DebugGetFileHandle('g'));
			}
		}
		else
		{
			// No previous gradient stored: add the new gradient to the
			// search direction.
			hGradient->add(gradient);
			normG = hGradient->multFeatureList(hGradient);
		}
		
		// Remember the current gradient for the next iteration.
		gGradient->clear();
		gGradient->add(gradient);

		DebugPrint('g', "Gradient-Norm: %f\n", normG);
		printf("Gradient-Norm: %f\n", normG);

		printf("Updating Gradient...");

		doUpdate(hGradient);
		gradientUpdates ++;
	}
	while (normG > epsilon && gradientUpdates < maxGradientUpdates);
	
	if (gradientUpdates < maxGradientUpdates)
	{
		printf("Updating Gradient...");
		doUpdate(hGradient);
		gradientUpdates ++;

		if (evaluator)
		{
			// Assign to the function-level variable. Declaring a new local
			// here would shadow it, and the value measured after the final
			// update would be printed but never returned.
			value = evaluator->evaluatePolicy();
			printf("Value after %d Gradient Update: %f\n", gradientUpdates, value);
		}
	}
	return value;
}

// Creates a listener that decays the weights of updateFunction.
//
// updateFunction - function whose weights are decayed; stored, not owned
// weightdecay    - initial value of the "PolicyGradientWeightDecay" parameter
CPolicyGradientWeightDecayListener::CPolicyGradientWeightDecayListener(CGradientUpdateFunction *updateFunction, rlt_real weightdecay)
{
	this->updateFunction = updateFunction;

	// Scratch buffer with one slot per weight, released in the destructor.
	parameters = new rlt_real[updateFunction->getNumWeights()];

	addParameter("PolicyGradientWeightDecay", weightdecay);
}

// Frees the weight scratch buffer that the constructor allocated with new[].
CPolicyGradientWeightDecayListener::~CPolicyGradientWeightDecayListener()
{
	delete [] this->parameters;
}

void CPolicyGradientWeightDecayListener::newEpisode()
{
	updateFunction->getWeights(parameters);

	rlt_real factor = 1 - getParameter("PolicyGradientWeightDecay");
	for (int i = 0; i < updateFunction->getNumWeights(); i++)
	{
		parameters[i] = factor *  parameters[i];
	}

	updateFunction->setWeights(parameters);
}

?? 快捷鍵說明

複製代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
97se狠狠狠综合亚洲狠狠| 欧美日韩一区在线观看| 懂色av中文字幕一区二区三区 | 日韩和欧美的一区| 日韩高清不卡一区二区| 麻豆视频观看网址久久| 精品一区二区三区免费| 国产乱码精品一区二区三 | 欧美zozo另类异族| 久久久久久久久久久久久夜| 精品国产91乱码一区二区三区| 91精品国产欧美一区二区| 日韩精品在线一区| 日本一区二区久久| 一区二区三区国产精华| 日韩1区2区日韩1区2区| 国产精品综合一区二区| 91免费看片在线观看| 99精品一区二区| 欧美色爱综合网| 日韩一级免费观看| 欧美电影免费观看高清完整版在线观看| 欧美一区二区播放| 日本一区二区三区视频视频| 一区二区三区在线免费播放| 免费成人av在线播放| 激情欧美一区二区三区在线观看| 国产主播一区二区三区| 91网站在线播放| 日韩视频在线永久播放| 国产精品美日韩| 三级亚洲高清视频| 成人一区二区视频| 欧美伦理电影网| 中文字幕乱码日本亚洲一区二区 | www.欧美精品一二区| 欧美精品黑人性xxxx| 久久精品人人爽人人爽| 亚洲国产综合人成综合网站| 日本不卡不码高清免费观看| 国内精品久久久久影院薰衣草| 国产成+人+日韩+欧美+亚洲| 91看片淫黄大片一级在线观看| 911精品国产一区二区在线| 国产精品私房写真福利视频| 日日摸夜夜添夜夜添精品视频| 高清免费成人av| 欧美一级片在线看| 亚洲欧美视频在线观看视频| 国模套图日韩精品一区二区| 欧美最猛黑人xxxxx猛交| 久久人人爽人人爽| 亚洲国产精品影院| av不卡免费电影| 精品国产免费人成在线观看| 亚洲午夜一二三区视频| 国产91精品精华液一区二区三区| 欧美高清视频不卡网| 中文字幕一区二区在线观看| 狠狠狠色丁香婷婷综合激情| 色先锋aa成人| 国产午夜精品一区二区三区嫩草| 亚洲国产精品一区二区久久恐怖片 | 亚洲女人的天堂| 国产麻豆9l精品三级站| 欧美日韩国产免费一区二区| 中文字幕一区二区在线观看| 国产乱码精品一区二区三区av| 91精品国产aⅴ一区二区| 一区二区三区久久久| 成人黄色777网| 久久久美女艺术照精彩视频福利播放| 日韩专区一卡二卡| 欧美日韩一区二区在线观看| 亚洲欧美偷拍卡通变态| 成人h动漫精品一区二区| 久久免费国产精品| 狠狠色伊人亚洲综合成人| 欧美一级免费大片| 日韩中文字幕亚洲一区二区va在线| 91成人网在线| 亚洲美女视频在线观看| 成人一区二区三区在线观看 | 男人的j进女人的j一区| 欧美日韩一区二区三区免费看| 亚洲欧洲综合另类在线| 色综合久久中文综合久久97| 亚洲欧洲成人自拍| 99久久777色| 椎名由奈av一区二区三区| 春色校园综合激情亚洲| 日本一区二区在线不卡| 久国产精品韩国三级视频| 91精品啪在线观看国产60岁| 亚洲va欧美va国产va天堂影院| 色88888久久久久久影院按摩| 亚洲色图视频免费播放| 色综合久久久久久久久久久| 伊人夜夜躁av伊人久久| 91免费小视频| 一区二区三区高清| 欧美日韩成人综合在线一区二区| 亚洲国产精品一区二区久久| 欧美久久久久久蜜桃| 蜜臀精品久久久久久蜜臀| 日韩精品中文字幕一区二区三区| 久久精品国产一区二区三区免费看| 制服.丝袜.亚洲.另类.中文| 天天影视涩香欲综合网| 日韩一区二区在线观看视频播放| 五月激情综合色| 日韩欧美二区三区| 国产福利精品一区| 1024成人网色www| 欧美日韩视频第一区| 蜜臀av亚洲一区中文字幕| 欧美精品一区二区在线播放| 成人综合在线网站| 怡红院av一区二区三区| 欧美一区二区在线视频| 国模娜娜一区二区三区| 国产精品你懂的| 欧美午夜在线观看| 精品一区二区日韩| 国产精品嫩草久久久久| 欧美日韩一区二区三区视频| 久久国产精品99精品国产| 日韩美一区二区三区| 国产精品 日产精品 欧美精品| 国产色一区二区| 日本韩国欧美一区二区三区| 免费欧美高清视频| 亚洲少妇30p| 日韩女优av电影| 成人av小说网| 日韩在线一二三区| 国产日韩欧美精品电影三级在线 | 国产精品麻豆欧美日韩ww| 欧美专区在线观看一区| 久久69国产一区二区蜜臀| 亚洲特级片在线| 日韩欧美一级片| 99re免费视频精品全部| 免费观看日韩av| 17c精品麻豆一区二区免费| 欧美一级片在线看| 91丝袜高跟美女视频| 日韩综合一区二区| 欧美大肚乱孕交hd孕妇| 91丝袜国产在线播放| 久久精品国产网站| 夜夜精品视频一区二区| 欧美精品一区男女天堂| 欧美这里有精品| 高清免费成人av| 久久精品久久综合| 亚洲国产精品久久一线不卡| 久久精品欧美一区二区三区不卡| 欧美日韩国产天堂| 
www.99精品| 国产一区二区在线观看免费| 亚洲自拍都市欧美小说| 中文字幕av一区二区三区免费看 | 亚洲高清免费在线| 中文字幕国产一区| 日韩午夜三级在线| 欧洲精品中文字幕| 成人一级片在线观看| 午夜a成v人精品| 中文字幕亚洲精品在线观看| 欧美mv日韩mv国产网站app| 欧美午夜电影网| 91视频观看视频| 风间由美一区二区av101| 免费美女久久99| 午夜精品一区在线观看| 樱桃视频在线观看一区| 久久精品综合网| 欧美美女视频在线观看| heyzo一本久久综合| 国产精品一线二线三线精华| 日本不卡一区二区三区| 性做久久久久久| 亚洲一区二区在线免费观看视频| 国产精品婷婷午夜在线观看| 国产婷婷色一区二区三区四区| 久久先锋资源网| 精品粉嫩超白一线天av| 欧美变态凌虐bdsm| 欧美亚洲日本一区| 日韩va欧美va亚洲va久久| 亚洲电影你懂得| 日韩精品亚洲专区| 久久99热99| 国产不卡一区视频| 91视频你懂的| 欧美理论在线播放| 精品免费日韩av| 国产精品久久三区| 一区二区免费看|