亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? cpolicygradient.cpp

?? 強化學習算法(R-Learning)難得的珍貴資料
?? CPP
?? 第 1 頁 / 共 2 頁
字號:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "ril_debug.h"
#include "cpolicygradient.h"
#include "cagent.h"
#include "creinforce.h"

#include <math.h>

// Constructs the calculator and remembers the controller passed in as `policy`.
// The pointer is only stored here; nothing in this constructor takes ownership of it.
CPolicyGradientCalculator::CPolicyGradientCalculator(CAgentController *policy)
{
	// The argument shadows the member of the same name, so the member
	// has to be addressed through this->.
	this->policy = policy;
}

// Constructs a GPOMDP gradient calculator.
//
//   reward            forwarded to the CSemiMDPRewardListener base
//   policy            forwarded to the CPolicyGradientCalculator base and
//                     additionally kept as stochPolicy
//   agent             stored; getGradient() runs its episodes on this agent
//   baseLine          stored; its parameters are added to this object
//   TStepsPerEpsiode  initial value of "GradientEstimationStepsPerEpisode"
//   Episodes          initial value of "GradientEstimationEpisodes"
//   beta              initial value of "GPOMDPBeta"
CGPOMDPGradientCalculator::CGPOMDPGradientCalculator(CRewardFunction *reward, CStochasticPolicy *policy, CAgent *agent, CReinforcementBaseLineCalculator *baseLine, int TStepsPerEpsiode, int Episodes, rlt_real beta)
	: CPolicyGradientCalculator(policy), CSemiMDPRewardListener(reward)
{
	// Collaborators handed in by the caller (the destructor does not delete them).
	this->agent = agent;
	this->baseLine = baseLine;
	stochPolicy = policy;

	// Parameter registration; the order is the same as it has always been.
	addParameters(baseLine);

	addParameter("GradientEstimationStepsPerEpisode", TStepsPerEpsiode);
	addParameter("GradientEstimationEpisodes", Episodes);
	addParameter("GPOMDPBeta", beta);

	// Scratch lists owned by this object and released in the destructor.
	localGradient = new CFeatureList();
	localZTrace = new CFeatureList();

	// nextStep() does nothing while this is NULL; getGradient() sets it.
	globalGradient = NULL;
}

// Frees the two scratch feature lists that the constructor allocated.
// agent, baseLine, stochPolicy and globalGradient are not deleted here.
CGPOMDPGradientCalculator::~CGPOMDPGradientCalculator()
{
	delete localZTrace;
	delete localGradient;
}

// Listener callback invoked for every step of the agent.
//
// While a target list is installed (see getGradient()/setGlobalGradient()),
// the step is folded into the running estimate:
//   1. the Z-trace is scaled by the "GPOMDPBeta" parameter,
//   2. the list filled by getActionProbabilityLnGradient() for
//      (oldState, action) is added to the Z-trace,
//   3. for every Z-trace entry, (reward - baseline(featureIndex)) * factor
//      is passed to globalGradient->update().
//
//   oldState  state the action was chosen in
//   action    the executed action; its action data is forwarded to the policy
//   reward    reward received for this step
//   newState  not used by this implementation
void CGPOMDPGradientCalculator::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *newState)
{
	// Outside of getGradient() no target list is set and the step is ignored.
	if (!globalGradient)
	{
		return;
	}

	localZTrace->multFactor(getParameter("GPOMDPBeta"));
	localGradient->clear();

	stochPolicy->getActionProbabilityLnGradient(oldState, action, action->getActionData(), localGradient);
	localZTrace->add(localGradient, 1.0);

	if (DebugIsEnabled('g'))
	{
		// The baseline is looked up per feature, so the first Z-trace entry is
		// used as a representative sample. The trace may be empty, in which
		// case there is no entry to dereference and the line is skipped.
		CFeatureList::iterator first = localZTrace->begin();
		if (first != localZTrace->end())
		{
			rlt_real sampleBaseLine = baseLine->getReinforcementBaseLine((*first)->featureIndex);
			// Three conversions, three arguments. The third value is the
			// (reward - baseline) multiplier used in the update loop below
			// (NOTE(review): presumed to be what "factor" refers to).
			DebugPrint('g', "reward: %f, baseline %f, -> factor %f\n", reward, sampleBaseLine, reward - sampleBaseLine);
		}
		DebugPrint('g', "Z-trace: ");
		localZTrace->saveASCII(DebugGetFileHandle('g'));
	}

	for (CFeatureList::iterator it = localZTrace->begin(); it != localZTrace->end(); ++it)
	{
		globalGradient->update((*it)->featureIndex, (reward - baseLine->getReinforcementBaseLine((*it)->featureIndex)) * (*it)->factor);
	}
}

// Episode-start callback: empties the Z-trace so that nothing accumulated
// in nextStep() during the previous episode carries over.
void CGPOMDPGradientCalculator::newEpisode()
{
	localZTrace->clear();
}

// Estimates the gradient by running the agent and letting nextStep()
// accumulate into `gradient`.
//
// "GradientEstimationEpisodes" episodes are run with
// "GradientEstimationStepsPerEpisode" passed as the step argument of
// doControllerEpisode(). Afterwards the list is scaled by
// 1 / (number of agent steps taken during those episodes).
//
// `gradient` is not cleared by this function. If this object was not yet a
// listener of the agent it is registered for the duration of the call and
// removed again afterwards.
void CGPOMDPGradientCalculator::getGradient(CFeatureList *gradient)
{
	// From now on nextStep() writes into the caller's list.
	setGlobalGradient(gradient);

	const int stepsPerEpisode = my_round(getParameter("GradientEstimationStepsPerEpisode"));
	const int episodeCount = my_round(getParameter("GradientEstimationEpisodes"));

	agent->startNewEpisode();

	// Remember the previous registration state so it can be restored below.
	const bool alreadyListening = agent->isListenerAdded(this);
	if (!alreadyListening)
	{
		agent->addSemiMDPListener(this);
	}

	printf("Calculating PGradient with %d steps and %d Episodes\n", stepsPerEpisode, episodeCount);

	// Step counter before the run; the difference afterwards is the
	// number of steps that contributed to the estimate.
	const int stepsBefore = agent->getTotalSteps();

	for (int episode = 0; episode < episodeCount; ++episode)
	{
		agent->startNewEpisode();
		agent->doControllerEpisode(1, stepsPerEpisode);
		printf("Finished %d Episode\n", episode);
	}

	const int gradientSteps = agent->getTotalSteps() - stepsBefore;

	if (!alreadyListening)
	{
		agent->removeSemiMDPListener(this);
	}

	// Guards the division below (only active when assertions are compiled in).
	assert(gradientSteps > 0);
	gradient->multFactor(1.0 / gradientSteps);

	if (DebugIsEnabled('g'))
	{
		DebugPrint('g', "Calculated GPOMDP Gradient (%d steps)\n", stepsPerEpisode);
		gradient->saveASCII(DebugGetFileHandle('g'));
		DebugPrint('g', "\n");
	}

	// Detach the caller's list so later steps are ignored by nextStep().
	setGlobalGradient(NULL);
}


// Returns the list nextStep() currently accumulates into,
// or NULL when none is installed.
CFeatureList* CGPOMDPGradientCalculator::getGlobalGradient()
{
	return this->globalGradient;
}

// Installs the list nextStep() accumulates into. Passing NULL turns
// accumulation off. The pointer is stored as-is.
void CGPOMDPGradientCalculator::setGlobalGradient(CFeatureList *globalGradient)
{
	// Parameter and member share a name; this-> selects the member.
	this->globalGradient = globalGradient;
}

// Stores the update function whose weights this updater reads and writes.
// The pointer is only kept; nothing here takes ownership of it.
CPolicyGradientUpdater::CPolicyGradientUpdater(CGradientUpdateFunction *updateFunction)
{
	// Parameter and member share a name; this-> selects the member.
	this->updateFunction = updateFunction;
}

// Perturbs every weight of the update function with a random sample.
//
// The current weights are read, their Euclidean norm is computed, and to
// each weight a sample from
//   CDistributions::getNormalDistributionSample(0, norm * randSize / 2)
// is added before the weights are written back.
//
//   randSize  scales the second argument of the sampler relative to the
//             norm of the weight vector
void CPolicyGradientUpdater::addRandomParams(rlt_real randSize)
{
	// The weight count is queried once and reused for the buffer and both loops.
	const int numWeights = updateFunction->getNumWeights();

	rlt_real *weights = new rlt_real[numWeights];
	updateFunction->getWeights(weights);

	rlt_real normWeights = 0;
	for (int i = 0; i < numWeights; ++i)
	{
		normWeights += weights[i] * weights[i];
	}
	normWeights = sqrt(normWeights);

	for (int i = 0; i < numWeights; ++i)
	{
		weights[i] += CDistributions::getNormalDistributionSample(0, normWeights * randSize / 2);
	}
	updateFunction->setWeights(weights);

	// The buffer came from new[], so it must be released with delete[].
	delete [] weights;
}

// Creates an updater that applies gradients with a fixed factor.
//
//   updateFunction  forwarded to the CPolicyGradientUpdater base
//   learningRate    initial value of the "PolicyGradientFactor" parameter
CConstantPolicyGradientUpdater::CConstantPolicyGradientUpdater(CGradientUpdateFunction *updateFunction, rlt_real learningRate)
	: CPolicyGradientUpdater(updateFunction)
{
	addParameter("PolicyGradientFactor", learningRate);
}

// Hands `gradient` to the update function together with the current value
// of the "PolicyGradientFactor" parameter.
void CConstantPolicyGradientUpdater::updateWeights(CFeatureList *gradient)
{
	const rlt_real gradientFactor = getParameter("PolicyGradientFactor");
	updateFunction->updateGradient(gradient, gradientFactor);
}


// Creates a line-search based updater.
//
//   updateFunction      forwarded to the CPolicyGradientUpdater base; its
//                       weight count sizes the two parameter buffers
//   gradientCalculator  stored and used by updateWeights() to re-evaluate
//                       the gradient at trial step sizes; its parameters are
//                       added to this object via addParameters(..., "GSearch")
//   s0                  initial value of "GSearchStartStepSize"; also seeds
//                       lastStepSize, "GSearchMinStepSize" (s0 / 256) and
//                       "GSearchMaxStepSize" (s0 * 16)
//   epsilon             initial value of "GSearchEpsilon"
CGSearchPolicyGradientUpdater::CGSearchPolicyGradientUpdater(CGradientUpdateFunction *updateFunction, CPolicyGradientCalculator *gradientCalculator, rlt_real s0, rlt_real epsilon)
	: CPolicyGradientUpdater(updateFunction)
{
	this->gradientCalculator = gradientCalculator;
	lastStepSize = s0;

	// Both buffers hold one value per weight and are freed in the destructor.
	const int weightCount = updateFunction->getNumWeights();
	startParameters = new rlt_real[weightCount];
	workParameters = new rlt_real[weightCount];

	addParameters(gradientCalculator, "GSearch");
	addParameter("GSearchStartStepSize", s0);
	addParameter("GSearchEpsilon", epsilon);
	// Registered as 0.0; updateWeights() starts from lastStepSize only when
	// this parameter is above 0.5.
	addParameter("GSearchUseLastStepSize", 0.0);

	addParameter("GSearchMinStepSize", s0 / 256);
	addParameter("GSearchMaxStepSize", s0 * 16);
}

// Frees the two weight buffers allocated with new[] in the constructor.
CGSearchPolicyGradientUpdater::~CGSearchPolicyGradientUpdater()
{
	delete [] workParameters;
	delete [] startParameters;
}

// Computes workParameters = startParameters + stepSize * gradient.
//
//   gradient         sparse list of (featureIndex, factor) entries
//   stepSize         multiplier applied to every gradient factor
//   startParameters  base point; read only
//   workParameters   destination; completely overwritten
//
// Both arrays are treated as holding updateFunction->getNumWeights() values.
void CGSearchPolicyGradientUpdater::setWorkingParamters(CFeatureList *gradient, rlt_real stepSize, rlt_real *startParameters, rlt_real *workParameters)
{
	// Begin with a plain copy of the base point ...
	const size_t byteCount = sizeof(rlt_real) * updateFunction->getNumWeights();
	memcpy(workParameters, startParameters, byteCount);

	// ... then move only the weights that have an entry in the gradient list.
	for (CFeatureList::iterator entry = gradient->begin(); entry != gradient->end(); ++entry)
	{
		workParameters[(*entry)->featureIndex] += stepSize * (*entry)->factor;
	}
}

// Chooses a step size along `gradient` by a bracketing line search and
// writes start + s * gradient into the update function.
//
// Procedure:
//   * The initial step size is "GSearchStartStepSize", or lastStepSize when
//     "GSearchUseLastStepSize" is above 0.5.
//   * The gradient is re-estimated at the trial point and its dot product
//     with `gradient` is taken.
//       - negative product: the step is halved until the product is no
//         longer below -"GSearchEpsilon" or the step is no longer above
//         "GSearchMinStepSize";
//       - otherwise: the step is doubled until the product is no longer
//         above "GSearchEpsilon" or the step is no longer below
//         "GSearchMaxStepSize".
//   * (sMinus, pMinus) / (sPlus, pPlus) are the step sizes and products on
//     either side. If pMinus > 0 and pPlus < 0 the final step is the linear
//     interpolation of the zero crossing, otherwise the midpoint of the two
//     step sizes.
//   * The resulting weights are set and the step is remembered in lastStepSize.
//
// Every re-estimation calls gradientCalculator->getGradient(), so one call
// of this function performs at least one such estimation.
void CGSearchPolicyGradientUpdater::updateWeights(CFeatureList *gradient)
{
	rlt_real s = getParameter("GSearchStartStepSize");

	if (getParameter("GSearchUseLastStepSize") > 0.5)
	{
		s = lastStepSize;
	}
	printf("Beginning GSEARCH with stepSize %f\n", s);

	rlt_real epsilon = getParameter("GSearchEpsilon");

	// Remember the starting point and evaluate the first trial point.
	updateFunction->getWeights(startParameters);
	setWorkingParamters(gradient, s, startParameters, workParameters);
	updateFunction->setWeights(workParameters);

	// Gradient at the current trial point. It lives on the stack so it is
	// released on every path out of this function.
	CFeatureList newGradient;
	gradientCalculator->getGradient(&newGradient);

	rlt_real prod = gradient->multFeatureList(&newGradient);
	rlt_real tempProd = prod;
	rlt_real sPlus = 0;
	rlt_real sMinus = 0;
	rlt_real pPlus = 0;
	rlt_real pMinus = 0;

	rlt_real sMin = getParameter("GSearchMinStepSize");
	rlt_real sMax = getParameter("GSearchMaxStepSize");

	printf("gradient * newgradient: %f\n", tempProd);

	if (prod < 0)
	{
		// Overshot: shrink the step until the product comes back up.
		sPlus = s;

		while (tempProd < -epsilon && s > sMin)
		{
			sPlus = s;
			pPlus = tempProd;
			s = s / 2;

			printf("GSearch StepSize: %f ", s);

			setWorkingParamters(gradient, s, startParameters, workParameters);
			updateFunction->setWeights(workParameters);
			newGradient.clear();
			gradientCalculator->getGradient(&newGradient);

			tempProd = gradient->multFeatureList(&newGradient);

			printf("GSearch StepSize: %f, gradient * newGradient: %f\n", s, tempProd);
		}
		sMinus = s;
		pMinus = tempProd;
		// NOTE(review): this clamp has no effect on the result, because s is
		// reassigned unconditionally further down - confirm the intent.
		if (s < sMin)
		{
			s = sMin;
		}
	}
	else
	{
		// Still ascending: grow the step until the product drops.
		sMinus = s;
		while (tempProd > epsilon && s < sMax)
		{
			sMinus = s;
			pMinus = tempProd;

			s = 2 * s;

			setWorkingParamters(gradient, s, startParameters, workParameters);
			updateFunction->setWeights(workParameters);
			newGradient.clear();
			gradientCalculator->getGradient(&newGradient);

			tempProd = gradient->multFeatureList(&newGradient);

			printf("GSearch StepSize: %f, gradient * newGradient: %f\n", s, tempProd);
		}
		sPlus = s;
		pPlus = tempProd;

		// NOTE(review): as above, this clamp is overwritten below - confirm the intent.
		if (s > sMax)
		{
			s = sMax;
		}
	}

	if (pMinus > 0 && pPlus < 0)
	{
		// The products change sign between sMinus and sPlus: take the zero
		// of the straight line through (sMinus, pMinus) and (sPlus, pPlus).
		// The denominator is strictly negative under this condition.
		s = (pPlus * sMinus - pMinus * sPlus) / (pPlus - pMinus);
	}
	else
	{
		s = (sPlus + sMinus) / 2;
	}

	printf("GSearch: s: %f, s+ %f, s- %f, p+ %f, p- %f\n", s, sPlus, sMinus, pPlus, pMinus);

	DebugPrint('g', "GSearch: s: %f, s+ %f, s- %f, p+ %f, p- %f\n", s, sPlus, sMinus, pPlus, pMinus);

	// Apply the final step before anything is logged, so that the dump below
	// shows the weights that were actually chosen rather than those of the
	// last trial point.
	setWorkingParamters(gradient, s, startParameters, workParameters);
	updateFunction->setWeights(workParameters);

	if (DebugIsEnabled('g'))
	{
		DebugPrint('g', "GSearch: Calculated StepSize %f\n", s);
		DebugPrint('g', "GSearch: New calculated Parameters\n");
		updateFunction->saveData(DebugGetFileHandle('g'));
	}

	lastStepSize = s;

	// Report the Euclidean norm of the new weight vector.
	rlt_real normWeights = 0;
	const int numWeights = updateFunction->getNumWeights();
	for (int i = 0; i < numWeights; ++i)
	{
		normWeights += workParameters[i] * workParameters[i];
	}
	normWeights = sqrt(normWeights);
	printf("Weights Norm after Update %f\n", normWeights);
}


?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
国产精品美女久久久久久久| 欧美一卡二卡三卡| 亚洲品质自拍视频网站| 色噜噜狠狠成人网p站| 亚洲午夜电影在线| 91精品麻豆日日躁夜夜躁| 精品午夜久久福利影院| 国产亚洲女人久久久久毛片| 99久久国产综合精品色伊| 亚洲综合精品自拍| 欧美一级二级在线观看| 岛国精品一区二区| 亚洲国产视频直播| 日韩欧美国产一二三区| 成人av在线播放网站| 亚洲精品亚洲人成人网| 在线日韩av片| 天堂在线一区二区| 日韩免费视频一区二区| 五月婷婷久久丁香| 在线亚洲人成电影网站色www| 日韩电影免费在线看| 精品裸体舞一区二区三区| 国产高清久久久久| 亚洲免费观看视频| 91精品欧美一区二区三区综合在 | 青青草伊人久久| 精品国精品国产| a美女胸又www黄视频久久| 亚洲猫色日本管| 777久久久精品| 国产不卡在线播放| 一区二区三区四区乱视频| 91福利社在线观看| 免费成人在线观看视频| 国产欧美一区二区精品性色超碰| 成人av在线影院| 午夜精品福利视频网站| 久久综合五月天婷婷伊人| 成人动漫一区二区| 亚洲444eee在线观看| 日韩丝袜情趣美女图片| 久久成人免费日本黄色| 亚洲色图在线播放| 日韩欧美不卡一区| 日本黄色一区二区| 麻豆传媒一区二区三区| 亚洲欧洲色图综合| 日韩亚洲欧美中文三级| 99国产精品国产精品久久| 天使萌一区二区三区免费观看| 久久免费看少妇高潮| 色琪琪一区二区三区亚洲区| 国产精品一品视频| 日韩国产成人精品| 亚洲欧美偷拍另类a∨色屁股| 欧美xxx久久| 欧美日韩亚洲丝袜制服| 成人性生交大片免费看在线播放| 日韩精品亚洲专区| 久久精品视频一区二区三区| 一本大道久久精品懂色aⅴ| 国产精品1区2区3区在线观看| 丝袜美腿亚洲色图| 一级女性全黄久久生活片免费| 久久精品亚洲精品国产欧美 | 欧美一区二区女人| 99re这里只有精品首页| 国产揄拍国内精品对白| 午夜av一区二区三区| 亚洲三级小视频| 日本一区二区综合亚洲| 精品盗摄一区二区三区| 3d动漫精品啪啪1区2区免费| 91成人国产精品| zzijzzij亚洲日本少妇熟睡| 久草热8精品视频在线观看| 午夜精品福利视频网站| 亚洲精品国产a久久久久久| 国产精品久久久久久久岛一牛影视 | 99re亚洲国产精品| 国产成人免费在线观看| 九九**精品视频免费播放| 婷婷激情综合网| 亚洲成av人片在线观看| 亚洲一区自拍偷拍| 亚洲精品成人悠悠色影视| 亚洲男同性恋视频| 亚洲免费av观看| 中文字幕不卡在线观看| www久久精品| 久久久久久一二三区| 国产日韩三级在线| 国产日产欧美一区二区三区 | 精品奇米国产一区二区三区| 日韩三级伦理片妻子的秘密按摩| 欧美美女一区二区在线观看| 91传媒视频在线播放| 在线观看精品一区| 欧美另类久久久品| 欧美一级爆毛片| 2021国产精品久久精品| 国产三级欧美三级日产三级99| 久久久久久99久久久精品网站| 久久精子c满五个校花| 欧美激情在线看| 日韩毛片高清在线播放| 伊人一区二区三区| 午夜久久久久久久久| 蜜臀久久99精品久久久画质超高清| 日本成人在线电影网| 免播放器亚洲一区| 国产欧美日本一区二区三区| 国产精品每日更新| 亚洲色图欧美在线| 亚洲h在线观看| 极品少妇xxxx偷拍精品少妇| 国产经典欧美精品| 91一区二区在线观看| 欧美亚洲一区二区在线| 91精品国产欧美一区二区成人| 精品国产一二三区| 欧美韩国日本不卡| 亚洲第一精品在线| 国产原创一区二区三区| av在线一区二区| 3d成人h动漫网站入口| 久久久国产精品不卡| 亚洲天堂av老司机| 日韩av午夜在线观看| 粉嫩aⅴ一区二区三区四区| 欧美日韩在线播放| 欧美电视剧在线观看完整版| 久久综合九色欧美综合狠狠| 亚洲精品成人在线| 精品一区二区三区免费| 92国产精品观看| 精品成a人在线观看| 一区二区三区不卡在线观看| 免费亚洲电影在线| 97国产一区二区| 精品99999| 亚洲午夜在线视频| 大白屁股一区二区视频| 欧美一区二区精美| 亚洲欧美二区三区| 成人永久免费视频| 日韩欧美电影在线| 亚洲国产成人高清精品| 精品一区二区av| 91欧美一区二区| 亚洲精品一区二区三区在线观看| 亚洲国产成人av网| 成人黄色片在线观看| 日韩精品中文字幕在线一区| 亚洲小少妇裸体bbw| 成人在线综合网| 精品国产伦一区二区三区观看方式 | 日本视频中文字幕一区二区三区| 国产91精品在线观看| 日韩一区二区免费在线观看| 一区二区三区四区国产精品| 粉嫩av一区二区三区在线播放| 欧美精品一区二区高清在线观看 | 日韩欧美一级二级三级| 
一区二区三区在线视频观看58| 国产成人综合视频| 欧美变态口味重另类| 免费在线成人网| 7878成人国产在线观看| 一区二区在线看| 一本大道久久a久久综合婷婷| 中文字幕精品三区| 国产精品911| 日韩精品一区二区三区在线播放| 青青草国产成人av片免费| 在线成人免费视频| 视频一区中文字幕| 91精品国产91综合久久蜜臀| 首页国产欧美日韩丝袜| 欧美狂野另类xxxxoooo| 视频一区欧美日韩| 欧美日韩国产精选| 日韩高清不卡一区二区三区| 色偷偷久久人人79超碰人人澡| 亚洲欧美激情视频在线观看一区二区三区 | 精品少妇一区二区三区在线播放| 日韩福利视频网| 日韩一区国产二区欧美三区| 蜜臀va亚洲va欧美va天堂| 精品国产一区久久| 国产精品一区2区| 国产精品久久免费看| 91在线国产福利| 亚洲最大成人综合| 欧美精品少妇一区二区三区| 日韩黄色在线观看| 精品国产一区二区三区四区四| 国产一区 二区 三区一级| 亚洲国产成人午夜在线一区|