亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

? 歡迎來到蟲蟲下載站! | ?? 資源下載 ?? 資源專輯 ?? 關于我們
? 蟲蟲下載站

?? cdynamicprogramming.cpp

?? 強化學習算法(R-Learning)難得的珍貴資料
?? CPP
字號:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)

//                
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
// 
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "ril_debug.h"
#include "cdynamicprogramming.h"

#include <math.h>

// Computes the expected value of executing `action` in the discrete state held
// by `discState`: the sum over all forward transitions returned by the model of
//   P(s'|s,a) * (R(s,a,s') + gamma * V(s')).
// For actions flagged MULTISTEPACTION every transition is additionally expanded
// over the entries of its duration map; each term is weighted by the duration's
// map value and V(s') is discounted by gamma^duration.
//
// `discState` is used as scratch space while successor states are evaluated;
// its discrete state 0 is restored before returning. For multi-step actions
// the action's duration is modified temporarily and restored as well.
//
// Returns the accumulated action value (0 when there are no transitions).
rlt_real CDynamicProgramming::getActionValue(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *discState, CAction *action, rlt_real gamma)
{
	// Validate the arguments before the first dereference of discState.
	assert(vFunction != NULL);
	assert(discState != NULL && discState->getStateProperties()->isType(DISCRETESTATE));

	rlt_real V = 0;

	// Remember the start state so it can be restored after the sweep.
	const int feature = discState->getDiscreteState(0);
	const int actionIndex = model->getActions()->getIndex(action);

	CTransitionList *transList = model->getForwardTransitions(actionIndex, feature);

	for (CTransitionList::iterator itTrans = transList->begin(); itTrans != transList->end(); ++itTrans)
	{
		// Point the scratch state at the successor so vFunction can evaluate it.
		discState->setDiscreteState(0, (*itTrans)->getEndState());
		if (action->isType(MULTISTEPACTION))
		{
			CMultiStepAction *mAction = dynamic_cast<CMultiStepAction *>(action);
			// An action flagged MULTISTEPACTION must really be a CMultiStepAction.
			assert(mAction != NULL);

			int oldDur = mAction->getDuration();
			CSemiMDPTransition *trans = ((CSemiMDPTransition *)(*itTrans));
			std::map<int,rlt_real>::iterator itDurations = trans->getDurations()->begin();

			for (; itDurations != trans->getDurations()->end(); ++itDurations)
			{
				// The reward function receives the action with the duration under evaluation.
				mAction->getMultiStepActionData()->duration = (*itDurations).first;
				V += (*itDurations).second * (*itTrans)->getPropability() * (rewardFunc->getReward(feature, mAction, (*itTrans)->getEndState()) + pow(gamma, (*itDurations).first) * vFunction->getValue(discState));
			}
			mAction->getMultiStepActionData()->duration = oldDur;
		}
		else
		{
			V += (*itTrans)->getPropability() * (rewardFunc->getReward(feature, action, (*itTrans)->getEndState()) + gamma * vFunction->getValue(discState));
		}
		DebugPrint('d', "Transition: [%d->%d, %f] ", (*itTrans)->getStartState(), (*itTrans)->getEndState(), (*itTrans)->getPropability());

		DebugPrint('d', "Reward: %f, V: %f\n",rewardFunc->getReward(feature, action, (*itTrans)->getEndState()), V );
	}

	// Restore the caller's state.
	discState->setDiscreteState(0, feature);

	DebugPrint('d', "ActionValue (State: %d, Action: %d): %f\n", feature, actionIndex, V);

	return V;
}

// Returns the maximum, over all actions of the model, of the action value
// computed by getActionValue() for the state held in `feature`.
// Returns 0 when the model has no actions.
rlt_real CDynamicProgramming::getBellmanValue(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *feature, rlt_real gamma)
{
	assert(vFunction != NULL);

	rlt_real maxV = 0;

	for (unsigned int naction = 0; naction < model->getNumActions(); naction ++)
	{
		CAction *action = model->getActions()->get(naction);

		rlt_real V = getActionValue(model, rewardFunc, vFunction, feature, action, gamma);

		// Seed the maximum with the first action's value. The test must be on
		// the loop index: comparing the action pointer against 0 never fires,
		// which would clamp the result to >= 0 when all action values are negative.
		if (naction == 0 || maxV < V)
		{
			maxV = V;
		}
	}
	return maxV;
}

// Returns the difference between the Bellman value of the state held in
// `feature` (see getBellmanValue) and the value vFunction currently assigns to it.
rlt_real CDynamicProgramming::getBellmanError(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardFunc, CAbstractVFunction *vFunction, CState *feature, rlt_real gamma)
{
	const rlt_real bellmanValue = getBellmanValue(model, rewardFunc, vFunction, feature, gamma);
	const rlt_real currentValue = vFunction->getValue(feature);

	return bellmanValue - currentValue;
}


// Value iteration that learns a Q-function. The state value used for the
// backups is derived from qFunction through a COptimalVFunctionFromQFunction
// created here (and released by the destructor, since learnVFunction is false).
CValueIteration::CValueIteration(CFeatureQFunction *qFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel)
{
	addParameters(qFunction);
	addParameter("DiscountFactor", 0.95);

	this->qFunction = qFunction;

	this->actions = qFunction->getActions();

	this->vFunction = new COptimalVFunctionFromQFunction(qFunction, qFunction->getFeatureCalculator());

	// Members not used in this mode are set to NULL so the getters never
	// hand out indeterminate pointers.
	this->qFunctionFromVFunction = NULL;
	this->vFunctionFromQFunction = NULL;
	this->stochPolicy = NULL;

	learnVFunction = false;

	init(model, rewardModel);
}

// Value iteration that learns a Q-function. The state value used for the
// backups is derived from qFunction and stochPolicy through a
// CVFunctionFromQFunction created here (and released by the destructor, since
// learnVFunction is false).
CValueIteration::CValueIteration(CFeatureQFunction *qFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel, CStochasticPolicy *stochPolicy)
{
	addParameters(qFunction);
	addParameter("DiscountFactor", 0.95);

	this->qFunction = qFunction;

	this->actions = qFunction->getActions();

	// Keep the policy so getStochasticPolicy() returns what the caller passed in.
	this->stochPolicy = stochPolicy;

	this->vFunction= new CVFunctionFromQFunction(qFunction, stochPolicy, qFunction->getFeatureCalculator());

	// Members not used in this mode are set to NULL instead of being left
	// indeterminate.
	this->qFunctionFromVFunction = NULL;
	this->vFunctionFromQFunction = NULL;

	learnVFunction = false;

	init(model, rewardModel);
}

// Value iteration that learns a V-function directly. A Q-function derived from
// the model and reward function, and a COptimalVFunctionFromQFunction on top of
// it, are created here; the destructor releases both (learnVFunction is true).
CValueIteration::CValueIteration(CFeatureVFunction *vFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel)
{	
	addParameters(vFunction);
	addParameter("DiscountFactor", 0.95);

	this->vFunction = vFunction;	
	this->qFunctionFromVFunction= new CQFunctionFromStochasticModel(vFunction, model, rewardModel);
	this->vFunctionFromQFunction = new COptimalVFunctionFromQFunction(qFunctionFromVFunction, vFunction->getStateProperties());

	// Members not used in this mode are set to NULL so getQFunction() and
	// getStochasticPolicy() never hand out indeterminate pointers.
	this->qFunction = NULL;
	this->stochPolicy = NULL;

	this->actions = model->getActions();

	learnVFunction = true;

	init(model, rewardModel);
}

// Value iteration that learns a V-function directly. A Q-function derived from
// the model and reward function, and a CVFunctionFromQFunction using
// stochPolicy on top of it, are created here; the destructor releases both
// (learnVFunction is true).
CValueIteration::CValueIteration(CFeatureVFunction *vFunction, CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel, CStochasticPolicy *stochPolicy)
{
	addParameters(vFunction);
	addParameter("DiscountFactor", 0.95);

	this->vFunction = vFunction;	
	this->qFunctionFromVFunction = new CQFunctionFromStochasticModel(vFunction, model, rewardModel);
	this->vFunctionFromQFunction = new CVFunctionFromQFunction(qFunctionFromVFunction, stochPolicy, vFunction->getStateProperties());

	// Keep the policy so getStochasticPolicy() returns what the caller passed in;
	// no feature Q-function is learned in this mode.
	this->stochPolicy = stochPolicy;
	this->qFunction = NULL;

	this->actions = model->getActions();

	learnVFunction = true;

	init(model, rewardModel);
}


// Shared constructor tail: registers the "ValueIterationMaxListSize" parameter
// (a quarter of the model's feature count, integer division), stores the model
// and reward function, and allocates the scratch state and the priority list.
void CValueIteration::init(CAbstractFeatureStochasticModel *model, CFeatureRewardFunction *rewardModel)
{
	addParameter("ValueIterationMaxListSize",  model->getNumFeatures() / 4);

	this->rewardModel = rewardModel;
	this->model = model;

	// Scratch state with its own properties object; the destructor deletes both.
	CStateProperties *scratchProperties = new CStateProperties(0,1,DISCRETESTATE);
	this->discState = new CState(scratchProperties);
	// Discrete dimension 0 gets one value per model feature.
	this->discState->getStateProperties()->setDiscreteStateSize(0, model->getNumFeatures());

	// First argument is a twentieth of the feature count.
	// NOTE(review): presumably an initial capacity and a "sorted" flag — confirm against CFeatureList.
	this->priorityList = new CFeatureList(model->getNumFeatures() / 20, true);
}

// Releases everything this object allocated: the priority list, the helper
// value/Q-functions created by the constructor that was used, and the scratch
// state together with its properties object.
CValueIteration::~CValueIteration()
{
	delete priorityList;

	if (!learnVFunction)
	{
		// Q-learning mode: vFunction is the wrapper built in the constructor.
		delete vFunction;
	}
	else
	{
		// V-learning mode: vFunction belongs to the caller; only the helpers
		// built around it are released here.
		delete qFunctionFromVFunction;
		delete vFunctionFromQFunction;
	}

	// The properties object was allocated separately in init(), so it is
	// released before the state itself.
	delete discState->getStateProperties();
	delete discState;
}

// Updates the feature at the front of the priority list. When the list is
// empty a random feature of the model is updated instead; nothing happens if
// the model has no features.
void CValueIteration::updateFirstFeature()
{
	if (priorityList->size() > 0)
	{
		updateFeature((*priorityList->begin())->featureIndex);
		return;
	}

	const unsigned long numFeatures = static_cast<unsigned long>(model->getNumFeatures());
	if (numFeatures == 0)
	{
		// No feature to pick; also avoids a modulo by zero.
		return;
	}

	// Combine two draws so that feature spaces larger than RAND_MAX are still
	// covered. The arithmetic is done in unsigned long: the signed product
	// rand() * RAND_MAX overflows (undefined behaviour) where RAND_MAX == INT_MAX
	// and could produce a negative index.
	const unsigned long draw = static_cast<unsigned long>(rand()) * (static_cast<unsigned long>(RAND_MAX) + 1UL)
		+ static_cast<unsigned long>(rand());

	updateFeature(static_cast<int>(draw % numFeatures));
}

// Returns the priority contribution of `trans` for a Bellman error `bellE`:
// the transition probability times bellE, additionally scaled by the
// transition's semi-MDP factor (for the current "DiscountFactor" parameter)
// when the transition is of type SEMIMDPTRANSITION.
rlt_real CValueIteration::getPriority(CTransition *trans, rlt_real bellE)
{
	if (!trans->isType(SEMIMDPTRANSITION))
	{
		return trans->getPropability() * bellE;
	}

	CSemiMDPTransition *semiTrans = (CSemiMDPTransition *) trans;
	return semiTrans->getSemiMDPFaktor(getParameter("DiscountFactor")) * trans->getPropability() * bellE;
}

void CValueIteration::updateFeature(int feature)
{
	DebugPrint('d', "Updating State: %d\n", feature);

	discState->setDiscreteState(0, feature);
	rlt_real oldV = vFunction->getValue(discState);
	rlt_real bellV = 0;
	rlt_real bellE = 0;
	rlt_real actionValue;
	CActionSet::iterator it = actions->begin();

	if (!learnVFunction)
	{
		for (int i = 0; it != actions->end(); it++, i++)
		{	
			actionValue = CDynamicProgramming::getActionValue(model, rewardModel, vFunction, discState, *it, getParameter("DiscountFactor"));
			((CFeatureVFunction *)qFunction->getVFunction(*it))->setFeature(feature, actionValue);
		}
	}
	else
	{
		((CFeatureVFunction *)vFunction)->setFeature(feature, vFunctionFromQFunction->getValue(discState));
	}

	bellV = vFunction->getValue(discState);	
	
	bellE = bellV - oldV;
	DebugPrint('d', "OldV: %f, NewV %f, bellE: %f\n", oldV, bellV, bellE);
	
	priorityList->remove(feature);

	CTransitionList *backTrans = NULL;
	CTransitionList::iterator transIt;
	
	if (fabs(bellE) >= 0.00001)
	{
		for (unsigned int action = 0; action < model->getNumActions(); action ++)
		{
			backTrans = model->getBackwardTransitions(action, feature);
			for (transIt = backTrans->begin(); transIt != backTrans->end(); transIt ++)
			{
				DebugPrint('d', "Adding State %d with Priority: %lf (prob: %lf)\n", (*transIt)->getStartState(), (*transIt)->getPropability() * bellE, (*transIt)->getPropability());
				addPriority((*transIt)->getStartState(),getPriority((*transIt), fabs(bellE)));
			}
		}
	}
}

// Adds every entry of `featList` to the priority list, using the entry's
// featureIndex as the feature and its factor as the priority increment
// (see addPriority).
void CValueIteration::addPriorities(CFeatureList *featList)
{
	// Advance the iterator, not the list pointer: incrementing featList would
	// never move `it` and would step the pointer past the list object.
	for (CFeatureList::iterator it = featList->begin(); it != featList->end(); ++it)
	{
		addPriority((*it)->featureIndex, (*it)->factor);
	}
}

// Increases the priority stored for `feature` by `priority`, then removes
// entries from the back of the priority list until its size no longer exceeds
// the "ValueIterationMaxListSize" parameter.
void CValueIteration::addPriority(int feature, rlt_real priority)
{
	// Accumulate on top of whatever factor the list currently reports for the feature.
	priorityList->set(feature, priority + priorityList->getFeatureFactor(feature));

	for (;;)
	{
		if (!(priorityList->size() > getParameter("ValueIterationMaxListSize")))
		{
			break;
		}
		priorityList->remove(*priorityList->rbegin());
	}
}

// Returns the stochastic model passed to init().
CAbstractFeatureStochasticModel *CValueIteration::getTheoreticalModel()
{
	return this->model;
}

// Returns the value function used for the backups (the vFunction member).
CAbstractVFunction *CValueIteration::getVFunction()
{
	return this->vFunction;
}

// Returns the "ValueIterationMaxListSize" parameter passed through my_round().
int CValueIteration::getMaxListSize()
{
	const int maxListSize = my_round(getParameter("ValueIterationMaxListSize"));
	return maxListSize;
}

void CValueIteration::setMaxListSize(int maxListSize)
{
	setParameter("ValueIterationMaxListSize", maxListSize);
}

// Calls updateFirstFeature() kSteps times (not at all when kSteps <= 0).
void CValueIteration::doUpdateSteps(int kSteps)
{
	int remaining = kSteps;
	while (remaining > 0)
	{
		updateFirstFeature();
		--remaining;
	}
}

// Calls updateFirstFeature() up to kSteps times, stopping early as soon as the
// priority list is empty.
void CValueIteration::doUpdateStepsUntilEmptyList(int kSteps)
{
	for (int k = 0; k < kSteps; ++k)
	{
		if (!(priorityList->size() > 0))
		{
			break;
		}
		updateFirstFeature();
	}
}

// Returns the qFunction member.
CFeatureQFunction *CValueIteration::getQFunction()
{
	return this->qFunction;
}

// Returns the stochPolicy member.
CStochasticPolicy *CValueIteration::getStochasticPolicy()
{
	return this->stochPolicy;
}

void CValueIteration::doUpdateBackwardStates(int feature)
{
	CTransitionList *backTrans = NULL;
	CTransitionList::iterator transIt;
	
	for (unsigned int action = 0; action < model->getNumActions(); action ++)
	{
		backTrans = model->getBackwardTransitions(action, feature);
		for (transIt = backTrans->begin(); transIt != backTrans->end(); transIt ++)
		{
			updateFeature((*transIt)->getStartState());
		}
	}
}

?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频
欧美成人乱码一区二区三区| 午夜电影一区二区| 亚洲一区二区美女| 国产精品影视天天线| 欧美色区777第一页| 国产精品久久毛片av大全日韩| 亚洲成av人片一区二区梦乃| 福利一区二区在线| 欧美精品一区二区三| 亚洲一区二区影院| 91在线视频网址| 国产午夜精品一区二区| 精品无码三级在线观看视频| 欧美日韩成人综合| 亚洲永久免费视频| jlzzjlzz亚洲女人18| 国产色91在线| 捆绑紧缚一区二区三区视频| 欧美人动与zoxxxx乱| 亚洲美女一区二区三区| av网站免费线看精品| 国产亚洲综合在线| 国产精品一区在线| 欧美一区二区播放| 午夜精品福利在线| 欧美亚洲国产bt| 亚洲一区在线免费观看| 色欧美乱欧美15图片| 亚洲欧洲精品成人久久奇米网| 丁香啪啪综合成人亚洲小说| 久久噜噜亚洲综合| 国产成人精品www牛牛影视| 久久久久久久av麻豆果冻| 韩国精品一区二区| 国产亚洲精品bt天堂精选| 久久99精品久久久久久久久久久久| 欧美一区二区免费观在线| 日韩高清在线不卡| 欧美成人精品福利| 国产一区二区三区综合| 久久久久国产免费免费| 国产成人av影院| 中文字幕综合网| 91国模大尺度私拍在线视频| 亚洲一区在线观看视频| 欧美精品在线观看一区二区| 蜜臀av一级做a爰片久久| 久久天堂av综合合色蜜桃网| 国产美女娇喘av呻吟久久 | 国产传媒久久文化传媒| 国产三级精品在线| 99久久免费精品| 午夜精品久久久久久久99樱桃| 欧美精品日日鲁夜夜添| 精品伊人久久久久7777人| 亚洲国产精品传媒在线观看| 在线观看亚洲精品| 三级欧美在线一区| 久久久久国产精品麻豆ai换脸| 91香蕉视频污| 日韩av午夜在线观看| 国产日韩成人精品| 欧美在线免费视屏| 国产一区二区在线影院| 一区二区三区国产豹纹内裤在线| 欧美日韩dvd在线观看| 国产成人免费xxxxxxxx| 婷婷中文字幕一区三区| 中文字幕欧美三区| 欧美日韩精品免费观看视频| 国产精品白丝av| 午夜精品一区二区三区免费视频| 久久久久久97三级| 91精品国产一区二区三区 | 欧美日韩日本视频| 国产乱子轮精品视频| 亚洲一区成人在线| 中文字幕第一页久久| 欧美酷刑日本凌虐凌虐| 一道本成人在线| 国产成人综合网| 日韩—二三区免费观看av| 亚洲婷婷综合久久一本伊一区| 日韩一区二区视频在线观看| 色婷婷av一区二区三区大白胸| 韩国三级电影一区二区| 日韩国产一二三区| 亚洲欧美日韩国产成人精品影院| 久久久三级国产网站| 日韩午夜精品电影| 欧美日韩成人综合天天影院| 91网站视频在线观看| 国产自产高清不卡| 日韩成人伦理电影在线观看| 亚洲视频在线观看三级| 日本一区二区三区久久久久久久久不| 欧美日韩国产大片| 欧美私人免费视频| 91精品1区2区| 91视视频在线观看入口直接观看www| 国产精品一区二区黑丝| 狠狠色丁香婷综合久久| 青青草伊人久久| 日韩一区精品字幕| 亚洲第一狼人社区| 婷婷久久综合九色国产成人| 亚洲h动漫在线| 亚洲444eee在线观看| 天天综合日日夜夜精品| 亚洲国产精品影院| 亚洲成人av在线电影| 亚洲成人av免费| 日韩不卡一区二区| 理论电影国产精品| 国产在线精品免费av| 国产一区三区三区| 国产福利91精品一区二区三区| 国产一区二区三区四区在线观看| 国产一区二区三区四区五区美女| 国产在线精品视频| 国产成人精品免费| 99这里都是精品| 色又黄又爽网站www久久| 欧美在线免费观看亚洲| 欧美日本高清视频在线观看| 日韩一二三区视频| 国产日韩欧美电影| 自拍偷拍亚洲欧美日韩| 亚洲综合丝袜美腿| 日本欧美大码aⅴ在线播放| 精品在线播放午夜| 成人免费电影视频| 欧洲精品一区二区三区在线观看| 在线精品国精品国产尤物884a| 欧美三级韩国三级日本三斤| 欧美一级在线免费| 久久久噜噜噜久噜久久综合| 国产精品免费视频一区| 亚洲精品免费看| 久久狠狠亚洲综合| 高潮精品一区videoshd| 精品视频123区在线观看| 91精品国产欧美一区二区成人| 久久夜色精品一区| 亚洲精品成人精品456| 青青草成人在线观看| 成人一区二区在线观看| 欧美亚洲综合久久| 久久在线观看免费| 亚洲综合免费观看高清完整版在线 | 日本一区二区成人| 亚洲综合精品自拍| 国内久久精品视频| 色狠狠色狠狠综合| 久久久久久久久久久99999| 一区二区三区在线播放| 久久99国产精品免费网站| 色哟哟一区二区三区| 精品成人免费观看| 亚洲国产色一区| 风流少妇一区二区| 制服丝袜国产精品| 亚洲人成7777| 国产成人精品一区二区三区网站观看| 
欧美日韩综合在线免费观看| 国产欧美中文在线| 免费不卡在线观看| 日本丶国产丶欧美色综合| 国产婷婷色一区二区三区| 三级精品在线观看| 色94色欧美sute亚洲线路一ni| 久久这里都是精品| 美女尤物国产一区| 欧美影院精品一区| 亚洲图片欧美激情| 粉嫩高潮美女一区二区三区| 欧美一区二区三区婷婷月色| 亚洲免费三区一区二区| 国产精品一区二区在线观看不卡| 欧美一区二区不卡视频| 亚洲va欧美va人人爽| 色女孩综合影院| 亚洲欧美一区二区视频| 播五月开心婷婷综合| 久久这里只有精品视频网| 麻豆国产91在线播放| 欧美高清性hdvideosex| 亚洲成av人片观看| 欧美日韩和欧美的一区二区| 夜夜嗨av一区二区三区网页| 91在线云播放| 亚洲欧洲国产日本综合| 91在线免费播放| 成人免费在线视频观看| yourporn久久国产精品| 中文字幕成人在线观看| 成人免费不卡视频| 亚洲人精品午夜| 欧美综合亚洲图片综合区| 国产成人一区二区精品非洲|