// cmontecarlo.cpp
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cmontecarlo.h"
#include <math.h>
// Base class for policy evaluation: runs nEpisodes episodes with the agent,
// listens to the reward signal, and averages the per-episode value returned
// by getEpisodeValue().
CPolicyEvaluator::CPolicyEvaluator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode) : CSemiMDPRewardListener(rewardFunction)
{
	this->agent = agent;
	this->nEpisodes = nEpisodes;
	this->nStepsPerEpisode = nStepsPerEpisode;
}

// Attach the evaluator as a listener, run the configured number of episodes,
// and return the mean episode value.
rlt_real CPolicyEvaluator::evaluatePolicy()
{
	rlt_real value = 0;
	agent->addSemiMDPListener(this);

	for (int i = 0; i < nEpisodes; i++)
	{
		agent->startNewEpisode();
		agent->doControllerEpisode(1, nStepsPerEpisode);
		value += this->getEpisodeValue();
	}
	value /= nEpisodes;

	agent->removeSemiMDPListener(this);
	return value;
}
// Computes the average reward per step of an episode. If the environment
// reports a failure, the remaining steps of the episode are padded with
// minReward before averaging.
CAverageRewardCalculator::CAverageRewardCalculator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode, rlt_real minReward) : CPolicyEvaluator(agent, rewardFunction, nEpisodes, nStepsPerEpisode)
{
	nSteps = 0;
	averageReward = 0;
	this->minReward = minReward;
}

rlt_real CAverageRewardCalculator::getEpisodeValue()
{
	if (agent->getEnvironmentModel()->isFailed())
	{
		// Treat the unfinished part of a failed episode as receiving minReward.
		averageReward += minReward * (nStepsPerEpisode - nSteps);
		nSteps = nStepsPerEpisode;
	}
	// Assumes at least one step was executed in the episode.
	return averageReward / nSteps;
}

void CAverageRewardCalculator::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
	averageReward += reward;
	nSteps++;
}

void CAverageRewardCalculator::newEpisode()
{
	averageReward = 0;
	nSteps = 0;
}
// Computes the discounted return of an episode: each reward is weighted by
// gamma raised to the number of elapsed steps, where the step counter advances
// by the duration of each (possibly extended) action, as required for semi-MDPs.
CValueCalculator::CValueCalculator(CAgent *agent, CRewardFunction *rewardFunction, int nEpisodes, int nStepsPerEpisode, rlt_real gamma) : CPolicyEvaluator(agent, rewardFunction, nEpisodes, nStepsPerEpisode)
{
	nSteps = 0;
	value = 0;
	addParameter("DiscountFactor", gamma);
}

rlt_real CValueCalculator::getEpisodeValue()
{
	return value;
}

void CValueCalculator::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
	// Discount the reward by gamma^nSteps, then advance the time index
	// by the action's duration (1 for primitive actions).
	value += pow(getParameter("DiscountFactor"), nSteps) * reward;
	nSteps += action->getDuration();
}

void CValueCalculator::newEpisode()
{
	value = 0;
	nSteps = 0;
}
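
// Usage sketch for CValueCalculator (assumes a CAgent *agent and a
// CRewardFunction *rewardFunction have been set up elsewhere with the toolbox;
// the episode count, episode length, and discount factor are illustrative only):
//
//   CValueCalculator valueCalc(agent, rewardFunction, 20, 1000, 0.95);
//   rlt_real meanDiscountedReturn = valueCalc.evaluatePolicy();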
// Policy evaluator that restarts every episode from a fixed start state taken
// from the given state list, so that successive evaluations are comparable.
// The number of episodes equals the number of start states.
CPolicySameStateEvaluator::CPolicySameStateEvaluator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode) : CPolicyEvaluator(agent, rewardFunction, startStates->getNumStates(), nStepsPerEpisode)
{
	this->startStates = startStates;
	this->environment = environment;
}

rlt_real CPolicySameStateEvaluator::evaluatePolicy()
{
	rlt_real value = 0;
	agent->addSemiMDPListener(this);

	CState *startState = new CState(environment->getStateProperties());
	for (int i = 0; i < nEpisodes; i++)
	{
		agent->startNewEpisode();
		// Reset the environment to the i-th stored start state before each episode.
		startStates->getState(i, startState);
		environment->setState(startState);
		agent->doControllerEpisode(1, nStepsPerEpisode);
		value += this->getEpisodeValue();
	}
	value /= nEpisodes;

	agent->removeSemiMDPListener(this);
	delete startState;
	return value;
}

// Replaces the stored start states with freshly sampled states, obtained by
// resetting the environment model once per start state.
void CPolicySameStateEvaluator::getNewStartStates()
{
	int numStartStates = startStates->getNumStates();
	CState *startState = new CState(environment->getStateProperties());
	startStates->clear();

	for (int i = 0; i < numStartStates; i++)
	{
		environment->resetModel();
		environment->getState(startState);
		startStates->addState(startState);
	}
	delete startState;
}
// Same measure as CAverageRewardCalculator (average reward per step, with
// failed episodes padded by minReward), but evaluated from fixed start states.
CAverageRewardSameStateCalculator::CAverageRewardSameStateCalculator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode, rlt_real minReward) : CPolicySameStateEvaluator(agent, rewardFunction, environment, startStates, nStepsPerEpisode)
{
	nSteps = 0;
	averageReward = 0;
	this->minReward = minReward;
}

rlt_real CAverageRewardSameStateCalculator::getEpisodeValue()
{
	if (agent->getEnvironmentModel()->isFailed())
	{
		// Treat the unfinished part of a failed episode as receiving minReward.
		averageReward += minReward * (nStepsPerEpisode - nSteps);
		nSteps = nStepsPerEpisode;
	}
	return averageReward / nSteps;
}

void CAverageRewardSameStateCalculator::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
	averageReward += reward;
	nSteps++;
}

void CAverageRewardSameStateCalculator::newEpisode()
{
	averageReward = 0;
	nSteps = 0;
}
// Same measure as CValueCalculator (discounted return with the exponent
// advanced by the action duration), but evaluated from fixed start states.
CValueSameStateCalculator::CValueSameStateCalculator(CAgent *agent, CRewardFunction *rewardFunction, CTransitionFunctionEnvironment *environment, CStateList *startStates, int nStepsPerEpisode, rlt_real gamma) : CPolicySameStateEvaluator(agent, rewardFunction, environment, startStates, nStepsPerEpisode)
{
	nSteps = 0;
	value = 0;
	addParameter("DiscountFactor", gamma);
}

rlt_real CValueSameStateCalculator::getEpisodeValue()
{
	return value;
}

void CValueSameStateCalculator::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
	value += pow(getParameter("DiscountFactor"), nSteps) * reward;
	nSteps += action->getDuration();
}

void CValueSameStateCalculator::newEpisode()
{
	value = 0;
	nSteps = 0;
}
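
// Usage sketch for CValueSameStateCalculator (assumes agent, rewardFunction, a
// CTransitionFunctionEnvironment *environment, and a CStateList *startStates
// created elsewhere; the episode length and discount factor are illustrative):
//
//   CValueSameStateCalculator calc(agent, rewardFunction, environment, startStates, 1000, 0.95);
//   calc.getNewStartStates();               // sample a fresh set of fixed start states
//   rlt_real value = calc.evaluatePolicy(); // mean discounted return over those start states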
// Measures how greedy the executed policy is: the fraction of steps per
// episode in which the executed action equals the action chosen by the given
// greedy policy.
CPolicyGreedynessEvaluator::CPolicyGreedynessEvaluator(CAgent *agent, CRewardFunction *reward, int nEpisodes, int nStepsPerEpisode, CAgentController *l_greedyPolicy) : CPolicyEvaluator(agent, reward, nEpisodes, nStepsPerEpisode)
{
	nGreedyActions = 0;
	greedyPolicy = l_greedyPolicy;
	actionDataSet = new CActionDataSet(greedyPolicy->getActions());
}

CPolicyGreedynessEvaluator::~CPolicyGreedynessEvaluator()
{
	delete actionDataSet;
}

rlt_real CPolicyGreedynessEvaluator::getEpisodeValue()
{
	return (rlt_real) nGreedyActions / (rlt_real) nStepsPerEpisode;
}

void CPolicyGreedynessEvaluator::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *nextState)
{
	// Count the step as greedy if the executed action matches the greedy
	// policy's choice (including its action data) for the old state.
	CAction *greedyAction = greedyPolicy->getNextAction(oldState);
	if (action->isSameAction(greedyAction, actionDataSet->getActionData(greedyAction)))
	{
		nGreedyActions++;
	}
}

void CPolicyGreedynessEvaluator::newEpisode()
{
	nGreedyActions = 0;
}
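
// Usage sketch for CPolicyGreedynessEvaluator (assumes agent, rewardFunction,
// and a greedy CAgentController *greedyController created elsewhere; the
// episode counts are illustrative):
//
//   CPolicyGreedynessEvaluator greedyness(agent, rewardFunction, 10, 1000, greedyController);
//   rlt_real greedyFraction = greedyness.evaluatePolicy(); // in [0, 1]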