// crewardmodel.cpp
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ril_debug.h"
#include "crewardmodel.h"
#include <math.h>
/**
 * Builds a reward model that relies on an external estimated model.
 *
 * A reward table with getNumActions() x discretizer->getDiscreteStateSize()
 * cells is allocated and every cell receives its own empty CFeatureMap.
 * The flag bExternVisitSparse is set to true and no visit table is created.
 *
 * @param actions     action set handed to CActionObject
 * @param function    reward function handed to CSemiMDPRewardListener
 * @param model       estimated model stored in this->model (not copied)
 * @param discretizer state modifier handed to CFeatureRewardFunction; also
 *                    supplies the number of discrete states for the table
 */
CFeatureRewardModel::CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CAbstractFeatureStochasticEstimatedModel *model, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function), CActionObject(actions)
{
    this->rewardTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());
    for (int i = 0; i < rewardTable->getSize(); i++)
    {
        rewardTable->set1D(i, new CFeatureMap());
    }
    this->model = model;
    // This constructor allocates no visit table; give the pointer a defined
    // value instead of leaving it indeterminate.
    this->visitTable = NULL;
    this->bExternVisitSparse = true;
}
/**
 * Builds a reward model that keeps its own visit table.
 *
 * Both the reward table and the visit table are allocated with
 * getNumActions() x discretizer->getDiscreteStateSize() cells, and every cell
 * receives its own empty CFeatureMap. The flag bExternVisitSparse is set to
 * false.
 *
 * @param actions     action set handed to CActionObject
 * @param function    reward function handed to CSemiMDPRewardListener
 * @param discretizer state modifier handed to CFeatureRewardFunction; also
 *                    supplies the number of discrete states for the tables
 */
CFeatureRewardModel::CFeatureRewardModel(CActionSet *actions, CRewardFunction *function, CStateModifier *discretizer) : CFeatureRewardFunction(discretizer), CSemiMDPRewardListener(function), CActionObject(actions)
{
    int i;
    this->rewardTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());
    for (i = 0; i < rewardTable->getSize(); i++)
    {
        rewardTable->set1D(i, new CFeatureMap());
    }
    this->visitTable = new CMyArray2D<CFeatureMap *>(getNumActions(), discretizer->getDiscreteStateSize());
    for (i = 0; i < visitTable->getSize(); i++)
    {
        visitTable->set1D(i, new CFeatureMap());
    }
    // No external model is supplied to this constructor; give the pointer a
    // defined value instead of leaving it indeterminate.
    this->model = NULL;
    this->bExternVisitSparse = false;
}
/**
 * Frees every CFeatureMap held by the reward table and then the table itself.
 * The visit table is freed the same way, but only when bExternVisitSparse is
 * false.
 */
CFeatureRewardModel::~CFeatureRewardModel()
{
    int cell = 0;
    while (cell < rewardTable->getSize())
    {
        delete rewardTable->get1D(cell);
        ++cell;
    }
    delete rewardTable;

    if (bExternVisitSparse)
    {
        // The visit table is not touched in this mode.
        return;
    }

    cell = 0;
    while (cell < visitTable->getSize())
    {
        delete visitTable->get1D(cell);
        ++cell;
    }
    delete visitTable;
}
/**
 * Returns the visit weight recorded for the transition
 * (oldState, action) -> newState.
 *
 * When bExternVisitSparse is false the value is read from the own visit
 * table. Otherwise it is the transition's probability in the external model
 * multiplied by the model's state-action visit count, or 0 if the model has
 * no such transition.
 */
rlt_real CFeatureRewardModel::getTransitionVisits(int oldState, int action, int newState)
{
    if (!this->bExternVisitSparse)
    {
        return visitTable->get(action, oldState)->getValue(newState);
    }

    CTransition *transition = model->getForwardTransitions(action, oldState)->getTransition(newState);
    if (transition == NULL)
    {
        return 0;
    }
    return transition->getPropability() * model->getStateActionVisits(oldState, action);
}
/**
 * Returns the accumulated reward stored for (oldState, action) -> newState
 * divided by the transition's visit weight, or 0.0 when that weight is not
 * greater than zero.
 */
rlt_real CFeatureRewardModel::getReward(int oldState, CAction *action, int newState)
{
    const int actionIdx = getActions()->getIndex(action);
    const rlt_real visitWeight = getTransitionVisits(oldState, actionIdx, newState);

    // Guard against dividing by a zero (or negative) visit weight.
    if (!(visitWeight > 0))
    {
        return 0.0;
    }
    return rewardTable->get(actionIdx, oldState)->getValue(newState) / visitWeight;
}
/**
 * Adds one observed step to the reward table (and to the own visit table when
 * bExternVisitSparse is false).
 *
 * Feature states: for every pair of an old-state feature and a new-state
 * feature, the reward sum grows by reward * newFactor * oldFactor and the
 * visit sum by newFactor * oldFactor, where the factors are the continuous
 * state values stored at the same index as the feature.
 *
 * Discrete states: the single (old, new) cell grows by reward, and the visit
 * cell by 1.0.
 *
 * Any other state type leaves the tables untouched.
 */
void CFeatureRewardModel::nextStep(CStateCollection *oldState, CAction *action, rlt_real reward, CStateCollection *newState)
{
    CState *prevState = oldState->getState(properties);
    CState *nextState = newState->getState(properties);
    int actionIndex = getActions()->getIndex(action);
    int stateKind = prevState->getStateProperties()->getType() & (DISCRETESTATE | FEATURESTATE);

    if (stateKind == FEATURESTATE)
    {
        for (unsigned int i = 0; i < prevState->getNumDiscreteStates(); i++)
        {
            int fromFeature = prevState->getDiscreteState(i);
            CFeatureMap *rewardSums = rewardTable->get(actionIndex, fromFeature);

            for (unsigned int j = 0; j < nextState->getNumDiscreteStates(); j++)
            {
                int toFeature = nextState->getDiscreteState(j);

                rlt_real rewardSoFar = rewardSums->getValue(toFeature);
                (*rewardSums)[toFeature] = rewardSoFar + reward * nextState->getContinuousState(j) * prevState->getContinuousState(i);

                if (bExternVisitSparse)
                {
                    continue;
                }

                CFeatureMap *visitSums = visitTable->get(actionIndex, fromFeature);
                rlt_real visitsSoFar = visitSums->getValue(toFeature);
                (*visitSums)[toFeature] = visitsSoFar + nextState->getContinuousState(j) * prevState->getContinuousState(i);
            }
        }
    }
    else if (stateKind == DISCRETESTATE)
    {
        int fromState = prevState->getDiscreteState(0);
        int toState = nextState->getDiscreteState(0);

        CFeatureMap *rewardSums = rewardTable->get(actionIndex, fromState);
        rlt_real rewardSoFar = rewardSums->getValue(toState);
        (*rewardSums)[toState] = rewardSoFar + reward;

        if (!bExternVisitSparse)
        {
            CFeatureMap *visitSums = visitTable->get(actionIndex, fromState);
            rlt_real visitsSoFar = visitSums->getValue(toState);
            (*visitSums)[toState] = visitsSoFar + 1.0;
        }
    }
}
/**
 * Writes the reward table, and the visit table when bExternVisitSparse is
 * false, to stream as text.
 *
 * Layout per table: a title line, then for every action an "Action N:" line
 * followed by one "Startstate S [count]: (end value)(end value)..." line per
 * start state, and a blank line after each action.
 *
 * @param stream output stream; must be open for writing
 */
void CFeatureRewardModel::saveData(FILE *stream)
{
    CFeatureMap::iterator mapIt;
    CFeatureMap *featMap;
    fprintf(stream, "Reward Table\n");
    for (unsigned int action = 0; action < getNumActions(); action ++)
    {
        fprintf(stream, "Action %d:\n", action);
        for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
        {
            featMap = rewardTable->get(action, startState);
            // size() is cast because "%d" requires an int argument; passing a
            // wider size type through varargs is a format mismatch.
            fprintf(stream, "Startstate %d [%d]: ", startState, static_cast<int>(featMap->size()));
            for (mapIt = featMap->begin(); mapIt != featMap->end(); ++mapIt)
            {
                fprintf(stream, "(%d %f)", (*mapIt).first, (*mapIt).second);
            }
            fprintf(stream, "\n");
        }
        fprintf(stream, "\n");
    }
    if (!this->bExternVisitSparse)
    {
        fprintf(stream, "Visit Table\n");
        for (unsigned int action = 0; action < getNumActions(); action ++)
        {
            fprintf(stream, "Action %d:\n", action);
            for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
            {
                featMap = visitTable->get(action, startState);
                fprintf(stream, "Startstate %d [%d]: ", startState, static_cast<int>(featMap->size()));
                for (mapIt = featMap->begin(); mapIt != featMap->end(); ++mapIt)
                {
                    fprintf(stream, "(%d %f)", (*mapIt).first, (*mapIt).second);
                }
                fprintf(stream, "\n");
            }
            fprintf(stream, "\n");
        }
    }
}
void CFeatureRewardModel::loadData(FILE *stream)
{
CFeatureMap *featMap;
fscanf(stream, "Reward Table\n");
int buf, numVal = 0, endState;
rlt_real reward;
for (unsigned int action = 0; action < getNumActions(); action ++)
{
fscanf(stream, "Action %d:\n", &buf);
for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
{
featMap = rewardTable->get(action, startState);
featMap->clear();
fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal);
for (int i = 0; i < numVal; i ++)
{
fscanf(stream, "(%d %lf)", &endState, &reward);
(*featMap)[endState] = reward;
}
fscanf(stream, "\n");
}
fscanf(stream, "\n");
}
if (!this->bExternVisitSparse)
{
fprintf(stream, "Visit Table\n");
for (unsigned int action = 0; action < getNumActions(); action ++)
{
fscanf(stream, "Action %d:\n", &buf);
for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
{
featMap = visitTable->get(action, startState);
featMap->clear();
fscanf(stream, "Startstate %d [%d]: ", &buf, &numVal);
for (int i = 0; i < numVal; i ++)
{
fscanf(stream, "(%d %lf)", &endState, &reward);
(*featMap)[endState] = reward;
}
fscanf(stream, "\n");
}
fscanf(stream, "\n");
}
}
}
void CFeatureRewardModel::resetData()
{
CFeatureMap *featMap;
for (unsigned int action = 0; action < getNumActions(); action ++)
{
for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
{
featMap = rewardTable->get(action, startState);
featMap->clear();
}
}
if (!this->bExternVisitSparse)
{
for (unsigned int action = 0; action < getNumActions(); action ++)
{
for (unsigned int startState = 0; startState < discretizer->getDiscreteStateSize(); startState ++)
{
featMap = visitTable->get(action, startState);
featMap->clear();
}
}
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -