// cagent.cpp
// Copyright (C) 200
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cagent.h"
#include "cepisode.h"
#include "ril_debug.h"
#include "ccontinuousactions.h"
#include <assert.h>
#ifdef WIN32
#include <conio.h>
/**
 * WIN32 variant: reports whether a key press is pending on the console.
 *
 * When a key press is pending, every pending character is consumed with
 * _getch() before returning, so one call reports the hit exactly once.
 *
 * @return true if at least one key press was pending, false otherwise.
 */
bool RIL_Toolbox_KeyboardHit()
{
    if (_kbhit() == 0)
    {
        return false;
    }

    // Drain everything that is waiting in the console input buffer.
    while (_kbhit() != 0)
    {
        _getch();
    }
    return true;
}
/**
 * WIN32 variant of the keypress setup hook.
 *
 * Does nothing in this build; it exists so callers can use the same
 * Set/Reset pair as in the UNIX build.
 */
void RIL_Toolbox_Set_Keypress()
{
    // Intentionally empty.
}
/**
 * WIN32 variant of the keypress teardown hook.
 *
 * Does nothing in this build; it is the counterpart of
 * RIL_Toolbox_Set_Keypress().
 */
void RIL_Toolbox_Reset_Keypress()
{
    // Intentionally empty.
}
#else // UNIX
#include <poll.h>
#include <termios.h>
#include <unistd.h>
static struct termios RIL_Toolbox_stored_settings;
/**
 * UNIX variant: switches the terminal on standard input to non-canonical mode.
 *
 * The current terminal attributes are saved in RIL_Toolbox_stored_settings
 * (restored later by RIL_Toolbox_Reset_Keypress()). A copy with ICANON
 * cleared, VTIME = 0 and VMIN = 1 is then applied immediately (TCSANOW).
 *
 * If the current attributes cannot be read (tcgetattr fails, e.g. because
 * standard input is not a terminal), the terminal is left untouched instead
 * of being reconfigured from a snapshot that was never filled in.
 */
void RIL_Toolbox_Set_Keypress()
{
    if (tcgetattr(STDIN_FILENO, &RIL_Toolbox_stored_settings) != 0)
    {
        // Nothing was saved, so there is nothing sensible to derive from.
        return;
    }

    struct termios new_settings = RIL_Toolbox_stored_settings;
    /* Disable canonical mode, and set buffer size to 1 byte */
    new_settings.c_lflag &= (~ICANON);
    new_settings.c_cc[VTIME] = 0;
    new_settings.c_cc[VMIN] = 1;
    tcsetattr(STDIN_FILENO, TCSANOW, &new_settings);
}
/**
 * UNIX variant: re-applies the terminal attributes held in
 * RIL_Toolbox_stored_settings to standard input, effective immediately.
 *
 * NOTE(review): this presumes RIL_Toolbox_Set_Keypress() ran beforehand and
 * filled the stored settings; nothing here verifies that.
 */
void RIL_Toolbox_Reset_Keypress()
{
    // STDIN_FILENO is file descriptor 0, the descriptor used by Set_Keypress.
    tcsetattr(STDIN_FILENO, TCSANOW, &RIL_Toolbox_stored_settings);
}
/**
 * UNIX variant: polls standard input for pending input.
 *
 * Calls poll() on STDIN_FILENO for POLLIN with a timeout argument of 1
 * (milliseconds, per POSIX). Unlike the WIN32 variant, the pending input is
 * not consumed here.
 *
 * A failed poll() (return value -1) is reported as "no key hit"; revents is
 * cleared up front so it is never read uninitialised.
 *
 * @return true if poll() reported an event on standard input, false on
 *         timeout or error.
 */
bool RIL_Toolbox_KeyboardHit()
{
    pollfd p;
    p.fd = STDIN_FILENO;
    p.events = POLLIN;
    p.revents = 0;

    const int numfds = poll(&p, 1, 1);

    // numfds < 0 means poll() failed; numfds == 0 means the timeout expired.
    return (numfds > 0) && (p.revents != 0);
}
#endif // WIN32
/**
 * Creates a sender with an empty, heap-allocated listener list.
 * The list is released again in the destructor.
 */
CSemiMDPSender::CSemiMDPSender()
{
    typedef std::list<CSemiMDPListener *> ListenerList;
    SMDPListeners = new ListenerList();
}
/**
 * Frees the listener list itself. The listener objects stored in the list
 * are not deleted here.
 */
CSemiMDPSender::~CSemiMDPSender()
{
    delete this->SMDPListeners;
}
/**
 * Registers a listener at the end of the listener list.
 * A listener that is already registered is not added a second time.
 *
 * @param listener listener to register.
 */
void CSemiMDPSender::addSemiMDPListener(CSemiMDPListener *listener)
{
    if (isListenerAdded(listener))
    {
        // Already present, keep the list free of duplicates.
        return;
    }
    SMDPListeners->push_back(listener);
}
/**
 * Unregisters a listener: every list entry equal to the given pointer is
 * removed. Removing a listener that is not registered has no effect.
 *
 * @param listener listener to unregister.
 */
void CSemiMDPSender::removeSemiMDPListener(CSemiMDPListener *listener)
{
    this->SMDPListeners->remove(listener);
}
/**
 * Checks whether a listener is currently registered (pointer comparison).
 *
 * @param listener listener to look for.
 * @return true if the pointer is contained in the listener list.
 */
bool CSemiMDPSender::isListenerAdded(CSemiMDPListener *listener)
{
    std::list<CSemiMDPListener *>::iterator pos = SMDPListeners->begin();
    const std::list<CSemiMDPListener *>::iterator last = SMDPListeners->end();

    // Advance until the pointer is found or the list is exhausted.
    while (pos != last && *pos != listener)
    {
        ++pos;
    }
    return pos != last;
}
void CSemiMDPSender::startNewEpisode()
{
for (std::list<CSemiMDPListener *>::iterator it = SMDPListeners->begin(); it != SMDPListeners->end(); it++)
{
if ((*it)->enabled)
{
(*it)->newEpisode();
}
}
}
/**
 * Forwards a step to every enabled listener and logs how many clock ticks
 * each call took (debug mode 't').
 *
 * The listener number in the log is the position in the listener list;
 * disabled listeners are skipped but still counted.
 *
 * @param lastState    state before the action.
 * @param action       action that was executed.
 * @param currentState state after the action.
 */
void CSemiMDPSender::sendNextStep(CStateCollection *lastState, CAction *action, CStateCollection *currentState)
{
    int i = 0;
    for (std::list<CSemiMDPListener *>::iterator it = SMDPListeners->begin(); it != SMDPListeners->end(); ++it, ++i)
    {
        if ((*it)->enabled)
        {
            const clock_t ticks1 = clock();
            (*it)->nextStep(lastState, action, currentState);
            const clock_t ticks2 = clock();
            // clock_t need not be int; convert explicitly so the argument
            // matches the "%d" conversion in the format string.
            DebugPrint('t', "Time needed for listener %d: %d\n", i, static_cast<int>(ticks2 - ticks1));
        }
    }
}
/**
 * Forwards an intermediate step to every enabled listener.
 *
 * Disabled listeners are skipped, matching startNewEpisode() and
 * sendNextStep(), which also notify only listeners whose enabled flag is
 * set.
 *
 * @param lastState    intermediate state the step starts from.
 * @param action       action the intermediate step belongs to.
 * @param currentState state the step ends in.
 */
void CSemiMDPSender::sendIntermediateStep(CStateCollection *lastState, CAction *action, CStateCollection *currentState)
{
    for (std::list<CSemiMDPListener *>::iterator it = SMDPListeners->begin(); it != SMDPListeners->end(); ++it)
    {
        if ((*it)->enabled)
        {
            (*it)->intermediateStep(lastState, action, currentState);
        }
    }
}
/**
 * Creates a semi-MDP with a freshly allocated, empty action set (handed to
 * the CDeterministicController base), no last action, and all step and
 * episode counters at zero.
 */
CSemiMarkovDecisionProcess::CSemiMarkovDecisionProcess() : CDeterministicController(new CActionSet())
{
    lastAction = NULL;

    // Counters start from zero.
    totalSteps = 0;
    currentSteps = 0;
    currentEpisodeNumber = 0;
}
/**
 * Deletes the object referenced by the actions member; the constructor
 * passes a newly allocated CActionSet to the CDeterministicController base.
 */
CSemiMarkovDecisionProcess::~CSemiMarkovDecisionProcess()
{
    delete this->actions;
}
/*
For the intermediate steps within an extended action, all states that occurred before the extended action finished are also sent, each as the tuple
intermediate state - action - current state. The duration of the extended action is also reduced in the intermediate steps.
*/
/** Counts the step and, if the given action has finished, sends the step to all listeners.
Only a multi-step action can be unfinished; every other action counts as finished.
currentSteps and totalSteps are increased by one on every call. For a multi-step action
that is also a primitive action, currentSteps is additionally advanced by duration - 1,
because its intermediate steps were never counted individually.
@see CSemiMDPListener
*/
void CSemiMarkovDecisionProcess::sendNextStep(CStateCollection *lastState, CAction *action, CStateCollection *currentState)
{
    ++currentSteps;
    ++totalSteps;

    bool actionCompleted = true;

    if (action->isType(MULTISTEPACTION))
    {
        CMultiStepActionData *stepData = dynamic_cast<CMultiStepAction *>(action)->getMultiStepActionData();
        actionCompleted = stepData->finished;
        int stepCount = stepData->duration;

        if (action->isType(PRIMITIVEACTION))
        {
            // One step was already counted above; add the remaining ones.
            currentSteps += stepCount - 1;
        }
    }

    if (!actionCompleted)
    {
        // The action is still running: nothing is sent yet.
        return;
    }

    // The controller is informed first, then the registered listeners.
    CDeterministicController::nextStep(lastState, action, currentState);
    CSemiMDPSender::sendNextStep(lastState, action, currentState);
}
/**
 * Accessor for the lastAction member.
 *
 * @return the stored pointer; the constructor initialises it to NULL.
 */
CAction* CSemiMarkovDecisionProcess::getLastAction()
{
    return this->lastAction;
}
/**
 * Starts a new episode: notifies the controller base and all enabled
 * listeners first, then resets the per-episode step counter, increases the
 * episode number and marks the next step as the first one of the episode.
 */
void CSemiMarkovDecisionProcess::startNewEpisode()
{
    // Notifications go out before the counters change.
    CDeterministicController::newEpisode();
    CSemiMDPSender::startNewEpisode();

    isFirstStep = true;
    ++currentEpisodeNumber;
    currentSteps = 0;
}
/**
 * Adds an action to the action set and registers it with the action data
 * set.
 *
 * @param action action to add.
 */
void CSemiMarkovDecisionProcess::addAction(CAction *action)
{
    this->actions->add(action);
    this->actionDataSet->addActionData(action);
}
/**
 * Creates a hierarchical semi-MDP on top of a logged episode.
 *
 * The episode is kept as currentEpisode. The past and current state
 * collections are built from the episode's state properties, and the
 * episode's state modifiers are added to this object.
 *
 * @param loggedEpisode episode to use; it is dereferenced unconditionally.
 */
CHierarchicalSemiMarkovDecisionProcess::CHierarchicalSemiMarkovDecisionProcess(CEpisode *loggedEpisode) : CSemiMarkovDecisionProcess(), CStateModifiersObject(loggedEpisode->getStateProperties())
{
    currentEpisode = loggedEpisode;

    pastState = new CStateCollectionImpl(loggedEpisode->getStateProperties());
    currentState = new CStateCollectionImpl(loggedEpisode->getStateProperties());

    addStateModifiers(loggedEpisode->getStateModifiers());
}
/**
 * Creates a hierarchical semi-MDP without a logged episode
 * (currentEpisode is NULL).
 *
 * @param modelProperties state properties used for the past and current
 *                        state collections.
 * @param modifiers       optional list of state modifiers to add; ignored
 *                        when NULL.
 */
CHierarchicalSemiMarkovDecisionProcess::CHierarchicalSemiMarkovDecisionProcess(CStateProperties *modelProperties, std::list<CStateModifier *> *modifiers) :CSemiMarkovDecisionProcess(), CStateModifiersObject(modelProperties)
{
    currentEpisode = NULL;

    pastState = new CStateCollectionImpl(modelProperties);
    currentState = new CStateCollectionImpl(modelProperties);

    if (modifiers != NULL)
    {
        addStateModifiers(modifiers);
    }
}
/**
 * Releases the two state collections allocated by the constructors.
 * The episode pointer is not deleted here.
 */
CHierarchicalSemiMarkovDecisionProcess::~CHierarchicalSemiMarkovDecisionProcess()
{
    delete this->pastState;
    delete this->currentState;
}
/**
 * Adds a state modifier to both internal state collections (past and
 * current) and then to the CStateModifiersObject base.
 *
 * @param modifier modifier to add.
 */
void CHierarchicalSemiMarkovDecisionProcess::addStateModifier(CStateModifier *modifier)
{
    // Both state collections are updated before the base class.
    this->pastState->addStateModifier(modifier);
    this->currentState->addStateModifier(modifier);

    CStateModifiersObject::addStateModifier(modifier);
}
/**
 * Removes a state modifier from both internal state collections (past and
 * current) and then from the CStateModifiersObject base.
 *
 * @param modifier modifier to remove.
 */
void CHierarchicalSemiMarkovDecisionProcess::removeStateModifier(CStateModifier *modifier)
{
    // Both state collections are updated before the base class.
    this->pastState->removeStateModifier(modifier);
    this->currentState->removeStateModifier(modifier);

    CStateModifiersObject::removeStateModifier(modifier);
}
void CHierarchicalSemiMarkovDecisionProcess::sendNextStep(CAction *action)
{
CDeterministicController::nextStep(pastState, action, currentState);
CSemiMarkovDecisionProcess::sendNextStep(pastState, action, currentState);
if (action->isType(EXTENDEDACTION))
{
CExtendedAction *mAction = dynamic_cast<CExtendedAction *>(action);
if (mAction->getMultiStepActionData()->finished && mAction->sendIntermediateSteps && currentEpisode != NULL)
{
// send the Intermediate Steps and the "rlt_real" Step of the ExtendedAction
int oldDuration = mAction->getDuration();
int episodeIndex = currentEpisode->getNumSteps() - 1;
CAction *interAction = currentEpisode->getAction(episodeIndex);
// set new duration of the extendedAction
mAction->getMultiStepActionData()->duration = interAction->getDuration();
// Send intermediate Steps
if (mAction->sendIntermediateSteps)
{
interAction = currentEpisode->getAction(episodeIndex);
// set new duration of the extendedAction
mAction->getMultiStepActionData()->duration = interAction->getDuration();
while (mAction->getMultiStepActionData()->duration < oldDuration)
{
assert(episodeIndex > 0);
currentEpisode->getStateCollection(episodeIndex, pastState);
CSemiMDPSender::sendIntermediateStep(pastState, mAction, currentState);
episodeIndex --;
// set new duration of the extendedAction
// NOTE: the listing is truncated at this point; the remainder of
// CHierarchicalSemiMarkovDecisionProcess::sendNextStep is not available.