// File: ctestscripts.cpp
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ctestscripts.h"
#include "clearneddynamicmodel.h"
#include "cpegasus.h"
#include "cprioritizedsweeping.h"
#include "ccartpolemodel.h"
#include "cacrobotmodel.h"
#include "cexploration.h"
#include "ril_debug.h"
#include <sstream>
// Builds an empty test-suite collection bound to a test-suite directory.
//
// Nothing is allocated here: each component pointer handled below is reset to
// NULL. addVRBFTestSuites() dereferences agent and dynModel without a check,
// so those have to be assigned somewhere else before it is called.
//
// testSuiteDirectory: stored in the member of the same name.
// NOTE(review): if that member is a raw char*, only the pointer is kept and
// the caller's buffer must outlive this object -- confirm against the header.
CMyTestSuiteCollection::CMyTestSuiteCollection(char *testSuiteDirectory)
{
    // The parameter shadows the member, hence the explicit this->.
    this->testSuiteDirectory = testSuiteDirectory;

    // Environment and its dynamics.
    dynModel = NULL;
    modelEnvironment = NULL;

    // Agent plus the action set and reward it works with.
    agent = NULL;
    staticContActions = NULL;
    rewardFunction = NULL;

    // State feature calculator.
    rbfCalculator = NULL;

    // The call to initModelVariables() stays disabled, as before:
    //initModelVariables();
}
// Releases the components held by the collection.
//
// The constructor resets each of these pointers to NULL, and deleting a null
// pointer is a no-op, so this is safe even if the components were never set.
// NOTE(review): the deletion order is kept exactly as written; whether the
// pointees' destructors depend on it cannot be determined from this file.
CMyTestSuiteCollection::~CMyTestSuiteCollection()
{
delete agent;
delete staticContActions;
delete rewardFunction;
// rbfCalculator is deliberately left alone here (the delete is disabled).
// NOTE(review): it is handed to agent->addStateModifier() in
// addVRBFTestSuites(); confirm who owns it, otherwise this is a leak.
// delete rbfCalculator;
delete modelEnvironment;
delete dynModel;
// NOTE(review): rbfVFunction and contExploration are used by
// addVRBFTestSuites() but are not released here -- verify their ownership.
}
void CMyTestSuiteCollection::addVRBFTestSuites()
{
agent->addStateModifier(rbfCalculator);
// VFunctionLearner
CVFunctionNumericInputDerivationCalculator *vFunctionInputDerivation = new CVFunctionNumericInputDerivationCalculator(dynModel->getStateProperties(), rbfVFunction, 0.025, agent->getStateModifiers());
CAbstractQFunction *qFunctionFromTransitionFunction = new CQFunctionFromTransitionFunction(staticContActions, rbfVFunction, dynModel, rewardFunction, agent->getStateModifiers());
CAbstractQFunction *contqFunctionFromTransitionFunction = new CContinuousTimeQFunctionFromTransitionFunction(staticContActions, vFunctionInputDerivation, dynModel, rewardFunction, agent->getStateModifiers());
CVFunctionLearner *vLearnerDiscDirect = new CVFunctionGradientLearner(rewardFunction, rbfVFunction, new CDiscreteResidual(0.95), new CDirectGradient());
CVFunctionLearner *vLearnerEulerDirect = new CVFunctionGradientLearner(rewardFunction, rbfVFunction, new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0), new CDirectGradient());
CVFunctionLearner *vLearnerCoulomDirect = new CVFunctionGradientLearner(rewardFunction, rbfVFunction, new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CDirectGradient());
CVFunctionLearner *vLearnerDiscConstBeta = new CVFunctionGradientLearner(rewardFunction, rbfVFunction, new CDiscreteResidual(0.95), new CResidualBetaFunction(new CConstantBetaCalculator(0.4), new CDiscreteResidual(0.95)));
CVFunctionLearner *vLearnerEulerConstBeta = new CVFunctionGradientLearner(rewardFunction, rbfVFunction, new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0),new CResidualBetaFunction(new CConstantBetaCalculator(0.4), new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0)));
CVFunctionLearner *vLearnerCoulomConstBeta = new CVFunctionGradientLearner(rewardFunction, rbfVFunction, new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CResidualBetaFunction(new CConstantBetaCalculator(0.4), new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0)));
CVFunctionLearner *vLearnerDiscVarBeta = new CVFunctionResidualLearner(rewardFunction, rbfVFunction, new CDiscreteResidual(0.95), new CDiscreteResidual(0.95), new CVariableBetaCalculator(0.01, 0.9));
CVFunctionLearner *vLearnerEulerVarBeta = new CVFunctionResidualLearner(rewardFunction, rbfVFunction, new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0), new CContinuousEulerResidual(dynModel->getTimeIntervall(), 1.0), new CVariableBetaCalculator(0.01, 0.9));
CVFunctionLearner *vLearnerCoulomVarBeta = new CVFunctionResidualLearner(rewardFunction, rbfVFunction, new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CContinuousCoulomResidual(dynModel->getTimeIntervall(), 1.0), new CVariableBetaCalculator(0.01, 0.9));
/*CContinuousActionPolicy *discVMPolicy = new CContinuousActionPolicy(dynModel->getContinuousAction(), new CGreedyDistribution(), qFunctionFromTransitionFunction, staticContActions, -1.0);
CContinuousActionPolicy *contAddPolicy = new CContinuousActionPolicy(dynModel->getContinuousAction(), new CSoftMaxDistribution(), contqFunctionFromTransitionFunction, staticContActions, -1.0);*/
CAgentController *discVMPolicy = new CVMStochasticPolicy(staticContActions, new CSoftMaxDistribution(10), rbfVFunction, dynModel, rewardFunction, agent->getStateModifiers());
CAgentController *contVMPolicy = new CContinuousTimeVMPolicy(staticContActions, new CSoftMaxDistribution(10), vFunctionInputDerivation, dynModel, rewardFunction);
CContinuousTimeAndActionSigmoidVMPolicy *contSigPolicy = new CContinuousTimeAndActionSigmoidVMPolicy(dynModel->getContinuousAction(), vFunctionInputDerivation, dynModel);
contSigPolicy->setParameter("SigmoidPolicyCFactor", 100);
CContinuousTimeAndActionBangBangVMPolicy *contBangBangPolicy = new CContinuousTimeAndActionBangBangVMPolicy(dynModel->getContinuousAction(), vFunctionInputDerivation, dynModel);
contSigPolicy->setRandomController(contExploration);
contBangBangPolicy->setRandomController(contExploration);
CContinuousActionPolicy *discAddPolicy = new CContinuousActionPolicy(dynModel->getContinuousAction(), new CSoftMaxDistribution(1000.0), qFunctionFromTransitionFunction, staticContActions, 5.5);
CContinuousActionPolicy *contAddPolicy = new CContinuousActionPolicy(dynModel->getContinuousAction(), new CSoftMaxDistribution(1000.0), contqFunctionFromTransitionFunction, staticContActions, 5.5);
contSigPolicy->setRandomController(contExploration);
contBangBangPolicy->setRandomController(contExploration);
addTestSuite(new CListenerTestSuite(agent, vLearnerDiscDirect, discVMPolicy, rbfVFunction, "VRBFDiscDirectDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerEulerDirect, discVMPolicy, rbfVFunction, "VRBFEulerDirectDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerCoulomDirect, discVMPolicy, rbfVFunction, "VRBFCoulomDirectDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerDiscConstBeta, discVMPolicy, rbfVFunction, "VRBFDiscConstBetaDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerCoulomConstBeta, discVMPolicy, rbfVFunction, "VRBFCoulomConstBetaDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerEulerConstBeta, discVMPolicy, rbfVFunction, "VRBFEulerConstBetaDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerDiscVarBeta, discVMPolicy, rbfVFunction, "VRBFDiscVarBetaDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerEulerVarBeta, discVMPolicy, rbfVFunction, "VRBFEulerVarBetaDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerCoulomVarBeta, discVMPolicy, rbfVFunction, "VRBFCoulomVarBetaDiscVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerDiscDirect, contVMPolicy, rbfVFunction, "VRBFDiscDirectContVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerEulerDirect, contVMPolicy, rbfVFunction, "VRBFEulerDirectContVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerCoulomDirect, contVMPolicy, rbfVFunction, "VRBFCoulomDirectContVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerDiscConstBeta, contVMPolicy, rbfVFunction, "VRBFDiscConstBetaContVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerCoulomConstBeta, contVMPolicy, rbfVFunction, "VRBFCoulomConstBetaContVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerEulerConstBeta, contVMPolicy, rbfVFunction, "VRBFEulerConstBetaContVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerDiscVarBeta, contVMPolicy, rbfVFunction, "VRBFDiscVarBetaContVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerEulerVarBeta, contVMPolicy, rbfVFunction, "VRBFEulerVarBetaContVMPolicy"));
addTestSuite(new CListenerTestSuite(agent, vLearnerCoulomVarBeta, contVMPolicy, rbfVFunction, "VRBFCoulomVarBetaContVMPolicy"));
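// ---------------------------------------------------------------------------
// NOTE: the lines below are residue from the web code viewer this file was
// copied from (keyboard-shortcut help); they are not part of the program.
//   Keyboard shortcuts
//   Copy code            Ctrl + C
//   Search code          Ctrl + F
//   Full-screen mode     F11
//   Switch theme         Ctrl + Shift + D
//   Show shortcuts       ?
//   Increase font size   Ctrl + =
//   Decrease font size   Ctrl + -
// ---------------------------------------------------------------------------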