?? cacrobotmodel.cpp
字號:
// Copyright (C) 2003
// Gerhard Neumann (gerhard@igi.tu-graz.ac.at)
//
// This file is part of RL Toolbox.
// http://www.igi.tugraz.at/ril_toolbox
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cacrobotmodel.h"
#include <math.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
CAcroBotModel::CAcroBotModel(rlt_real dt, rlt_real uMax, rlt_real length1, rlt_real length2, rlt_real mass1, rlt_real mass2, rlt_real mu_1, rlt_real mu_2, rlt_real g) : CDynamicLinearActionContinuousTimeModel(new CStateProperties(4, 0),new CContinuousAction(new CContinuousActionProperties(1)), dt)
{
this->uMax = uMax;
this->length1 = length1;
this->length2 = length2;
this->mass1 = mass1;
this->mass2 = mass2;
this->mu_1 = mu_1;
this->mu_2 = mu_2;
this->g = g;
properties->setMaxValue(0, M_PI);
properties->setMinValue(0, -M_PI);
properties->setPeriodicity(0, true);
properties->setMaxValue(1, M_PI * 2);
properties->setMinValue(1, -M_PI * 2);
properties->setMaxValue(2, M_PI);
properties->setMinValue(2, -M_PI);
properties->setPeriodicity(2, true);
properties->setMaxValue(3, M_PI * 3);
properties->setMinValue(3, -M_PI * 3);
contAction->getContinuousActionProperties()->setMaxActionValue(0,uMax);
contAction->getContinuousActionProperties()->setMinActionValue(0,-uMax);
}
CAcroBotModel::~CAcroBotModel()
{
delete properties;
delete actionProp;
delete contAction;
}
CMyMatrix *CAcroBotModel::getB(CState *state)
{
rlt_real Phi1 = state->getContinuousState(0);
// rlt_real dPhi1 = state->getContinuousState(1);
rlt_real Phi2 = state->getContinuousState(2) + Phi1;
// rlt_real dPhi2 = state->getContinuousState(3);
rlt_real denum = 16 * pow(length2/length1, 2) * (mass1 * mass2 / 9 + pow(mass2, 2) / 3)
- 4 * pow(length1 * length2*mass2*cos(Phi1-Phi2), 2);
rlt_real dPhi1_U = 4/3* pow(length2, 2) * mass2 * (-1) - 2 * length1 * length2 * mass2 * cos(Phi1 - Phi2);
rlt_real dPhi2_U = 2 * length1 * length2 * mass2 * cos(Phi1 - Phi2) + 4*(mass1/3 + mass2) / pow(length1, 2);
B->initMatrix(0.0);
B->setElement(1, 0, dPhi1_U / denum);
B->setElement(3, 0, dPhi2_U / denum);
return B;
}
CMyVector *CAcroBotModel::getA(CState *state)
{
rlt_real Phi1 = state->getContinuousState(0);
rlt_real dPhi1 = state->getContinuousState(1);
rlt_real Phi2 = state->getContinuousState(2) + Phi1;
rlt_real dPhi2 = state->getContinuousState(3);
rlt_real denum = 16 * pow(length2/length1, 2) * (mass1 * mass2 / 9 + pow(mass2, 2) / 3)
- 4 * pow(length1 * length2*mass2*cos(Phi1-Phi2), 2);
rlt_real b1, b2;
b1 = 2 * mass2 * length2 * length1 * pow(dPhi2,2) * sin(Phi2-Phi1) + (mass1 + 2 * mass2) * sin(Phi1) * length1* g - mu_1 * dPhi1;
b2 = 2 * mass2 * length2 * length1 * pow(dPhi1,2) * sin(Phi1-Phi2) + mass2 * sin(Phi2) * length2 * g - mu_2 * dPhi2;
rlt_real ddPhi1 = 4/3* pow(length2, 2) * mass2 * b1 - 2 * length1 * length2 * mass2 * cos(Phi1 - Phi2) * b2;
rlt_real ddPhi2 = - 2 * length1 * length2 * mass2 * cos(Phi1 - Phi2) * b1 + 4*(mass1/3 + mass2) / pow(length1, 2) * b2;
ddPhi1 = ddPhi1 / denum;
ddPhi2 = ddPhi2 / denum;
A->setElement(0, dPhi1);
A->setElement(1, ddPhi1);
A->setElement(2, dPhi2);
A->setElement(3, ddPhi2);
return A;
}
bool CAcroBotModel::isFailedState(CState *state)
{
return false;
}
void CAcroBotModel::doSimulationStep(CState *state, rlt_real timestep, CAction *action, CActionData *data)
{
getDerivationX(state, action, derivation, data);
rlt_real ddPhi1 = derivation->getElement(1);
rlt_real ddPhi2 = derivation->getElement(3);
for (unsigned int i = 0; i < state->getNumContinuousStates(); i++)
{
state->setContinuousState(i, state->getContinuousState(i) + timestep * derivation->getElement(i));
}
state->setContinuousState(0, state->getContinuousState(0) + pow(timestep, 2) * ddPhi1 / 2);
state->setContinuousState(2, state->getContinuousState(2) + pow(timestep, 2) * ddPhi2 / 2);
}
void CAcroBotModel::getResetState(CState *state)
{
CTransitionFunction::getResetState(state);
state->setContinuousState(1, 0.0);
state->setContinuousState(3, 0.0);
// state->setContinuousState(2, state->getContinuousState(2) - state->getContinuousState(0));
}
CAcroBotModelSutton::CAcroBotModelSutton(rlt_real dt, rlt_real uMax, rlt_real length1, rlt_real length2, rlt_real mass1, rlt_real mass2, rlt_real I1, rlt_real I2, rlt_real g) : CAcroBotModel(dt, uMax, length1, length2, mass1, mass2, 0.05, 0.05, g)
{
this->I1 = I1;
this->I2 = I2;
}
CAcroBotModelSutton::~CAcroBotModelSutton()
{
}
{
rlt_real Phi1 = state->getContinuousState(0) + M_PI;
rlt_real Phi2 = state->getContinuousState(2);
rlt_real d1 = mass1 * pow(length1 / 2.0, 2) + mass2 * (pow(length1, 2) + pow(length2 / 2.0, 2) + length1 * length2 * cos(Phi2)) + I1 + I2;
rlt_real d2 = mass2 * (pow(length2 / 2.0, 2) + length1 * length2 / 2.0 * cos(Phi2)) + I2;
rlt_real dPhi2_U = 1 / ( mass2 * pow(length2 / 2.0, 2) + I2 - pow(d2,2) / d1);
rlt_real dPhi1_U = - d2 / d1 * dPhi2_U;
B->initMatrix(0.0);
B->setElement(1, 0, dPhi1_U);
B->setElement(3, 0, dPhi2_U);
return B;
}
CMyVector *CAcroBotModelSutton::getA(CState *state)
{
rlt_real Phi1 = state->getContinuousState(0) - M_PI;
rlt_real Phi2 = state->getContinuousState(2);
rlt_real dPhi1 = state->getContinuousState(1);
rlt_real dPhi2 = state->getContinuousState(3);
rlt_real d1 = mass1 * pow(length1/ 2.0, 2) + mass2 * (pow(length1, 2) + pow(length2 / 2.0, 2) + length1 * length2 * cos(Phi2)) + I1 + I2;
rlt_real d2 = mass2 * (pow(length2 / 2.0, 2) + length1 * length2 / 2.0 * cos(Phi2)) + I2;
rlt_real f2 = mass2 * length2 / 2.0 * g * cos(Phi1 + Phi2 - M_PI / 2.0);
rlt_real f1 = - mass2 * length1 * length2 / 2.0 * pow(dPhi2, 2) * sin(Phi2) - 2 * mass2 * length1 * length2 / 2.0 * dPhi1 * dPhi2 * sin(Phi2);
f1 += (mass1 * length1 / 2.0 + mass2 * length1) * g * cos(Phi1 - M_PI / 2) + f2;
rlt_real ddPhi2 = 1 / ( mass2 * pow(length2 / 2.0, 2) + I2 - pow(d2,2) / d1) * (d2 / d1 * f1 - mass2 * length1 * length2 / 2.0 * pow(dPhi1, 2.0) * sin(Phi2) - f2);
rlt_real ddPhi1 = - (d2 * ddPhi2 + f1) / d1;
A->setElement(0, dPhi1);
A->setElement(1, ddPhi1);
A->setElement(2, dPhi2);
A->setElement(3, ddPhi2);
return A;
}
CAcroBotRewardFunction::CAcroBotRewardFunction(CAcroBotModel *model, rlt_real segmentFactor) : CStateReward(model->getStateProperties())
{
this->model = model;
useHeighPeak = true;
this->segmentFactor = segmentFactor;
power = 1.0;
}
rlt_real CAcroBotRewardFunction::getStateReward(CState *state)
{
rlt_real Phi1 = state->getContinuousState(0);
rlt_real Phi2 = state->getContinuousState(2);
rlt_real height = 2 *(segmentFactor * model->length1 * (cos(Phi1) + 1) + (1 - segmentFactor) * model->length2 * ( cos(Phi2 + Phi1) + 1)) / (model->length1 + model->length2) / 2.0;
rlt_real reward = 2 * (pow(height, power) - 1.0);
if (useHeighPeak)
{
rlt_real dreward = 0.5 * exp((-pow(Phi1, 2.0) - pow(Phi2, 2.0)) * 25);
reward += dreward;
}
return reward;
}
void CAcroBotRewardFunction::getInputDerivation(CState *modelState, CMyVector *targetState)
{
rlt_real Phi1 = modelState->getState(properties)->getContinuousState(0);
rlt_real Phi2 = modelState->getState(properties)->getContinuousState(2);
targetState->setElement(0, - segmentFactor * model->length1 * sin(Phi1));
targetState->setElement(1, 0);
targetState->setElement(2, -(1 - segmentFactor) * model->length2 * sin(Phi2 + Phi1));
targetState->setElement(3, 0);
if (useHeighPeak)
{
targetState->setElement(0, targetState->getElement(0) - 25 * (Phi1) * exp( (-pow(Phi1, 2.0) - pow(Phi2, 2.0)) * 25));
targetState->setElement(2, targetState->getElement(2) - 25 * (Phi1) * exp( (-pow(Phi1, 2.0) - pow(Phi2, 2.0)) * 25));
}
}
CAcroBotHeightRewardFunction::CAcroBotHeightRewardFunction(CAcroBotModel *model) : CStateReward(model->getStateProperties())
{
this->model = model;
useHeighPeak = true;
}
rlt_real CAcroBotHeightRewardFunction::getStateReward(CState *state)
{
rlt_real Phi1 = state->getContinuousState(0);
rlt_real Phi2 = state->getContinuousState(2);
rlt_real reward = model->length1 * (cos(Phi1) - 1) + model->length2 * ( cos(Phi2 + Phi1) - 1);
return reward;
}
void CAcroBotHeightRewardFunction::getInputDerivation(CState *modelState, CMyVector *targetState)
{
rlt_real Phi1 = modelState->getState(properties)->getContinuousState(0);
rlt_real Phi2 = modelState->getState(properties)->getContinuousState(2);
targetState->setElement(0, - model->length1 * sin(Phi1));
targetState->setElement(1, 0);
targetState->setElement(2, - model->length2 * sin(Phi2 + Phi1));
targetState->setElement(3, 0);
}
CAcroBotExpRewardFunction::CAcroBotExpRewardFunction(CAcroBotModel *l_model, rlt_real l_expFactor) : CStateReward(l_model->getStateProperties())
{
this->model = l_model;
this->expFactor = l_expFactor;
}
rlt_real CAcroBotExpRewardFunction::getStateReward(CState *state)
{
rlt_real Phi1 = state->getContinuousState(0);
rlt_real Phi2 = state->getContinuousState(2);
rlt_real reward = model->length1 * (cos(Phi1) - 1) + model->length2 * ( cos(Phi2 + Phi1) - 1);
reward = -1.0 + exp(expFactor * reward);
return reward;
}
void CAcroBotExpRewardFunction::getInputDerivation(CState *modelState, CMyVector *targetState)
{
}
CAcroBotVelocityRewardFunction::CAcroBotVelocityRewardFunction(CAcroBotModel *model) : CStateReward(model->getStateProperties())
{
this->model = model;
invertVelocity = false;
}
rlt_real CAcroBotVelocityRewardFunction::getStateReward(CState *state)
{
rlt_real reward = fabs(state->getContinuousState(1));
if (! invertVelocity)
{
reward = - 1.0 + reward;
}
else
{
reward = - reward;
}
return reward;
}
void CAcroBotVelocityRewardFunction::getInputDerivation(CState *modelState, CMyVector *targetState)
{
}
#ifdef RL_TOOLBOX_USE_QT
CQTAcroBotVisualizer::CQTAcroBotVisualizer( CAcroBotModel *acroModel, QWidget *parent, const char *name) : CQTModelVisualizer(parent, name)
{
this->acroModel = acroModel;
phi1 = 0;
dphi1 = 0;
phi2 = 0;
dphi2 = 0;
}
void CQTAcroBotVisualizer::doDrawState( QPainter *painter)
{
QString s1 = "Phi1 = " + QString::number( phi1 );
QString s2 = "Phi1' = " + QString::number( dphi1 );
QString s3 = "Phi2 = " + QString::number( phi2 );
QString s4 = "Phi2' = " + QString::number( dphi2 );
painter->drawText(10,20, s1);
painter->drawText(10,40, s2);
painter->drawText(10,60, s3);
painter->drawText(10,80, s4);
painter->translate(this->width() / 2, this->height() / 2);
painter->rotate(- phi1 + 180);
painter->drawRect(- 5, -5, 10, (acroModel->length1 * 100) + 10);
painter->translate(0, acroModel->length1 * 100);
painter->rotate(- phi2);
painter->drawRect(- 5, -5, 10, (acroModel->length2 * 100) + 10);
painter->flush();
}
void CQTAcroBotVisualizer::newDrawState(CStateCollection *state)
{
phi1 = state->getState()->getContinuousState(0) * 180 / M_PI;
dphi1 = state->getState()->getContinuousState(1) * 180 / M_PI;
phi2 = state->getState()->getContinuousState(2) * 180 / M_PI;
dphi2 = state->getState()->getContinuousState(3) * 180 / M_PI;
}
#endif
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -