// cagent.cpp
interAction = currentEpisode->getAction(episodeIndex);
mAction->getMultiStepActionData()->duration += interAction->getDuration();
}
}
assert(mAction->getDuration() == oldDuration);
}
}
}
/// Replaces the episode object used for logging with the given one.
/// The episode is borrowed; this class does not take ownership here.
void CHierarchicalSemiMarkovDecisionProcess::setLoggedEpisode(CEpisode *loggedEpisode)
{
	this->currentEpisode = loggedEpisode;
}
/** Sends the next step if the hierarchical SMDP has executed an action (i.e. it is on the hierarchical action stack).
Before it sends the step, the action executed by the SMDP is determined and the new S-A-S tuple is sent to the listeners.
*/
void CHierarchicalSemiMarkovDecisionProcess::nextStep(CStateCollection *oldState, CHierarchicalStack *actionStack, CStateCollection *newState)
{
	// The action this SMDP executed, i.e. the stack entry directly below it;
	// NULL if this SMDP is not on the current hierarchical stack.
	CAction *currentAction = getExecutedAction(actionStack);
	if (currentAction != NULL)
	{
		if (isFirstStep)
		{
			// Remember the state the episode started in.
			pastState->setStateCollection(oldState);
			isFirstStep = false;
		}
		// Primitive actions always produce a step immediately; extended
		// actions produce one only once they report themselves finished.
		bool sendStep = !currentAction->isType(EXTENDEDACTION);
		if (!sendStep)
		{
			CExtendedAction *eAction = dynamic_cast<CExtendedAction *>(currentAction);
			sendStep = eAction->getMultiStepActionData()->finished;
		}
		// BUGFIX: as scraped, the function body was unbalanced (a closing
		// brace was missing after the extended-action check above), which
		// left this block nested inside it — so primitive actions never
		// caused a step to be sent. Rebalanced to the intended structure.
		if (sendStep)
		{
			currentState->setStateCollection(newState);
			sendNextStep(currentAction);
			// Swap the state buffers: the current state becomes the past
			// state of the next step.
			CStateCollectionImpl *buffer = pastState;
			pastState = currentState;
			currentState = buffer;
		}
	}
	// NOTE(review): multiStepData presumably describes this SMDP's own
	// extended action — verify against the class declaration.
	if (multiStepData->finished && currentSteps > 0)
	{
		startNewEpisode();
	}
}
/// Episode-change notification: simply begins a new episode.
void CHierarchicalSemiMarkovDecisionProcess::newEpisode()
{
	startNewEpisode();
}
/// The next hierarchy level is whatever action this SMDP's policy selects
/// for the given state; action data is written into actionDataSet.
CAction* CHierarchicalSemiMarkovDecisionProcess::getNextHierarchyLevel(CStateCollection *state, CActionDataSet *actionDataSet)
{
	return getNextAction(state, actionDataSet);
}
/** Returns the action following the SMDP in the hierarchical Action Stack
*/
/** Returns the action following this SMDP on the hierarchical action stack,
or NULL if this SMDP is not part of the stack. */
CAction *CHierarchicalSemiMarkovDecisionProcess::getExecutedAction(CHierarchicalStack *actionStack)
{
	// Scan the stack for this SMDP itself.
	CHierarchicalStack::iterator pos = actionStack->begin();
	for (; pos != actionStack->end(); ++pos)
	{
		if ((*pos) == this)
		{
			break;
		}
	}
	if (pos == actionStack->end())
	{
		// This SMDP is not on the stack, so it executed no action.
		return NULL;
	}
	// The executed action is the entry directly after this SMDP; a stack
	// containing this SMDP must always carry such an entry.
	++pos;
	assert(pos != actionStack->end());
	return *pos;
}
/** Creates a new agent. An episode object is also instantiated automatically and added to the listener list; logging can be turned off by
setLogEpisode(bool). */
// Constructs an agent bound to the given environment model. The model is
// borrowed, not owned: the destructor does not delete it.
CAgent::CAgent(CEnvironmentModel *model) : CSemiMarkovDecisionProcess(), CStateModifiersObject(model->getStateProperties())
{
this->model = model;
lastAction = NULL;
// Default run limits: 1 episode, 5000 steps per episode (see setParameters).
setParameters(1, 5000);
keyboardBreaks = false;
// Episode logging is on by default: the episode is registered as a listener.
currentEpisode = new CEpisode(model->getStateProperties(), actions);
addSemiMDPListener(currentEpisode);
bLogEpisode = true;
// Two state buffers that are swapped each step (see doAction).
currentState = new CStateCollectionImpl(model->getStateProperties());
lastState = new CStateCollectionImpl(model->getStateProperties());
// NOTE(review): `modifiers` is (re)assigned here but never deleted in this
// class's destructor — possible leak unless a base class owns it; verify.
modifiers = new std::list<CStateModifier *>;
startNewEpisode();
}
// Releases the two state buffers and the logged episode.
// NOTE(review): `modifiers` (allocated in the constructor) and `model` are
// not freed here — confirm ownership elsewhere before assuming this is
// intentional.
CAgent::~CAgent()
{
delete currentState;
delete lastState;
delete currentEpisode;
}
/** Tells the model which action to execute and then saves the new state.
Sends the oldState, the action and the newState as an S-A-S tuple to all listeners (see CSemiMarkovDecisionProcess::sendNextStep(...)).
There is a special treatment for actions of the type PRIMITIVEACTIONSTATECHANGE, @see CPrimitiveActionStateChanged
*/
void CAgent::doAction(CAction *l_action)
{
	CAction *action = l_action;
	/*if (action->isType(CONTINUOUSSTATICACTION))
	{
	CContinuousAction *contAction = dynamic_cast<CStaticContinuousAction *>(action)->getContinuousAction();
	action = dynamic_cast<CAction *>(contAction);
	}*/
	// Validate the action before using it: it must be a non-NULL primitive
	// action registered with this agent. (The NULL check now runs before
	// getIndex, so a NULL action is caught before being dereferenced.)
	assert(action != NULL);
	int index = actions->getIndex(action);
	assert(action->isType(PRIMITIVEACTION) && index >= 0);
	if (model->isReset())
	{
		// The model finished an episode -> begin a new one.
		startNewEpisode();
	}
	if (isFirstStep)
	{
		// First step of an episode: fetch the start state from the model.
		isFirstStep = false;
		model->getState(currentState);
		currentState->newModelState();
		if (DebugIsEnabled())
		{
			DebugPrint('+', "\nNew Episode (%d): ", this->getCurrentEpisodeNumber());
			DebugPrint('+', "start State: ");
			currentState->getState()->saveASCII(DebugGetFileHandle('+'));
			DebugPrint('+', "\n");
		}
	}
	// Swap the two state buffers: the old current state becomes the last
	// state and the former last-state buffer is reused for the new state.
	// BUGFIX: the scraped code never assigned bufState back to currentState,
	// so lastState and currentState aliased the same buffer and the S-A-S
	// tuple below would have contained the new state twice.
	CStateCollectionImpl *bufState = lastState;
	lastState = currentState;
	currentState = bufState;
	// Execute the action on the model and read back the resulting state.
	CPrimitiveAction* primAction = dynamic_cast<CPrimitiveAction*>(action);
	model->nextState(primAction);
	model->getState(currentState);
	lastAction = action;
	if (DebugIsEnabled())
	{
		DebugPrint('+', "\nNew Step (%d): ", this->getCurrentStep());
		DebugPrint('+', "oldState: ");
		lastState->getState()->saveASCII(DebugGetFileHandle('+'));
		DebugPrint('+', "action: %d ", actions->getIndex(action));
		if (action->getActionData())
		{
			action->getActionData()->saveASCII(DebugGetFileHandle('+'));
		}
		DebugPrint('+', "currentState: ");
		currentState->getState()->saveASCII(DebugGetFileHandle('+'));
		DebugPrint('+', "\n");
	}
	// Notify all listeners of the (oldState, action, newState) transition.
	sendNextStep(lastState, action, currentState);
}
/// Turns episode logging on or off by (de)registering the episode object
/// as a listener. A repeated call with the current value is a no-op, so the
/// listener is never added or removed twice.
void CAgent::setLogEpisode(bool bLogEpisode)
{
	if (this->bLogEpisode == bLogEpisode)
	{
		return;
	}
	this->bLogEpisode = bLogEpisode;
	if (bLogEpisode)
	{
		// Log episodes first: put the episode at the front of the listener list.
		SMDPListeners->push_front(currentEpisode);
	}
	else
	{
		removeSemiMDPListener(currentEpisode);
	}
}
/// Resets the environment model first, then lets the base SMDP class begin
/// the new episode (counters, listener notification, ...).
void CAgent::startNewEpisode()
{
	model->resetModel();
	CSemiMarkovDecisionProcess::startNewEpisode();
}
/// Runs the controller from scratch for at most maxEpisodes episodes with
/// at most maxSteps steps each. Returns doRun's result.
int CAgent::doControllerEpisode(int maxEpisodes, int maxSteps)
{
	setParameters(maxEpisodes, maxSteps);
	return doRun(false);
}
void CAgent::setParameters(int maxEpisodes, int maxSteps)
{
this->maxEpisodes = maxEpisodes;
this->maxSteps = maxSteps;
this->currentEpisodeNumber = 0;
this->currentSteps = 0;
}
/// Resumes an interrupted run without resetting the episode/step counters.
int CAgent::doResume()
{
	return doRun(true);
}
/** Main control loop: runs controller steps until maxEpisodes is reached or
(when enabled) a key was pressed. Returns -1 on keyboard interrupt, the
current step count otherwise.
NOTE(review): bContinue is not used inside this function in the visible
code; the resume semantics come from the counters left by setParameters. */
int CAgent::doRun(bool bContinue)
{
	bool interrupted = false;
	RIL_Toolbox_Set_Keypress();
	while (currentEpisodeNumber < maxEpisodes && !interrupted)
	{
		// Start a fresh episode if the model is done or the step limit is hit.
		if (model->isReset() || currentSteps >= maxSteps)
		{
			startNewEpisode();
		}
		// The very first episode is counted here; subsequent episodes are
		// presumably counted by startNewEpisode() — verify in the base class.
		if (currentEpisodeNumber == 0)
		{
			currentEpisodeNumber++;
		}
		// Run steps until the episode ends, the step limit is reached, or a
		// key press interrupts the run.
		do
		{
			doControllerStep();
			if (keyboardBreaks)
			{
				interrupted = RIL_Toolbox_KeyboardHit();
			}
		}
		while (!model->isReset() && currentSteps < maxSteps && !interrupted);
	}
	RIL_Toolbox_Reset_Keypress();
	return interrupted ? -1 : currentSteps;
}
// Executes one controller-driven step: queries the controller for the next
// action in the current state and forwards it to doAction().
void CAgent::doControllerStep()
{
if (model->isReset())
{
// The model signalled the end of an episode -> begin a new one.
startNewEpisode();
}
if (isFirstStep)
{
// First step of an episode: fetch the start state from the model
// before asking the controller for an action.
isFirstStep = false;
model->getState(currentState);
currentState->newModelState();
if (DebugIsEnabled())
{
DebugPrint('+', "\nNew Episode (%d): ", this->getCurrentEpisodeNumber());
DebugPrint('+', "start State: ");
currentState->getState()->saveASCII(DebugGetFileHandle('+'));
DebugPrint('+', "\n");
}
}
// Ask the controller for the action to execute in the current state.
CAction *action = getNextAction(currentState);
assert(action != NULL);
// NOTE(review): actionDataSet appears to be an inherited member holding
// per-action data filled in by getNextAction — verify in the base class.
action->loadActionData(actionDataSet->getActionData(action));
doAction(action);
}
/// Enables or disables aborting doRun() via a key press.
void CAgent::setKeyboardBreak(bool keyboardBreaks)
{
	this->keyboardBreaks = keyboardBreaks;
}
/// Returns whether a key press aborts doRun().
bool CAgent::getKeyboardBreak()
{
	return keyboardBreaks;
}
/// Registers a primitive action with the agent; delegates to the base SMDP.
void CAgent::addAction(CPrimitiveAction *action)
{
	CSemiMarkovDecisionProcess::addAction(action);
}
/// Registers a state modifier everywhere it is needed: in both state
/// buffers, in the logged episode, and in the base modifier list.
void CAgent::addStateModifier(CStateModifier *modifier)
{
	lastState->addStateModifier(modifier);
	currentState->addStateModifier(modifier);
	currentEpisode->addStateModifier(modifier);
	CStateModifiersObject::addStateModifier(modifier);
}
/// Removes a state modifier from both state buffers, the logged episode,
/// and the base modifier list (mirror of addStateModifier).
void CAgent::removeStateModifier(CStateModifier *modifier)
{
	lastState->removeStateModifier(modifier);
	currentState->removeStateModifier(modifier);
	currentEpisode->removeStateModifier(modifier);
	CStateModifiersObject::removeStateModifier(modifier);
}
/// Access to the episode object currently used for logging.
CEpisode *CAgent::getCurrentEpisode()
{
	return currentEpisode;
}
/// Access to the agent's current-state buffer.
CStateCollection *CAgent::getCurrentState()
{
	return currentState;
}
/// Access to the environment model the agent acts on.
CEnvironmentModel *CAgent::getEnvironmentModel()
{
	return model;
}