/*
/ (XCS)
/ ------------------------------------
/ Learning Classifier System based on accuracy
/
/ by Martin Butz
/ University of Wuerzburg / University of Illinois at Urbana/Champaign
/ butz@illigal.ge.uiuc.edu
/ Last modified: 10-17-99
/
/ Main program
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
//#include <unistd.h>
//#include <resource.h>
#include "classifierList.h"
#include "actionSelection.h"
#include "xcs.h"
#include "env.h"
#include "xcsMacros.h"
int main(int args,char *argv[])
{
FILE *env_file=NULL;
/* set the priority */
// setpriority(PRIO_PROCESS, getpid(), 5);
/* randomize the pseudo-random number generator */
randomize();
if(args!=2 && IS_MULTI_STEP){
printf("Usage: xcs.out FILE\n");
return 0;
}
if(args==2){
if ((env_file = fopen(argv[1], "rt"))
== NULL)
{
fprintf(stderr, "Cannot open file %s.\n",argv[1]);
return 0;
}
}
/* Initialize the environment (not always necessary) */
if(!initEnv(env_file))
return 0;
if(args==2)
fclose(env_file);
/* start the experiments */
startExperiments();
freeEnv();
return 1;
}
void startExperiments()
{
int expcounter;
struct xClassifierSet *pop;
FILE *tabFile;
/*Open files for statistics*/
if ((tabFile = fopen(TABOUTFILE, "wt"))
== NULL)
{
fprintf(stderr, "Cannot open file");
fprintf(stderr,TABOUTFILE);
return;
}
/* start the experiments */
for( expcounter=0 ; expcounter<NR_EXPS ; expcounter++ ) {
fprintf(tabFile,"Next Experiment\n");
/* Initialize the population */
pop=NULL;
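/* if INITIALIZE_POP is set, the population is pre-filled with random classifiers;
 * otherwise it starts empty and is presumably filled by covering inside getMatchSet */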
if(INITIALIZE_POP)
pop=createRandomClassifierSet(CONDITION_LENGTH, ACTION_LENGTH);
if(IS_MULTI_STEP)
startOneMultiStepExperiment(tabFile, &pop);
else
startOneSingleStepExperiment(tabFile, &pop);
freeClassifierSet(&pop);
}
}
/* ########################## in a single step environment ########################## */
void startOneSingleStepExperiment(FILE *tabFile, struct xClassifierSet **pop)
{
int trialCounter, exploit=1;
int correct[50];
double sysError[50];
char state[CONDITION_LENGTH+1];
/* set the \0 char at the end of the state string */
state[CONDITION_LENGTH]='\0';
/* Start one experiment, trialCounter counts the number of problems (trials)*/
for( trialCounter=0 ; trialCounter<MAX_NR_STEPS ; trialCounter+=exploit) {
/* alternate between explore (exploit==0) and exploit (exploit==1) trials;
 * since the loop advances trialCounter by exploit, only exploit trials are counted */
exploit= (exploit+1)%2;
resetState(state);
if(!exploit)
doOneSingleStepProblemExplore(pop, state, trialCounter);
else
doOneSingleStepProblemExploit(pop, state, trialCounter, &correct[trialCounter%50], &sysError[trialCounter%50]);
if( trialCounter%50==0 && exploit && trialCounter>0 ){
writePerformance(tabFile, *pop, correct, sysError, trialCounter);
}
/* print the trialCounter every 1000 trials to show the progress */
if(trialCounter%1000==0 && exploit)
printf("%d\n",trialCounter);
}
}
void doOneSingleStepProblemExplore(struct xClassifierSet **pop, char *state, int trialCounter)
{
struct xClassifierSet *mset, *aset, *killset=NULL;
char action[ACTION_LENGTH+1];
double reward=0., predictionArray[NUMBER_OF_ACTIONS];
int correct;
/* get the match set */
mset=getMatchSet(state,pop,&killset,trialCounter);
/* no updates are necessary in this case */
freeSet(&killset);
/* get the Prediction array */
getPredictionArray(mset, predictionArray);
/* Get the action that wins in the prediction array */
action[ACTION_LENGTH]='\0';
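/* actionWinner, declared in actionSelection.h, selects the action for this explore trial;
 * presumably it includes some exploration, unlike the purely greedy deterministicActionWinner used on exploit trials */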
actionWinner(action, predictionArray);
/* Get the action set according to the chosen action */
aset = getActionSet(action, mset);
/* execute the action and get the reward;
 * correct indicates whether the chosen action was the correct one */
reward = doAction(state, aset->cl->act, &correct);
/* Give immediate reward */
adjustActionSet(aset,0,reward);
/* Execute the discovery mechanism (GA) */
discoveryComponent(&aset,pop,&killset,trialCounter);
/* no update necessary here */
freeSet(&killset);
/* Clean up */
freeSet(&mset);
freeSet(&aset);
}
void doOneSingleStepProblemExploit(struct xClassifierSet **pop, char *state, int trialCounter,
int *correct, double *sysError)
{
struct xClassifierSet *mset, *aset, *killset=NULL;
char action[ACTION_LENGTH+1];
double reward=0., predictionArray[NUMBER_OF_ACTIONS];
/* get the match set*/
mset=getMatchSet(state,pop,&killset,trialCounter);
/* no updates are necessary in this case */
freeSet(&killset);
/* get the Prediction array */
getPredictionArray(mset, predictionArray);
/* Get the action that wins in the prediction array */
action[ACTION_LENGTH]='\0';
deterministicActionWinner(action, predictionArray);
/* Get the action set according to the chosen action */
aset = getActionSet(action, mset);
/* execute the action and get the reward;
 * correct indicates whether the chosen action was the correct one */
reward = doAction(state, aset->cl->act, correct);
/* remember the system error */
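/* the error is the absolute difference between the received reward and the prediction
 * of the chosen action, normalized by PAYMENT_RANGE */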
*sysError=(double)(abs((int)(reward - predictionArray[getActInt(action)])))/(double)PAYMENT_RANGE;
/* Clean up */
freeSet(&mset);
freeSet(&aset);
}
/* ########################## in a multi step environment ########################## */
void startOneMultiStepExperiment(FILE *tabFile, struct xClassifierSet **pop)
{
int counter, trialCounter, exploit=0;
int stepToFood[50];
double sysError[50];
char state[CONDITION_LENGTH+1];
/* set the \0 char at the end of the state string */
state[CONDITION_LENGTH]='\0';
/* Start one experiment, trialCounter counts the number of exploit problems (trials)*/
for( trialCounter=0, counter=0 ; trialCounter<MAX_NR_STEPS ; trialCounter+=exploit) {
exploit= (exploit+1)%2;
if(!exploit)
doOneMultiStepProblemExplore(pop, state, &counter);
else
doOneMultiStepProblemExploit(pop, state, &counter, &stepToFood[trialCounter%50], &sysError[trialCounter%50]);
/* write out the performance every 50 trials */
if( trialCounter%50==0 && exploit && trialCounter>0 ){
writePerformance(tabFile, *pop, stepToFood, sysError, trialCounter);
}
/* print the trialCounter every 500 trials to show the progress */
if(trialCounter%500==0 && exploit)
printf("%d\n",trialCounter);
}
}
void doOneMultiStepProblemExplore(struct xClassifierSet **pop, char *state, int *counter)
{
double reward=0., predictionArray[NUMBER_OF_ACTIONS];
char action[ACTION_LENGTH+1];
struct xClassifierSet *mset, *aset, *paset=NULL, *killset=NULL;
int stepCounter, reset=0;
/* set the \0 char at the end of action */
action[ACTION_LENGTH]='\0';
resetState(state);
/* Start one problem, stepCounter counts the number of steps executed */
for( stepCounter=0 ; stepCounter<TELETRANSPORTATION && !reset; stepCounter++, (*counter)++) {
/* get the match set and update the previous action set*/
mset=getMatchSet(state,pop,&killset,(*counter));
if( paset!=NULL)
updateSet(&paset,killset);
freeSet(&killset);
/* get the Prediction array */
getPredictionArray(mset, predictionArray);
/* Get the action that wins in the prediction array */
actionWinner(action, predictionArray);
/* Get the action set according to the chosen action */
aset = getActionSet(action, mset);
/* execute the action and get reward */
reward = doAction(state, aset->cl->act, &reset);
/* Backpropagate the reward to the previous action set and apply the GA */
if( paset!=NULL){
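/* the best prediction of the current state serves as the estimate of future payoff;
 * discounting by GAMMA and adding the (here zero) immediate reward presumably happens inside adjustActionSet */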
adjustActionSet(paset,predictionArray[detActWinInt(predictionArray)],0);
discoveryComponent(&paset,pop,&killset,(*counter));
updateSet(&aset,killset);
freeSet(&killset);
}
/* if a reset will take place, give the immediate reward and apply the GA as well */
if( reset ){
adjustActionSet(aset,0,reward);
discoveryComponent(&aset,pop,&killset,(*counter));
updateSet(&aset,killset);
freeSet(&killset);
}
/* Clean up */
freeSet(&mset);
freeSet(&paset);
paset=aset;
}
freeSet(&paset);
}
void doOneMultiStepProblemExploit(struct xClassifierSet **pop, char *state, int *counter, int *stepToFood, double *sysError )
{
double reward=0., predictionArray[NUMBER_OF_ACTIONS], predictionValue, previousPrediction=0.;
char action[ACTION_LENGTH+1];
struct xClassifierSet *mset, *aset, *paset=NULL, *killset=NULL;
int stepCounter, reset=0;
/* set the \0 char at the end of action and init the sysError*/
action[ACTION_LENGTH]='\0';
*sysError=0;
resetState(state);
/* Start one problem, stepCounter counts the number of steps executed */
for( stepCounter=0 ; stepCounter<TELETRANSPORTATION && !reset ; stepCounter++) {
/* get the match set and update the previous action set*/
mset=getMatchSet(state,pop,&killset,(*counter));
if( paset!=NULL)
updateSet(&paset,killset);
freeSet(&killset);
/* get the Prediction array */
getPredictionArray(mset, predictionArray);
/* Get the action that wins in the prediction array */
deterministicActionWinner(action, predictionArray);
predictionValue= predictionArray[detActWinInt(predictionArray)];
/* Get the action set according to the chosen action */
aset = getActionSet(action, mset);
/* execute the action and get reward */
reward = doAction(state, aset->cl->act, &reset);
/* if a reset will take place, give the immediate reward */
if( reset ){
adjustActionSet(aset,0,reward);
(*sysError) += (double)(abs((int)(reward - predictionValue)))/(double)PAYMENT_RANGE;
}
/* Backpropagate the reward to the previous action set */
if( paset!=NULL){
adjustActionSet(paset,predictionArray[detActWinInt(predictionArray)],0);
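/* for intermediate steps the error is measured between the previous prediction and its
 * discounted one-step target GAMMA*predictionValue (the immediate reward is treated as zero here) */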
(*sysError) += (double)(abs((int)(GAMMA*predictionValue - previousPrediction))) / (double)PAYMENT_RANGE;
}
/* remember the prediction for the system error */
previousPrediction=predictionValue;
/* Clean up */
freeSet(&mset);
freeSet(&paset);
paset=aset;
}
freeSet(&paset);
*stepToFood=stepCounter;
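/* average the accumulated per-step error over the number of steps in this trial */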
(*sysError)/=stepCounter;
}
/* writes the performance averaged over the last 50 trials */
void writePerformance(FILE *tabFile,struct xClassifierSet *pop, int *correct,double *sysError,int counter)
{
double corr=0.,serr=0.;
int i, popsize;
for( popsize=0 ; pop!=NULL ; pop=pop->next, popsize++ );/* Just count the size of the population */
for(i=0;i<50;i++){
corr+=correct[i];
serr+=sysError[i];
}
corr/=50.;
serr/=50.;
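/* the population size is written scaled by PAYMENT_RANGE, presumably so that all three curves fit on one plot axis */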
fprintf(tabFile,"%d;%f;%f;%f\n",counter,corr,serr,(double)popsize/PAYMENT_RANGE);
}
/* randomize the pseudo-random number generator */
void randomize(void)
{
int i;
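/* rather than seeding with srand(), the generator is advanced a time-dependent
 * number of times (time(NULL)%1000 calls to rand()) */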
for (i=0;i<time(NULL)%1000;rand(),i++);
}