?? bayesian.cc
字號:
// ################################################################################
//
// name: bayesian.cc
//
// author: Martin Pelikan
//
// purpose: functions for construction and use of bayesian networks
// (not including the metric related functions and some of more
// specific functions defined elsewhere)
//
// last modified: February 1999
//
// ################################################################################
#include <string.h>
#include "bayesian.h"
#include "boa.h"
#include "population.h"
#include "graph.h"
#include "memalloc.h"
#include "recomputeGains.h"
#include "checkCycles.h"
#include "utils.h"
#include "computeCounts.h"
#include "binary.h"
#include "random.h"
// ================================================================================
//
// name: constructTheNetwork
//
// function: constructs the network for a given population of strings (data
// set), and stores it into the graph structure, satisfying the
// maximal number of incoming edges constraint defined in the
// parameters passed to the BOA; uses a simple greedy algorithm
// with only edge additions allowed adding edges which increase
// the network most (positively) and do not violate constraints;
// starts with an empty network
//
// parameters: population...the population of strings to model (a data set)
// G............where the resulting network is to be stored
// boaParams....the parameters sent to the BOA
//
// returns: (int) 0
//
// ================================================================================
int constructTheNetwork(Population *population, AcyclicOrientedGraph *G, BoaParams *boaParams)
{
int k,l;
int maxNumberOfEdges,numberOfNodes,maxNumberOfIncomingEdges;
int numAdded;
char finito;
float **gain;
char *full;
int maxFrom, maxTo;
float maxGain;
// empty the graph
G->removeAllEdges();
// if no incoming edges allowed, the empty network is the solution
if (boaParams->maxIncoming==0)
return 0;
// set some variables
numberOfNodes = G->size();
maxNumberOfIncomingEdges = boaParams->maxIncoming;
// maximal number of edges is determined by the number of nodes and the maximal number of incoming edges
// into each node
maxNumberOfEdges = numberOfNodes*maxNumberOfIncomingEdges;
// allocate the array for the gains with respect to the addition of each of the edges
gain = (float**) Calloc(numberOfNodes,sizeof(float*));
for (k=0; k<numberOfNodes; k++)
gain[k] = (float*) Calloc(numberOfNodes,sizeof(float));
// allocate memory for the array containing information about the full nodes (no more edge can go in those)
full = (char*) Malloc(numberOfNodes);
// a greedy algorithm for the network constrution -----------------------------------------------
finito = 0;
// recompute the gains for edges into all nodes and set each node as not full yet
for (k=0; k<numberOfNodes; k++)
{
full[k] = 0;
recomputeGains(k,gain,full,G,population);
}
for (numAdded=0; (numAdded<maxNumberOfEdges)&&(!finito); numAdded++)
{
maxGain = -1;
maxFrom = 0;
maxTo = 0;
// find the best edge addition (from maxFrom to maxTo with the increase of a metric maxGain)
for (k=0; k<numberOfNodes; k++)
for (l=0; l<numberOfNodes; l++)
if (k!=l)
{
// what about an edge from k to l?
if (gain[k][l]>maxGain)
{
maxFrom = k;
maxTo = l;
maxGain = gain[k][l];
};
}
// add an edge with the best gain over all edge additions if its gain is positive, in the other case
// the greedy algorithm stops
if (maxGain>0)
{
// add the new edge
G->addEdge(maxFrom,maxTo);
// the edge can't be added anymore
gain[maxFrom][maxTo] = gain[maxTo][maxFrom] = -1;
// edges that would create cycles with the new edge have to be disabled
checkCycles(maxFrom,maxTo,gain,G);
// is the ending node of the new edge full yet?
full[maxTo]=(G->getNumIn(maxTo)==boaParams->maxIncoming);
// recompute the gains that are needed
recomputeGains(maxTo,gain,full,G,population);
}
else
finito = 1;
};
// ----------------------------------------------------------------------------------------------
// free the memory used by the gain array
for (k=0; k<numberOfNodes; k++)
Free(gain[k]);
Free(gain);
// free memory used by an auxilary array "full"
Free(full);
// get back
return 0;
};
// ================================================================================
//
// name: generateNewInstances
//
// function: generates new instances of the variables (strings) according to
// the joint distribution encoded by the constructed network and the
// conditional probabilities in a modeled data set
//
// parameters: parents......the modeled population
// offspring....where the new instances should be stored
// G............the network used as a model
// boaParams....the parameters sent to the BOA
//
// returns: (int) 0
//
// ================================================================================
int generateNewInstances(Population *parents,
Population *offspring,
AcyclicOrientedGraph *G,
BoaParams *boaParams)
{
long i;
long N,M;
int k,l;
int numberOfNodes;
int **group, *groupSize;
long *numGroupInstances,*numGroupInstances0,maxNumGroupInstances;
float **marginalAll, **marginalAllButOne;
long *count;
char *added;
int numAdded;
char canAdd;
char *x;
int where;
int position;
float prob1;
// set some variables
numberOfNodes = boaParams->n;
N = parents->N;
M = offspring->N;
// allocate the memory for the matrix of groups, their sizes and marginal frequencies
group = (int**) Calloc(numberOfNodes,sizeof(int*));
groupSize = (int*) Calloc(numberOfNodes,sizeof(int));
numGroupInstances = (long*) Calloc(numberOfNodes,sizeof(long));
numGroupInstances0 = (long*) Calloc(numberOfNodes,sizeof(long));
marginalAll = (float**) Calloc(numberOfNodes,sizeof(float*));
marginalAllButOne = (float**) Calloc(numberOfNodes,sizeof(float*));
for (k=0; k<numberOfNodes; k++)
group[k]=(int*) Calloc(numberOfNodes,sizeof(int));
// allocate the memory for some auxilary array used for ordering vertices topologically
added = (char*) Malloc(numberOfNodes);
// create the array of incoming edges numbers for all vartices (with the node
// on the first position) and the number of them.
// + allocate the memory for marginal frequencies
maxNumGroupInstances = 0;
for (k=0; k<numberOfNodes; k++)
{
// create k-th group (corresponding to the k-th variable)
group[k][0]=k;
if (G->getNumIn(k)>0)
memcpy(&(group[k][1]),G->getParentList(k),G->getNumIn(k)*sizeof(int));
groupSize[k]=G->getNumIn(k)+1;
// compute the number of its instances
numGroupInstances[k] = 1<<groupSize[k];
numGroupInstances0[k] = numGroupInstances[k]>>1;
// allocate the memory for marginals and counts
marginalAll[k] = (float*) Calloc(numGroupInstances[k],sizeof(float));
marginalAllButOne[k] = (float*) Calloc(numGroupInstances0[k],sizeof(float));
// update maxNumGroupInstances
if (numGroupInstances[k]>maxNumGroupInstances)
maxNumGroupInstances=numGroupInstances[k];
}
count = (long*) Calloc(maxNumGroupInstances,sizeof(long));
// topologically order nodes (and the corresponding groups of the nodes and their parents)
for (k=0; k<numberOfNodes; k++)
added[k]=0;
numAdded=0;
while (numAdded<numberOfNodes)
{
for (k=numAdded; k<numberOfNodes; k++)
{
canAdd=1;
for (l=1; l<groupSize[k]; l++)
if (!added[group[k][l]])
canAdd=0;
if (canAdd)
{
added[group[k][0]]=1;
if (k!=numAdded)
{
swapPointers((void**) &(group[k]),(void**) &(group[numAdded]));
swapInt(&(groupSize[k]), &(groupSize[numAdded]));
swapLong(&(numGroupInstances[k]),&(numGroupInstances[numAdded]));
swapLong(&(numGroupInstances0[k]),&(numGroupInstances0[numAdded]));
swapPointers((void**) &(marginalAll[k]), (void**) &(marginalAll[numAdded]));
swapPointers((void**) &(marginalAllButOne[k]), (void**) &(marginalAllButOne[numAdded]));
}
numAdded++;
}
}
}
// calculate the marginal frequencies for created groups of positions
for (k=0; k<numberOfNodes; k++)
{
computeCounts(group[k],groupSize[k],parents,count);
for (l=0; l<numGroupInstances[k]; l++)
marginalAll[k][l] = (float) count[l]/N;
for (l=0; l<numGroupInstances0[k]; l++)
marginalAllButOne[k][l] = marginalAll[k][l]+marginalAll[k][l+numGroupInstances0[k]];
};
// generate the ith coordinate (chromosome) for all dhildren
for (i=0; i<M; i++)
{
x = offspring->x[i];
for (k=0; k<numberOfNodes; k++)
{
position = group[k][0];
if (groupSize[k]==1)
prob1 = marginalAll[k][1];
else
{
x[position] = 0;
where = indexedBinaryToInt(x,group[k],groupSize[k]);
prob1 = 1-marginalAll[k][where]/marginalAllButOne[k][where];
}
if (drand()<prob1)
x[position]=1;
else
x[position]=0;
}
}
// free the memory used by marginal frequencies (for groups of bits) and the "count" array
for (k=0; k<numberOfNodes; k++)
{
Free(marginalAll[k]);
Free(marginalAllButOne[k]);
};
Free(count);
// free the memory used by the matrix of groups, their sizes and marginal frequencies
for (k=0; k<numberOfNodes; k++)
Free(group[k]);
Free(group);
Free(groupSize);
Free(numGroupInstances);
Free(numGroupInstances0);
Free(marginalAll);
Free(marginalAllButOne);
// get back
return 0;
};
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -