?? myhmm.c
字號:
/* basic condition in forward algorithm */
t=0;
iObservation = getObservation( line[t] );
for(i=0; i < N; i++)
{
alpha[t][i] = pi[i]*emissions[i][ iObservation ];
}
/* the induction step in forward algorithm */
for(t=1; t < length; t++)
{
iObservation = getObservation( line[t] );
for(i=0; i < N; i++)
{
sumTransProb = 0;
for(j=0; j < N; j++)
{
sumTransProb = sumTransProb + alpha[t-1][j] * transitions[j][i];
}
alpha[t][i] = sumTransProb * emissions[i][iObservation];
}
}
/* the termination step */
t = length-1;
prob = 0;
for(i=0; i < N; i++)
{
prob = prob + alpha[t][i];
}
return prob;
}
/************************************************************************
NAME
backward - backward algorithm
DESCRIPTION
This function calculates the probability of the observation sequence
with backward algorithm given the HMM model.
Input:
HMM model & the observation sequence
Output:
Probability
Global variables list:
N, beta, pi, transitions, emissions.
*************************************************************************/
double backward(char *line)
{
int i, j, t;
int length = strlen( line );
/* the order of the real observation in the observation set */
int iObservation;
/* probability to state i given the sequence and the observation */
double sumProbi;
double prob;
/* basic condition in backward algorithm */
t = length-1;
for(i=0; i < N; i++)
{
beta[t][i] = 1;
}
/* the induction step in backward algorithm */
for(t=length-2; t >= 0; t--)
{
iObservation = getObservation( line[t+1] );
for(i=0; i < N; i++)
{
sumProbi = 0;
for(j=0; j < N; j++)
{
sumProbi = sumProbi + transitions[i][j]*emissions[j][iObservation] * beta[t+1][j];
}
beta[t][i] = sumProbi;
}
}
/* the termination step */
t = 0;
iObservation = getObservation( line[t] );
prob = 0;
for(i=0; i < N; i++)
{
prob = prob + pi[i]*emissions[i][iObservation]*beta[t][i];
}
return prob;
}
/************************************************************************
NAME
viterbi - viterbi algorithm
DESCRIPTION
This function calculates the most probable state path of the
observation sequence with viterbi algorithm given the HMM model.
Input:
HMM model & the observation sequence
Output:
the most probable state path
Global variables list:
N, beta, pi, transitions, emissions.
*************************************************************************/
void viterbi(char *line, char *buffer)
{
/* delta_t0[ N ] */
double* delta_t0;
/* delta_t1[ N ] */
double* delta_t1;
/* the matrix to store the optimal states, used for backtracking
size: N * length */
int *phi;
/* position in the matrix phi */
int pos;
int length = strlen( line );
int i, j, t;
int fromState, inState;
int iObservation;
double sum_for_scale;
double maxProb, currentProb;
/* allocate space for buffer */
delta_t0 = (double*)malloc(N*sizeof(double));
delta_t1 = (double*)malloc(N*sizeof(double));
phi = (int*)malloc(length * N * sizeof( int ));
/* the initial step */
iObservation = getObservation( line[0] );
for(i=0; i < N; i++)
{
delta_t0[i] = pi[i] * emissions[i][iObservation];
phi[i] = 0;
}
/* recursion step */
pos = N;
for(t=1; t < length; t++)
{
iObservation = getObservation( line[t] );
sum_for_scale = 0;
for(j=0; j < N; j++)
{
/* find the optimal state from which transfered to state j */
maxProb = delta_t0[0] * transitions[0][j];
fromState = 0;
for(i=1; i < N; i++)
{
currentProb = delta_t0[i] * transitions[i][j];
if(maxProb < currentProb)
{
maxProb = currentProb;
fromState = i;
}
}
/* calculate the transition probability */
delta_t1[j] = emissions[j][iObservation] * maxProb;
sum_for_scale = sum_for_scale + delta_t1[j];
phi[ pos++ ] = fromState;
}
/* update current probabilities */
for(j=0; j < N; j++)
{
if(sum_for_scale < 0.1)
delta_t0[j] = delta_t1[j] * 10;
else
delta_t0[j] = delta_t1[j];
}
}
/* termination */
maxProb = delta_t0[0];
inState = 0;
for(i=1; i < N; i++)
{
currentProb = delta_t0[i];
if(maxProb < currentProb)
{
maxProb = currentProb;
inState = i;
}
}
/* path backtracking */
i = inState;
pos = length * N;
for(t=length-1; t >= 0 ; t--)
{
buffer[ t ] = getSymbol( i );
pos = pos - N;
if(pos+i < 0)
{
printf("Error in the subscribe 1: pos = %d, statei = %d\n", pos, i);
exit(1);
}
if(pos+i >= length * N)
{
printf("Error in the subscribe 2\n");
exit(1);
}
i = phi[pos + i];
}
buffer[length] = '\0';
free(delta_t0);
free(delta_t1);
free( phi );
return;
}
/************************************************************************
NAME
baumWelch - baumWelch algorithm
DESCRIPTION
This function estimates the parameters of HMM with specified topology
given the training data and initial parameters of HMM
Input:
HMM topology, HMM initial parameters, and training data
Output:
the estimated parameters
Global variables list:
iTrain, trainData, T, M, N, alpha, beta, pi, transitions, emissions.
*************************************************************************/
void baumWelch()
{
int loops;
int i, j, k, t;
int iObservation;
int length;
double** _transitions; /* _transitions[ N ][ N ] estimated transition matrix T */
double** _emissions; /* _emissions[ N ][ M ] estimated emission matrix E */
double* _pi; /* _pi[ N ] estimated initial distribution */
double oldLikelihood, Likelihood;
double error;
double sumGamma1;
double* gamma1; // gamma1[N]: for estimating initial distribution of the states
double** Eij; // Eij[N][N]: expected numbers transited from state i to state j
double** Eia; // Eia[N][M]: expected numbers of emitting a in state i
/* X_d_t[N][N]: expected numbers transited from state i to state j
when it is with observation O in the position t of d-th sequence */
double** X_d_t;
double* ei; // ei[N]: expected numbers in state i
double* ej; // ej[N]: expected numbers of emissions in state j
double Prob;
double temp, temp2, tempM;
/* allocate space */
AllocateDataSpace( &_transitions, N, N );
AllocateDataSpace( &_emissions, N, M );
_pi = (double*)malloc(N*sizeof(double));
gamma1 = (double*)malloc(N*sizeof(double));
AllocateDataSpace( &Eij, N, N );
AllocateDataSpace( &Eia, N, M );
AllocateDataSpace( &X_d_t, N, N );
ei = (double*)malloc(N*sizeof(double));
ej = (double*)malloc(N*sizeof(double));
/* initialization */
error = 1;
oldLikelihood = forward( trainData[0] );
Likelihood = backward( trainData[0] );
for(k=1; k < iTrain; k++)
{
oldLikelihood = oldLikelihood + forward(trainData[k]);
Likelihood = Likelihood + backward(trainData[k]);
}
/* check whether forward and backward algorithms work well */
if(fabs((Likelihood-oldLikelihood)/Likelihood) > 0.01)
{
printf("there are errors in forward or backward algorithm\n");
exit(1);
}
Likelihood = oldLikelihood;
printf("Likelihood from forward: %e\n", Likelihood);
loops = 0;
while((error > threshold && loops < 1000) && Likelihood > threshold)
{
printf("loops: %d\n", loops);
loops++;
/* initialize the expected matrix */
for(i=0; i < N; i++)
{
gamma1[i] = 0;
for(j=0; j < N; j++)
{
Eij[i][j] = 0;
}
for(j=0; j < M; j++)
{
Eia[i][j] = 0;
}
}
// loop for all the sequences
for(k=0; k < iTrain; k++)
{
// initialize the matrix alpha and beta
Prob = backward(trainData[k]);
Prob = forward(trainData[k]);
length = strlen(trainData[k]);
// loop for one sequence
for(t=0; t < length-1; t++)
{
iObservation = getObservation(trainData[k][t+1]);
for(i=0; i < N; i++)
{
for(j=0; j < N; j++)
{
X_d_t[i][j] = (alpha[t][i] * transitions[i][j] * emissions[j][iObservation] * beta[t+1][j]) / Prob;
Eij[i][j] = Eij[i][j] + X_d_t[i][j];
Eia[j][iObservation] = Eia[j][iObservation] + X_d_t[i][j];
if(t == 0)
{
gamma1[i] = gamma1[i] + X_d_t[i][j];
}
}
}
}
}
sumGamma1 = 0;
for(i=0; i < N; i++)
{
sumGamma1 = sumGamma1 + gamma1[i];
ei[i] = Eij[i][0];
for(j=1; j < N; j++)
{
ei[i] = ei[i] + Eij[i][j];
}
ej[i] = Eia[i][0];
for(j=1; j < M; j++)
{
ej[i] = ej[i] + Eia[i][j];
}
}
temp = 1.0 / (double)N;
temp2= temp/ (double)N;
tempM= 1.0 / (double)M;
for(i=0; i < N; i++)
{
// estimate initial distribution
if(sumGamma1 < threshold2)
_pi[i] = temp; /* uniform distribution */
else
_pi[i] = gamma1[i] / sumGamma1;
// estimate the transition matrix
for(j=0; j < N; j++)
{
if(ei[i] < threshold2)
{
if(i == j)
_transitions[i][j] = 1 - temp + temp2;
else
_transitions[i][j] = temp2;
}
else
{
_transitions[i][j] = Eij[i][j] / ei[i];
}
}
// estimate the emission matrix
for(j=0; j < M; j++)
{
if(ej[i] < threshold2)
_emissions[i][j] = tempM;
else
_emissions[i][j] = Eia[i][j] / ej[i];
}
}
/* assign the estimated new values to the HMM model */
for(i=0; i < N; i++)
{
/* do not need to update the initial state distribution */
/*pi[i] = _pi[i];*/
/* transition matrix */
for(j=0; j < N; j++)
{
transitions[i][j] = _transitions[i][j];
}
/* emission matrix */
for(j=0; j < M; j++)
{
emissions[i][j] = _emissions[i][j];
}
}
/* compute the error */
Likelihood = forward( trainData[0] );
for(k=1; k < iTrain; k++)
{
Likelihood = Likelihood + forward(trainData[k]);
}
error = fabs(Likelihood - oldLikelihood);
printf("oldLikelihood = %e, Likelihood = %e, error = %e\n", oldLikelihood, Likelihood, error);
oldLikelihood = Likelihood;
}
}
/************************************************************************
NAME
loadHMM - load hidden Markov model from the specified file
DESCRIPTION
This function ...
Input:
the specified HMM file name
Output:
N, M, transitions, emissions, pi
Global variables list:
M, N, pi, transitions, emissions.
*************************************************************************/
void loadHMM(char* hmmFile)
{
FILE *in_fp;
char* token;
int i, j, m;
int end;
int nLines = cal_lines(hmmFile);
int nBuffer = getLengthOfLongestLine(hmmFile)+extraSpace;
char *line, *buffer;
/* Open for read (will fail if inputfile does not exist) */
if( (in_fp = fopen( hmmFile, "r" )) == NULL )
{
printf( "The file '%s' was not opened\n", hmmFile);
exit(1);
}
line = (char *) malloc(nBuffer * sizeof(char));
buffer = (char *) malloc(nBuffer * sizeof(char));
// Part 1
// read the first line
fgets(line, nBuffer, in_fp);
// number of states
token = strtok( line, seps );
N = atoi(token);
/* exceptions for number of states */
if(N < 1)
{
printf("the number of states is too small!\n");
exit(1);
}
if(N > 100)
{
printf("the program can't deal with states more than 100!\n");
exit(1);
}
/* number of observations */
token = strtok( NULL, seps );
M = atoi(token);
observationsDefined = TRUE;
if(M == noDefObservations)
{
observationsDefined = FALSE;
}
/* exceptions for number of observations */
if((M <= 0 && M != noDefObservations) || M > 100)
{
printf("the number of the observations is not correct!\n");
exit(1);
}
/* the possible length of the longest line */
token = strtok( NULL, seps );
T = atoi(token);
// skip one line
fgets(line, nBuffer, in_fp);
// Part 2
fgets(line, nBuffer, in_fp);
/* whether to read the observations */
if(observationsDefined == TRUE)
{
strcpy(buffer, line);
m = 0;
token = strtok( buffer, seps );
// count how many observations there are in the line
while(token != NULL)
{
token = strtok( NULL, seps );
m++;
}
m = m - 1; // because there is a prefix in the sequence
/* check whether the nunber of the observations is consistent */
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -