?? vfem.c
字號:
} foundBound = 1; } else { if(gMessageLevel >= 1) { printf("Have a bound and ID may or may not converge, we don't converge.\n"); } } } } if(foundBound) { bound = _CalculateErrorBound(); } else { bound = -1; } if(!gTestOnTrain) { /* just output the distance between matched centers */ /* load the test centers */ testCenters = VALNew(); sprintf(fileNames, "%s/%s.test", gSourceDirectory, gFileStem); testCentersIn = fopen(fileNames, "r"); DebugError(testCentersIn == 0, "Unable to open the .test file"); if(gMessageLevel >= 2) { printf("reading the test centers file...\n"); } tc = ExampleRead(testCentersIn, es); while(tc != 0) { VALAppend(testCenters, tc); tc = ExampleRead(testCentersIn, es); } fclose(testCentersIn); /* Match learned centers with the test centers */ loss = _MatchCentersGetDistanceSquare(learnedCenters, testCenters); /* free the test centers */ for(i = 0 ; i < VALLength(testCenters) ; i++) { ExampleFree(VALIndex(testCenters, i)); } VALFree(testCenters); } else { /* Sum Square distance of example to assigned cluster */ loss = 0; sprintf(fileNames, "%s/%s.data", gSourceDirectory, gFileStem); exampleIn = fopen(fileNames, "r"); DebugError(exampleIn == 0, "Unable to open the .data file"); if(gMessageLevel >= 1) { printf("opened test file, starting scan...\n"); } e = ExampleRead(exampleIn, es); /* HERE only tests on the first 10k test examples? Parameter?? */ while(e != 0 && tested < gMaxExamplesPerIteration) { tested++; lc = _FindClosestCenter(e, learnedCenters); loss += pow(ExampleDistance(e, lc), 2); ExampleFree(e); e = ExampleRead(exampleIn, es); } if(e != 0) { ExampleFree(e); } fclose(exampleIn); } if(finalOutput) { printf("%.4f\t0\n", loss); } else { if(foundBound) { if(bound < gThisErrorTarget) { printf("%d\t%ld\t%d\t%.6f\t%.6f\t%.2lf\n", gRound, learnCount, gTotalExamplesSeen, bound, loss, ((double)learnTime) / 100); } else { printf("%d\t%ld\t%d\t*%.6f\t%.6f\t%.2lf\n", gRound, learnCount, gTotalExamplesSeen, bound, loss, ((double)learnTime) / 100); } } else { if(gMessageLevel > 1) { printf(" No bound, Current bound estimate is %f guarenteed converge %d\n", _CalculateErrorBound(), ((IterationStatsPtr)VALIndex(gStatsList, VALLength(gStatsList) - 1))->guarenteeIDConverge); } printf("%d\t%ld\t%d\t***\t%.6f\t%.2lf\n", gRound, learnCount, gTotalExamplesSeen, loss, ((double)learnTime) / 100); } } fflush(stdout); if(0) {//gOutputCenters) { sprintf(fileNames, "%s-%lu.centers", gFileStem, learnCount); centersOut = fopen(fileNames, "w"); for(i = 0 ; i < VALLength(learnedCenters) ; i++) { ExampleWrite(VALIndex(learnedCenters, i), centersOut);// ExampleWrite(VALIndex(learnedCenters, i), stdout); }// printf("------------------\n"); fclose(centersOut); }}static int _CheckConverganceUpdateStats(IterationStatsPtr last, IterationStatsPtr current) { float thisDistance; float bound, lowerBound, upperBound, clusterBound; float error; ExamplePtr eThis, eLast; int i, j; bound = 0; lowerBound = 0; upperBound = 0; for(i = 0 ; i < VALLength(last->centroids) ; i++) { eLast = VALIndex(last->centroids, i); eThis = VALIndex(current->centroids, i); clusterBound = 0; for(j = 0 ; j < ExampleGetNumAttributes(eThis) ; j++) { /* HERE fix for discrete ?? */ thisDistance = ExampleGetContinuousAttributeValue(eLast, j) - ExampleGetContinuousAttributeValue(eThis, j); if(thisDistance < 0) { thisDistance *= -1; } error = IterationStatsErrorBoundDimension(last, i, j) + IterationStatsErrorBoundDimension(current, i, j); bound += pow(thisDistance, 2); clusterBound += pow(thisDistance, 2); lowerBound += pow(max(thisDistance - error, 0), 2); upperBound += pow(thisDistance + error, 2); if(gMessageLevel > 3) { printf("e: %.4f LossDeltas: dim %.4f sum %.4f min %.4f max %.4f\n", error, bound, clusterBound, lowerBound, upperBound); } } if(gMessageLevel > 0) { printf(" cluster %d moved ^2 loss of %f\n", i, clusterBound); } } if(gMessageLevel > 1) { for(i = 0 ; i < VALLength(current->centroids) ; i++) { for(j = 0 ; j < VALLength(current->centroids) ; j++) { printf("%.3f ", ExampleDistance(VALIndex(current->centroids, i), VALIndex(current->centroids, j))); } printf("\n"); } } if(gMessageLevel > 0) { printf(" clusters moved [ %f - %f - %f ] tau %f\n", lowerBound, bound, upperBound, gConvergeDelta); } if(bound <= gConvergeDelta / 3.0) { current->convergeVFEM = 1; } if(lowerBound <= gConvergeDelta) { current->possibleIDConverge = 1; if(bound <= gConvergeDelta) { current->wouldEMConverge = 1; } if(upperBound <= gConvergeDelta) { current->guarenteeIDConverge = 1; } else if(gMessageLevel > 0) { printf(" IDEM may have or may not have converged.\n"); } } if(gMessageLevel > 0) { printf(" converge info guarenteeID: %d possibleID: %d - EM / 3.0 this: %d last: %d\n", current->guarenteeIDConverge, current->possibleIDConverge, current->convergeVFEM, last->convergeVFEM); } if(gBatch || gAllowBadConverge) { if(gMessageLevel > 0 && gAllowBadConverge && current->wouldEMConverge) { printf(" found a potentially bad converge.\n"); } return current->wouldEMConverge; } else { return current->guarenteeIDConverge || (current->convergeVFEM && last->convergeVFEM); }}float AssignmentScaledDeltaMax(ExamplePtr e, ExamplePtr centroid, ExamplePtr min, ExamplePtr max, float epsilon) { float observedDelta; observedDelta = ExampleDistance(e, centroid); // maximumDelta = max(ExampleDistance(e, min), // ExampleDistance(e, max)); ///* deal with the assignErrorScale */ //return observedDelta + (gAssignErrorScale * (maximumDelta - observedDelta)); return observedDelta + epsilon;}static int _PointInBox(ExamplePtr e, ExamplePtr cMin, ExamplePtr cMax) { int i; for(i = 0 ; i < ExampleGetNumAttributes(e) ; i++) { if(ExampleGetContinuousAttributeValue(e, i) < ExampleGetContinuousAttributeValue(cMin, i) || ExampleGetContinuousAttributeValue(e, i) > ExampleGetContinuousAttributeValue(cMax, i)) { return 0; } } return 1;}float AssignmentScaledDeltaMin(ExamplePtr e, ExamplePtr centroid, ExamplePtr min, ExamplePtr max, float epsilon) { float observedDelta; observedDelta = ExampleDistance(e, centroid); //if(_PointInBox(e, min, max)) { // minimumDelta = 0; //} else { // minimumDelta = min(ExampleDistance(e, min), // ExampleDistance(e, max)); //} //return min(observedDelta - (gAssignErrorScale * // (observedDelta - minimumDelta)), observedDelta); return max(observedDelta - epsilon, 0);}static void _RecordGeoffBoundInfo(ExamplePtr e, IterationStatsPtr is, ExampleSpecPtr es) { int i, j; ExamplePtr centroid, cMin, cMax; double denominator, numerator, weight; double *denomValues; /* HERE modify for negative Xs */ /* do the W-Plusses */ denomValues = MNewPtr(sizeof(double) * VALLength(is->centroids)); denominator = 0; for(i = 0 ; i < VALLength(is->centroids) ; i++) { centroid = VLIndex(is->centroids, i); cMax = VLIndex(is->cMax, i); cMin = VLIndex(is->cMin, i); denomValues[i] = exp( (-1.0 / (2.0 * gSigmaSquare)) * pow(AssignmentScaledDeltaMax(e, centroid, cMin, cMax, is->lastBound[i]), 2)); denominator += denomValues[i]; } for(i = 0 ; i < VALLength(is->centroids) ; i++) { centroid = VLIndex(is->centroids, i); cMax = VLIndex(is->cMax, i); cMin = VLIndex(is->cMin, i); numerator = exp( (-1.0 / (2.0 * gSigmaSquare)) * pow(AssignmentScaledDeltaMin(e, centroid, cMin, cMax, is->lastBound[i]), 2)); denominator -= denomValues[i]; denominator += numerator; weight = (numerator / denominator); if(weight > 1.0) { weight = 1.0; } //printf("c%d num: %.4f denom: %.4f w+: %.4f\n", i, numerator, // denominator, weight); // printf(" ob delta: %.4f as deltamin: %.4f\n", // ExampleDistance(e, centroid), // AssignmentScaledDeltaMin(e, centroid, cMin, cMax)); is->wPlus[i] += (numerator / denominator); is->wPlusSquare[i] += (numerator / denominator) * (numerator / denominator); for(j = 0 ; j < ExampleSpecGetNumAttributes(es) ; j++) { if(ExampleGetContinuousAttributeValue(e, j) >= 0) { is->wxPlus[i][j] += (numerator / denominator) * ExampleGetContinuousAttributeValue(e, j); } else { is->wxMinus[i][j] += (numerator / denominator) * ExampleGetContinuousAttributeValue(e, j); } } denominator += denomValues[i]; denominator -= numerator; } /* do the W-Minuses */ denominator = 0; for(i = 0 ; i < VALLength(is->centroids) ; i++) { centroid = VLIndex(is->centroids, i); cMax = VLIndex(is->cMax, i); cMin = VLIndex(is->cMin, i); denomValues[i] = exp( (-1.0 / (2.0 * gSigmaSquare)) * pow(AssignmentScaledDeltaMin(e, centroid, cMin, cMax, is->lastBound[i]), 2)); denominator += denomValues[i]; } for(i = 0 ; i < VALLength(is->centroids) ; i++) { centroid = VLIndex(is->centroids, i); cMax = VLIndex(is->cMax, i); cMin = VLIndex(is->cMin, i); numerator = exp( (-1.0 / (2.0 * gSigmaSquare)) * pow(AssignmentScaledDeltaMax(e, centroid, cMin, cMax, is->lastBound[i]), 2)); denominator -= denomValues[i]; denominator += numerator; //printf("c%d num: %.4f denom: %.4f w-: %.4f\n", i, numerator, // denominator, (numerator / denominator)); //printf(" ob delta: %.4f as deltamax: %.4f\n", // ExampleDistance(e, centroid), // AssignmentScaledDeltaMax(e, centroid, cMin, cMax)); is->wMinus[i] += (numerator / denominator); for(j = 0 ; j < ExampleSpecGetNumAttributes(es) ; j++) { if(ExampleGetContinuousAttributeValue(e, j) >= 0) { is->wxMinus[i][j] += (numerator / denominator) * ExampleGetContinuousAttributeValue(e, j); } else { is->wxMinus[i][j] += (numerator / denominator) * ExampleGetContinuousAttributeValue(e, j); } } denominator += denomValues[i]; denominator -= numerator; } MFreePtr(denomValues);}static int _DoClusterIterationDidConverge(FILE *data, ExampleSpecPtr es, FILE *boundData) { int i,j; ExamplePtr e, centroid; long seen = 0; int done; IterationStatsPtr is, newIs; double denominator, numerator; is = VALIndex(gStatsList, VALLength(gStatsList) - 1); if(gMessageLevel > 1) { printf("enter iteration %d seen %d\n", gIteration, gTotalExamplesSeen); fflush(stdout); } done = 0; e = ExampleRead(data, es); while(e != 0 && !done ) { seen++; gTotalExamplesSeen++; is->n++; if(gMessageLevel > 3) { //IterationStatsWrite(is, es, stdout); printf("-------------------------------\nincorporating: "); ExampleWrite(e, stdout); for(i = 0 ; i < VALLength(is->centroids) ; i++) {
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -