?? vfem.c
字號:
exampleIn = stdin; if(!gUseGeoffBound) { if(gMessageLevel > 0) { printf("Trying to use the tight bound on stdin won't work, reverting to the looser one pass bound.\n"); } gUseGeoffBound = 1; } } /* Pick the initial centroids from the dataset */ if(!gStdin) { /* in stream mode we never close & reopen the file */ sprintf(fileNames, "%s/%s.data", gSourceDirectory, gFileStem); exampleIn = fopen(fileNames, "r"); DebugError(exampleIn == 0, "Unable to open the .data file"); } centers = VALNew(); /* try get the gInitNumber-ith batch of centroids */ for(i = 0 ; i < gInitNumber ; i++) { while(VALLength(centers) > 0) { ExampleFree(VALRemove(centers, VALLength(centers) - 1)); } _PickInitialCentroids(es, centers, exampleIn); } if(!gStdin) { /* in stream mode we never close & reopen the file */ fclose(exampleIn); } /* create the initial stats structure from the initial centroids */ VALAppend(gStatsList, IterationStatsInitial(centers)); if(gMessageLevel >= 1) { printf("allocation %ld\n", MGetTotalAllocation()); } learnTime = 0; gIteration = 0; gRound = 0; times(&starttime); do { gRound++; /* if we aren't on the first round free the cruft from the prvious one */ if(gRound > 1) { if(gReassignCentersEachRound) { if(!gStdin) { /* in stream mode we never close & reopen the file */ sprintf(fileNames, "%s/%s.data", gSourceDirectory, gFileStem); exampleIn = fopen(fileNames, "r"); DebugError(exampleIn == 0, "Unable to open the .data file"); } /* burn some examples to make the selection be more random */ for(i = RandomRange(0, 10 * gNumClusters) ; i > 0 ; i--) { ExampleFree(ExampleRead(exampleIn, es)); } /* HERE MEM this newCenters may leak memory */ newCenters = _GetCentroidsForNextRound(exampleIn, es); } for(i = VALLength(gStatsList) - 1 ; i >= 0 ; i--) { IterationStatsFree(VALRemove(gStatsList, i)); } if(gReassignCentersEachRound) { VALAppend(gStatsList, IterationStatsInitial(newCenters)); } else { /* use the inital centroids again */ VALAppend(gStatsList, 
IterationStatsInitial(centers)); } } gIteration = 1; if(!gStdin) { /* in stream mode we never close & reopen the file */ sprintf(fileNames, "%s/%s.data", gSourceDirectory, gFileStem); exampleIn = fopen(fileNames, "r"); boundDataIn = fopen(fileNames, "r"); DebugError(exampleIn == 0 || boundDataIn == 0, "Unable to open the .data file"); } if(gMessageLevel > 0) { printf("========================\n"); printf("round %d n %ld\n", gRound, gN); } while(!_DoClusterIterationDidConverge(exampleIn, es, boundDataIn)) { times(&endtime); /* see if we are toast, if so don't do the rest of the book keep */ thisIs = VALIndex(gStatsList, VALLength(gStatsList) - 1); breakOut = 0; lastIs = 0; if(!gBatch && (_CalculateIDKMEarlyBound() > (40000 * gThisErrorTarget) || !thisIs->foundBound)) { /* do one last check to see if we're on the last round */ if(VALLength(gStatsList) > 1) { lastIs = VALIndex(gStatsList, VALLength(gStatsList) - 2); if(!(lastIs->n >= gDBSize)) { breakOut = 1; } else { breakOut = 0; } } else { breakOut = 0; } } if(breakOut) { /* IDEM could have stoped and we are a loooong way away */ if(gMessageLevel > 1) { printf(" broke out of a round early foundBound %d current bound %f - Target * 40000: %f\n", thisIs->foundBound, _CalculateIDKMEarlyBound(), 40000 * gThisErrorTarget); printf(" dbSize: %ld n: %ld\n", gDBSize, lastIs->n); } thisIs->foundBound = 0; break; } /* do the book keeping to go on to the next iteration */ learnTime += endtime.tms_utime - starttime.tms_utime; /* reset the file for the next iteration */ if(!gStdin) { /* in stream mode we never close & reopen the file */ fclose(exampleIn); fclose(boundDataIn); sprintf(fileNames, "%s/%s.data", gSourceDirectory, gFileStem); exampleIn = fopen(fileNames, "r"); boundDataIn = fopen(fileNames, "r"); DebugError(exampleIn == 0 || boundDataIn == 0, "Unable to open the .data file"); } if(gDoTests) { _doTests(es, thisIs->centroids, gIteration, learnTime, 0); } gIteration++; /* reset the timer for the next iteration */ 
times(&starttime); } /* end of a round */ /* test to see if we can get anything else from the file, if not then we won't be able to increase n and we are finished */ if(!gStdin) { e = ExampleRead(exampleIn, es); if(e == 0) { fileDone = 1; } else { fileDone = 0; ExampleFree(e); } /* close the file for the beginning of the next round can open */ fclose(exampleIn); fclose(boundDataIn); } else { if(gN >= gMaxExamplesPerIteration) { fileDone = 1; } else { fileDone = 0; } } thisIs = VALIndex(gStatsList, VALLength(gStatsList) - 1); if(thisIs->foundBound && thisIs->guarenteeIDConverge) { /* re-estimate l, update effective delta */ gEstimatedNumIterations = gIteration * 1.5; lastDelta = gNeededDelta; gNeededDelta = 1.0 - pow(1.0 - gDelta, 1.0 / (float)(gD * gNumClusters * gEstimatedNumIterations)); /* HERE fix this */ nIncrement = pow(thisIs->maxEkd/gTargetEkd, 2) * (log(2.0 / gNeededDelta) / log(2.0 / lastDelta)) * gN * 1.1; if(gMessageLevel > 1) { printf("suggested gN increment: %ld current gN: %ld\n", nIncrement, gN); } if(nIncrement > gN) { gN += nIncrement; } else { gN *= 2; } if(gN > gMaxExamplesPerIteration) { gN = gMaxExamplesPerIteration; } } else { gN *= 2; } if(gIterationNs != 0) { MFreePtr(gIterationNs); gIterationNs = 0; gNumIterationNs = 0; } if(!gBatch && gFancyStop && thisIs->foundBound) { if(gMessageLevel > 2) { IterationStatsWrite(thisIs, es, stdout); } CalculateExamplesPerIteration(gStatsList, &gIterationNs, &gNumIterationNs); /* normalize & update to # examples per iteration */ iterationNSum = 0; for(i = 0 ; i < gNumIterationNs ; i++) { iterationNSum += gIterationNs[i]; } if(iterationNSum > gDBSize * gNumIterationNs) { /* go on to a final round */ if(gMessageLevel > 2) { printf("CalculateExamplesPerIteration says we need too many samples, just go to one final round with the whole DB.\n"); } gN = gDBSize + 1; /* make sure it's final */ if(gIterationNs != 0) { MFreePtr(gIterationNs); gIterationNs = 0; gNumIterationNs = 0; } } else { if(iterationNSum < gN * 
gNumIterationNs) { /* use the actual Nis only if their sum is larger than the number of examples suggested by gN, otherwise use their ratios as the percent of gN * l to use in each iteration */ for(i = 0 ; i < gNumIterationNs ; i++) { gIterationNs[i] = ((gIterationNs[i] / iterationNSum) * gN) * gNumIterationNs; } } if(gMessageLevel > 1) { printf("ni:\n"); for(i = 0 ; i < gNumIterationNs ; i++) { printf("\t%d: %.4f\n", i, gIterationNs[i]); } } } } bound = _CalculateErrorBound(); if(gMessageLevel > 1) { printf(" found bound %d bound: %f guarentee converge %d file done %d\n", thisIs->foundBound, bound, thisIs->guarenteeIDConverge, fileDone); printf("==="); fflush(stdout); } } while(!(thisIs->guarenteeIDConverge && thisIs->foundBound && bound <= gThisErrorTarget) && !(gAllowBadConverge && thisIs->wouldEMConverge && thisIs->foundBound && bound <= gThisErrorTarget) && !fileDone && !gBatch); times(&endtime); learnTime += endtime.tms_utime - starttime.tms_utime; _OutputAllCentroids(bound); if(gMessageLevel > 0) { _OutputCentroidMovement(); } _doTests(es, ((IterationStatsPtr)VALIndex(gStatsList, VALLength(gStatsList) - 1))->centroids, gIteration, learnTime, 0); fclose(stdin); return 0;}

/*
 * CalculateExamplesPerIteration
 *
 * From the per-iteration statistics stored in `last`, compute how many
 * examples each iteration should use.
 *
 *   last       list whose entries are read as IterationStatsPtr.  Entries
 *              i and i + 1 are combined to describe iteration i, so the
 *              list yields VALLength(last) - 1 iterations.
 *   nextNiOut  out: receives a newly MNewPtr'd array of *num floats, one
 *              per iteration.  The caller owns it and must MFreePtr it.
 *   num        out: receives VALLength(last) - 1.
 *
 * Reads the globals gNumClusters, gD, gThisErrorTarget and gMessageLevel.
 * When gMessageLevel > 2 the intermediate values are printed to stdout.
 *
 * For every cluster k and iteration i the function builds:
 *   a[k][i]  = lastAssignmentBound[k] of entry i + 1 divided by
 *              lastBound[k] of entry i   (0 for i == 0)
 *   b[k][i]  = wMinus[k]^2 / (n * wPlusSquare[k])   (all from entry i)
 *   r[k][i]  = sqrt(gD * log(2) / (2 * b[k][i])) * prod_{j > i} a[k][j]
 *   ni[k][i] = (gNumClusters / gThisErrorTarget)
 *              * (sum_j cbrt(r[k][i] * r[k][j]^2))^2
 * and reports, per iteration, the largest ni over all clusters.
 *
 * NOTE(review): the formulas presumably follow the authors' paper; read it
 * before changing any of the arithmetic below.
 */
void CalculateExamplesPerIteration(VoidAListPtr last, float **nextNiOut, int *num) {
   IterationStatsPtr lastIs, currentIs;
   float **ni, **a, **b, **r;
   float tmp;
   int i, j, k;

   *num = VALLength(last) - 1;
   (*nextNiOut) = MNewPtr(sizeof(float) * *num);

   /* scratch tables: one row per cluster, one column per iteration */
   ni = MNewPtr(sizeof(float *) * gNumClusters);
   a = MNewPtr(sizeof(float *) * gNumClusters);
   b = MNewPtr(sizeof(float *) * gNumClusters);
   r = MNewPtr(sizeof(float *) * gNumClusters);
   for(k = 0 ; k < gNumClusters ; k++) {
      ni[k] = MNewPtr(sizeof(float) * *num);
      a[k] = MNewPtr(sizeof(float) * *num);
      b[k] = MNewPtr(sizeof(float) * *num);
      r[k] = MNewPtr(sizeof(float) * *num);
   }

   /* a and b come straight from consecutive pairs of stats entries */
   for(i = 0 ; i < *num ; i++) {
      lastIs = VALIndex(last, i);
      currentIs = VALIndex(last, i + 1);

      for(k = 0 ; k < gNumClusters ; k++) {
         if(i == 0) {
            a[k][i] = 0;
         } else {
            a[k][i] = currentIs->lastAssignmentBound[k] / lastIs->lastBound[k];
         }

         b[k][i] = pow(lastIs->wMinus[k], 2) /
                      ((float)lastIs->n * lastIs->wPlusSquare[k]);
      }
   }

   /* r: base term for iteration i scaled by the a factors of every
       later iteration */
   for(i = 0 ; i < *num ; i++) {
      for(k = 0 ; k < gNumClusters ; k++) {
         /* NOTE(review): original author was unsure whether gD belongs
             in this term */
         r[k][i] = sqrt((gD * log(2)) / (2.0 * b[k][i]));
         for(j = i + 1 ; j < *num ; j++) {
            r[k][i] *= a[k][j];
         }
      }
   }

   for(i = 0 ; i < *num ; i++) {
      for(k = 0 ; k < gNumClusters ; k++) {
         tmp = 0;
         /* j is an iteration index (second dimension of r), so it is
             bounded by *num; bounding it by gNumClusters would read past
             the end of r[k] whenever gNumClusters > *num */
         for(j = 0 ; j < *num ; j++) {
            tmp += pow((r[k][i] * pow(r[k][j], 2)) , 1.0 / 3.0);
         }
         ni[k][i] = ((float)gNumClusters / gThisErrorTarget);
         ni[k][i] *= pow(tmp, 2);
      }
   }

   /* each iteration needs as many examples as its most demanding cluster */
   for(i = 0 ; i < *num ; i++) {
      (*nextNiOut)[i] = 0;
      for(k = 0 ; k < gNumClusters ; k++) {
         if(ni[k][i] > (*nextNiOut)[i]) {
            (*nextNiOut)[i] = ni[k][i];
         }
      }
   }

   if(gMessageLevel > 2) {
      for(i = 0 ; i < *num ; i++) {
         printf("iteration %d nextTotal: %f\n", i, (*nextNiOut)[i]);
         for(k = 0 ; k < gNumClusters ; k++) {
            printf(" c%d: ", k);
            printf("a %.3f b %.3f r %.3f ", a[k][i], b[k][i], r[k][i]);
            printf("ni %.3f", ni[k][i]);
            printf("\n");
         }
      }
   }

   /* release the scratch tables; *nextNiOut stays with the caller */
   for(k = 0 ; k < gNumClusters ; k++) {
      MFreePtr(ni[k]);
      MFreePtr(a[k]);
      MFreePtr(b[k]);
      MFreePtr(r[k]);
   }
   MFreePtr(ni);
   MFreePtr(a);
   MFreePtr(b);
   MFreePtr(r);
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -