// svmfusvmlargeopt.cpp
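// Working-set ("chunking") routines for SvmFu's SvmLargeOpt template:
// initial working-set construction (cold/warm/hot starts), per-chunk
// optimization via SvmSmallOpt, alpha/b bookkeeping between chunks, and
// leave-one-out error estimation. (This listing begins mid-way through
// the cold-start routine, so its opening declaration is not shown.)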
  int i;
  int totFound;
  int size = getSize();
  int yPos = 0, yNeg = 0;
  for (i = 0; i < size; i++) {
    if (getY(i) == 1) {
      yPos++;
    } else {
      yNeg++;
    }
  }
  int yNegDes = (yNeg*sS)/size;
  int yPosDes = sS - yNegDes;
  if (verbosity_ > 0) {
    cout << "The training set contains " << yPos << " positive and "
         << yNeg << " negative examples." << endl;
  }
  // initInfo initInfoArray[2] = { { 0, sS/2+(sS%2), 0, 0,    1 },
  //                               { 0, sS/2,       0, sS-1, -1 } };
  initInfo initInfoArray[2] = { { 0, yPosDes, 0, 0,    1 },
                                { 0, yNegDes, 0, sS-1, -1 } };
  for (i = 0, totFound = 0; i < size; i++) {
    initInfo* iip;
    iip = (getY(i) == 1 ? &initInfoArray[0] : &initInfoArray[1]);
    if (totFound < sS || iip->numGood < iip->maxGood) {
      if (totFound >= sS) {
        // We thought we might need this example in the initial
        // working set, but we've now found enough examples from
        // the other class, so push it onto the queue.
        int elt = workingSet_[iip->startPos];
        workingSetPos_[elt] = -1;
        eltQueue_.push(QueueElt_(elt, 0));
        initInfo* iip2 = (getY(i) == 1 ? &initInfoArray[1] : &initInfoArray[0]);
        iip2->numFound--;
      }
      workingSet_[iip->startPos] = i;
      workingSetPos_[i] = iip->startPos;
      iip->numFound++;
      iip->startPos += iip->posChange;
      totFound++;
    } else {
      // We've already got our full complement of examples, so push
      // this one onto the queue.
      eltQueue_.push(QueueElt_(i, 0));
    }
    if (iip->numGood <= iip->maxGood) {
      iip->numGood++;
    }
  }
  // Consistency check: both classes must be represented.
  if (initInfoArray[0].numGood == 0 || initInfoArray[1].numGood == 0) {
    cerr << "Error: No "
         << (initInfoArray[0].numGood == 0 ? "positive" : "negative")
         << " examples in data set. Aborting." << endl;
    exit(1);
  }
  if (verbosity_ > 1) {
    cout << "Initial chunk contains " << initInfoArray[0].numFound
         << " positive examples and " << initInfoArray[1].numFound
         << " negative examples." << endl << endl;
  }
}

/*! Puts as many USVs as possible into the working set;
 *  then pops enough elements off the queue to fill the working set.
 */
CallTemplate(void, createInitialWorkingSetWarm)() {
  int totSize = getSize();
  int numFound = 0;
  // First, put as many USVs as possible into the working set.
  for (int i = 0; i < totSize; i++) {
    if (unbndSupVecP(i) && numFound < chunkSize_) {
      workingSet_[numFound] = i;
      workingSetPos_[i] = numFound;
      numFound++;
    } else {
      eltQueue_.push(QueueElt_(i, -KKT_ViolationAmt(i)));
    }
  }
  // Warn if there's not enough space...
  if (getNumUnbndSupVecs() > numFound) {
    cerr << "WARNING: Not enough room in chunk to hold all USVs. Badbadbad."
         << endl;
  }
  // Pop as many more points as necessary off the queue.
  // set<QueueElt_>::iterator it = eltQueue_.end();
  while (numFound < chunkSize_) {
    QueueElt_ qe = eltQueue_.top();
    eltQueue_.pop();
    workingSet_[numFound] = qe.elt_;
    workingSetPos_[qe.elt_] = numFound;
    numFound++;
  }
}
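/*! Rebuilds the working set from the kernel cache's existing
 *  column-to-training-set mapping (a "hot" start). If any cache column
 *  maps to no training example, falls back to a cold start instead.
 */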
CallTemplate(void, createInitialWorkingSetHot)() {
  int i;
  for (i = 0; i < chunkSize_; i++) {
    workingSet_[i] = chunkKernCache_->getColToTrnSet(i);
  }
  for (i = 0; i < svmSize_; i++) {
    workingSetPos_[i] = chunkKernCache_->getTrnSetToCol(i);
  }
  // Check to make sure the cached mapping is complete.
  for (i = 0; i < chunkSize_; i++) {
    if (workingSet_[i] == -1) {
      createInitialWorkingSetCold();
      return;
    }
  }
  // Otherwise, push all the unused elements onto the queue.
  for (i = 0; i < svmSize_; i++) {
    if (workingSetPos_[i] == -1) {
      eltQueue_.push(QueueElt_(i, KKT_ViolationAmt(i)));
    }
  }
}

CallTemplate(void, buildAndSolveChunk)() {
  chunkSvm_ = new SvmSmallOpt<DataPt, KernVal>(chunkSize_, chunkY_,
                                               chunkTrnSetPtr_,
                                               kernProdFuncPtr_, 10, tol_,
                                               getEpsilon(), chunkKernCache_);
  chunkSvm_->setCVec(chunkC_Vec_);
  chunkSvm_->setB(getB());
  chunkSvm_->setAllAlphas(chunkAlphas_);
  // At THIS point in the code, outputs_[ex] is correct for all points
  // IN THE WORKING SET, so we can use it to determine the offsets.
  DoubleVec offsets = new double[chunkSize_];
  for (int i = 0; i < chunkSize_; i++) {
    offsets[i] = outputs_[workingSet_[i]] - chunkSvm_->outputAtTrainingExample(i);
  }
  chunkSvm_->setOffsetVec(offsets);
  chunkSvm_->optimize();
  if (verbosity_ > 0) {
    chunkSvm_->fixB();
  } else {
    chunkSvm_->fixB(false);
  }
  majorIterations_++;
  if (verbosity_ > 0) {
    cout << "CHUNK OPTIMIZATION " << majorIterations_ << " COMPLETE (#USVs="
         << chunkSvm_->getNumUnbndSupVecs() << ", #SVs="
         << chunkSvm_->getNumSupVecs() << ")" << endl << endl;
  }
  delete[] offsets;
}

CallTemplate(void, destroyChunk)() {
  delete chunkSvm_;
}
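// For orientation: the enclosing optimization driver (defined elsewhere in
// SvmLargeOpt, not part of this excerpt) presumably alternates the steps
// above until no example violates the KKT conditions, roughly:
//
//   createInitialWorkingSetCold();   // or ...Warm()/...Hot() on a restart
//   do {
//     buildAndSolveChunk();          // solve the sub-problem on the chunk
//     updateAlphasAndB();            // commit alphas/b, record history
//     // pivot the worst KKT violators into the working set via
//     // pivotWorkingSet(pointToAdd, pointToRemove)
//   } while (/* some example still violates the KKT conditions */);
//   destroyChunk();
//
// This is a sketch only; the real loop's termination test and pivot
// selection live outside this excerpt.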
/*! \fn void SvmLargeOpt<DataPt, KernVal>::updateAlphasAndB ()
 *
 *  (1) Clears the removal queue<br>
 *  (2) Copies alphas and b out of our SvmSmallOpt<br>
 *  (3) Sorts the working set into removalQueue_ based on KKT_ViolationAmt
 */
CallTemplate(void, updateAlphasAndB)() {
  int i;
  // Clear removalQueue_ of old values (if any).
  while (removalQueue_.size()) {
    removalQueue_.pop();
  }
  // removalQueue_.clear();
  AlphaDiffElt_ *alphaDiffs = new AlphaDiffElt_[chunkSize_];
  DoubleVec chunkAlphas = chunkSvm_->getAllAlphas();
  double phi = 0;
  double bDiff = chunkSvm_->getB() - getB();
  // Delete excess rows from kernelCache;
  double eps = getEpsilon();
  alphaDiffHistory_.push_back(alphaDiffs);
  int cnt = 0;
  for (i = 0; i < chunkSize_; i++) {
    chunkAlphas_[i] = chunkAlphas[i];  // temporary to permanent
    int ex = workingSet_[i];
    double diff = chunkAlphas[i] - getAlpha(ex);
    if (fabs(diff) > eps) {
      alphaDiffs[cnt].elt_ = ex;
      alphaDiffs[cnt].diff_ = diff;
      phi += getY(ex)*diff*(chunkSvm_->outputAtTrainingExample(i)
                            - outputs_[ex] - bDiff);
      cnt++;
    }
    setAlpha(ex, chunkAlphas[i]);
    outputs_[ex] = chunkSvm_->outputAtTrainingExample(i);
    lastCheckIter_[ex] = majorIterations_;
    // Now place an element on the removal queue.
    double KKT_Dist = KKT_ViolationAmt(ex);
    // Add 10 to remove "zero" elements first. Kludge.
    removalQueue_.push(QueueElt_(ex, -KKT_Dist + (getAlpha(ex) < eps ? 10 : 0)));
  }
  alphaDiffLengths_.push_back(cnt);
  delete[] chunkAlphas;
  // If we're actually done, the b from the chunk will be the same as the
  // b implied by any USVs not in the chunk (hopefully, there won't be any
  // of those anyway...).
  bHistory_.push_back(getB());  // Put the OLD value of b in the history.
  setB(chunkSvm_->getB());
  // cout << "PHI = " << phi << endl;
  phis_.push_back(phi);
  if (usingLinear_) {
    int i;
    for (i = 0; i < linDims_; i++) {
      w_[i] = 0;
    }
    int size = getSize();
    double eps = getEpsilon();
    for (i = 0; i < size; i++) {
      if (getAlpha(i) > eps) {
        addToWPtr_(w_, getTrainingExample(i), getY(i)*getAlpha(i));
      }
    }
  }
}

CallTemplate(double, outputAtTrainingExample)(int ex) const {
  updateNumber_++;
  int i, e;
  int size = getSize();
  if (updateNumber_ >= SVM_RESET_UPDATES_HOW_OFTEN) {
    updateNumber_ = 1;
    for (i = 0; i < size; i++) {
      updateHelper_[i] = 0;
    }
  }
  // If we have to recalculate the output for this point...
  if (lastCheckIter_[ex] != majorIterations_) {
    if (usingLinear_) {
      outputs_[ex] = multWPtr_(w_, getTrainingExample(ex)) + getB();
    } else {
      int lC = lastCheckIter_[ex];
      // For each iteration since we last updated...
      while (lC < majorIterations_) {
        AlphaDiffElt_ *aD = alphaDiffHistory_[lC];
        // For each diff in this iteration, update the output for this point.
        for (i = 0; i < alphaDiffLengths_[lC]; i++) {
          e = aD[i].elt_;
          if (updateHelper_[e] != updateNumber_) {
            tempKernProds_[e] = chunkKernCache_->trnSetKernProd(ex, e);
            updateHelper_[e] = updateNumber_;
          }
          outputs_[ex] += getY(e)*aD[i].diff_*tempKernProds_[e];
        }
        lC++;
      }
      outputs_[ex] -= bHistory_[lastCheckIter_[ex]];
      outputs_[ex] += getB();
    }
    lastCheckIter_[ex] = majorIterations_;
  }
  return outputs_[ex];
}

/*! \fn SvmLargeOpt<DataPt, KernVal>::KKT_ViolationAmt (int)
 *
 *  Returns the KKT violation for the specified point in the training set.
 */
CallTemplate(double, KKT_ViolationAmt)(int ex) const {
  // If a point has a C of 0, always return a violation of zero so it is
  // never added to a working set. This makes sense because even if C=0
  // for a point, that point ALWAYS satisfies the optimality conditions,
  // since its alpha can't move. We don't even need to do the computation
  // here.
  double C = getC(ex);
  double eps = getEpsilon();
  if (C < eps) {
    return 0;
  }
  double yerr = getY(ex)*outputAtTrainingExample(ex);
  double alpha = getAlpha(ex);
  if (alpha < eps) {
    return 1 - yerr;
  } else if (alpha > C - eps) {
    return yerr - 1;
  } else {
    return fabs(yerr - 1);
  }
}

/*! \fn SvmLargeOpt<DataPt, KernVal>::pivotWorkingSet (int, int)
 *
 *  (1) Updates chunk data structures to reflect the swap<br>
 *  (2) Adds cells for the point to the kernel cache
 */
CallTemplate(void, pivotWorkingSet)(int pointToAdd, int pointToRemove) {
  int wsPos = workingSetPos_[pointToRemove];
  ///////////////////
  // error checking
  ///////////////////
  if (wsPos < 0 || wsPos >= chunkSize_) {
    cerr << "ERROR! wsPos out of range: " << wsPos << endl;
    exit(1);
  }
  if (workingSetPos_[pointToAdd] != -1) {
    cerr << pointToAdd << " is ALREADY in wset!" << endl;
    exit(1);
  }
  if (pointToAdd < 0 || pointToAdd >= getSize()) {
    cerr << "POINT TO ADD out of range: " << pointToAdd << endl;
    exit(1);
  }
  if (pointToRemove < 0 || pointToRemove >= getSize()) {
    cerr << "POINT TO REMOVE out of range: " << pointToRemove << endl;
    exit(1);
  }
  ///////////////////////////////////////
  // update working set data structures
  ///////////////////////////////////////
  workingSetPos_[pointToAdd] = wsPos;
  workingSetPos_[pointToRemove] = -1;
  workingSet_[wsPos] = pointToAdd;
  chunkY_[wsPos] = getY(pointToAdd);
  chunkC_Vec_[wsPos] = getC(pointToAdd);
  chunkTrnSetPtr_[wsPos] = getTrainingExample(pointToAdd);
  chunkAlphas_[wsPos] = getAlpha(pointToAdd);
}

CallTemplate(double, dualObjFunc)() const {
  double b = getB();
  double result = 0.0;
  int size = getSize();
  for (int i = 0; i < size; i++) {
    result += getAlpha(i)*(1 - .5*getY(i)*(outputs_[i] - b));
  }
  return result;
}

CallTemplate(void, getCurrentOutputVals)(DoubleVec outputVals) {
  int size = getSize();
  for (int i = 0; i < size; i++) {
    outputVals[i] = outputAtTrainingExample(i);
  }
}

CallTemplate(int, computeLeaveOneOutErrors)(DoubleVec LOO_Vals) {
  int size = getSize();
  int i;
  double eps = getEpsilon();
  if (!optimized_) {
    cerr << "Error: Cannot compute Leave-One-Out Errors on an unoptimized SVM."
         << endl;
    return 0;
  }
  BoolVec LOO_CompNeededP_ = new bool[size];
  if (verbosity_ > 0) {
    cout << "Computing Leave-One-Out Errors." << endl;
  }
  // Update all the outputs, just in case. This can be necessary because
  // of the phis. A little more care could possibly save some time here,
  // but this is simpler and correct.
  computeTrainingSetPerf(false);
  int compsNeeded = 0;
  for (i = 0; i < size; i++) {
    if (getAlpha(i) < eps) {
      LOO_CompNeededP_[i] = false;
      LOO_Vals[i] = outputAtTrainingExample(i);
    } else {
      compsNeeded++;
      LOO_CompNeededP_[i] = true;
      LOO_Vals[i] = 0;
    }
  }
  int numErrors = computeLeaveOneOutErrorsInternal(LOO_CompNeededP_, LOO_Vals);
  delete[] LOO_CompNeededP_;
  if (verbosity_ > 0) {
    cout << "Done." << endl;
  }
  return numErrors;
}
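/*! For each support vector i flagged in LOO_CompNeededP, the helper below
 *  removes i from the solution: i's contribution is subtracted from every
 *  cached output, its alpha mass is redistributed onto other examples
 *  (raising same-class alphas toward C, lowering opposite-class alphas
 *  toward 0, so that sum_j y_j*alpha_j stays 0), its C is temporarily set
 *  to 0 so it cannot re-enter, the SVM is re-optimized, and i's output
 *  under the reduced machine is recorded in LOO_Vals.
 */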
CallTemplate(int, computeLeaveOneOutErrorsInternal)(BoolVec LOO_CompNeededP,
                                                    DoubleVec LOO_Vals) {
  int size = getSize();
  int i, j, k;
  int numErrors = 0;
  double eps = getEpsilon();
  int workSetPos;
  int oldVerbosity = verbosity_;
  verbosity_ = 0;
  for (i = 0; i < size; i++) {
    if (LOO_CompNeededP[i]) {
      double oldC = getC(i);
      double alphaRemains = getAlpha(i);
      int yI = getY(i);
      double aI = getAlpha(i);
      // Remove this alpha's contribution from all the cached outputs.
      for (j = 0; j < size; j++) {
        outputs_[j] -= yI*aI*chunkKernCache_->trnSetKernProd(i, j);
      }
      j = -1;
      while (alphaRemains > eps) {
        j++;
        if (i == j) {
          continue;
        }
        if (j >= size) {
          cerr << "Error: Couldn't adjust solution in LOO computation. Aborting."
               << endl;
          exit(-1);
        }
        double cJ = getC(j);
        double aJ = getAlpha(j);
        int yJ = getY(j);
        double diffAvail = 0;
        if (yJ == yI && aJ < cJ - eps) {
          diffAvail = cJ - aJ;
        }
        if (yJ != yI && aJ > eps) {
          diffAvail = aJ;
        }
        diffAvail = diffAvail > alphaRemains ? alphaRemains : diffAvail;
        if (diffAvail > eps) {
          for (k = 0; k < size; k++) {
            outputs_[k] += yJ*(yI*yJ*diffAvail)*chunkKernCache_->trnSetKernProd(j, k);
          }
          setAlpha(j, aJ + yI*yJ*diffAvail);
          workSetPos = chunkKernCache_->getTrnSetToCol(j);
          if (workSetPos != -1) {
            chunkAlphas_[workSetPos] += yI*yJ*diffAvail;
          }
          alphaRemains -= diffAvail;
        }
      }
      setAlpha(i, 0);
      setC(i, 0);
      workSetPos = chunkKernCache_->getTrnSetToCol(i);
      if (workSetPos != -1) {
        chunkC_Vec_[workSetPos] = 0;
        chunkAlphas_[workSetPos] = 0;
      }
      optimized_ = false;
      reoptimize();
      double output = outputAtTrainingExample(i);
      if (yI*output <= 0) {
        numErrors++;
      }
      LOO_Vals[i] = output;
      setC(i, oldC);
      if (workSetPos != -1) {
        chunkC_Vec_[workSetPos] = oldC;
      }
    }
  }
  // Get the SVM back to its "original" solved state.
  optimized_ = false;
  reoptimize();
  verbosity_ = oldVerbosity;
  return numErrors;
}

#include "SvmFuSvmDataPoint.h"

#define IterateTypes(datatype, kerntype) \
  template class SvmLargeOpt<DataPoint<datatype>, kerntype>;
#include "SvmFuSvmTypes.h"
// template class vector<QueueElt_>;
// extern template class SvmBase<DataPoint<datatype>, kerntype>;
// extern template class SvmSmallOpt<DataPoint<datatype>, kerntype>;
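The explicit instantiations above come from SvmFuSvmTypes.h. As a minimal
sketch of how a caller might drive the leave-one-out machinery defined in
this file, assuming an already constructed and optimized
SvmLargeOpt<DataPoint<double>, double> named svm (its construction is
defined elsewhere in SvmFu and elided here):

// Hypothetical caller sketch; `svm` is assumed to be a trained
// SvmLargeOpt<DataPoint<double>, double> built elsewhere in SvmFu.
int n = svm.getSize();
double* looVals = new double[n];  // DoubleVec is a raw double array here
int looErrors = svm.computeLeaveOneOutErrors(looVals);
cout << looErrors << " leave-one-out errors out of " << n
     << " training examples." << endl;
delete[] looVals;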