lbfgs.c
        /* Store the current position and gradient vectors. */
        veccpy(xp, x, n);
        veccpy(gp, g, n);

        /* Search for an optimal step. */
        if (param.orthantwise_c == 0.) {
            ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, &param);
        } else {
            ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
            owlqn_pseudo_gradient(
                pg, x, g, n,
                param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
                );
        }
        if (ls < 0) {
            /* Revert to the previous point. */
            veccpy(x, xp, n);
            veccpy(g, gp, n);
            ret = ls;
            goto lbfgs_exit;
        }

        /* Compute x and g norms. */
        vec2norm(&xnorm, x, n);
        if (param.orthantwise_c == 0.) {
            vec2norm(&gnorm, g, n);
        } else {
            vec2norm(&gnorm, pg, n);
        }

        /* Report the progress. */
        if (cd.proc_progress) {
            if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls)) != 0) {
                goto lbfgs_exit;
            }
        }

        /*
            Convergence test.
            The criterion is given by the following formula:
                |g(x)| / \max(1, |x|) < \epsilon
         */
        if (xnorm < 1.0) xnorm = 1.0;
        if (gnorm / xnorm <= param.epsilon) {
            /* Convergence. */
            ret = LBFGS_SUCCESS;
            break;
        }

        /*
            Test for stopping criterion.
            The criterion is given by the following formula:
                (f(past_x) - f(x)) / f(x) < \delta
         */
        if (pf != NULL) {
            /* We don't test the stopping criterion while k < past. */
            if (param.past <= k) {
                /* Compute the relative improvement from the past. */
                rate = (pf[k % param.past] - fx) / fx;

                /* The stopping criterion. */
                if (rate < param.delta) {
                    ret = LBFGS_STOP;
                    break;
                }
            }

            /* Store the current value of the objective function. */
            pf[k % param.past] = fx;
        }

        if (param.max_iterations != 0 && param.max_iterations < k+1) {
            /* Maximum number of iterations. */
            ret = LBFGSERR_MAXIMUMITERATION;
            break;
        }

        /*
            Update vectors s and y:
                s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
                y_{k+1} = g_{k+1} - g_{k}.
         */
        it = &lm[end];
        vecdiff(it->s, x, xp, n);
        vecdiff(it->y, g, gp, n);

        /*
            Compute scalars ys and yy:
                ys = y^t \cdot s = 1 / \rho.
                yy = y^t \cdot y.
            Notice that yy is used for scaling the hessian matrix H_0
            (Cholesky factor).
         */
        vecdot(&ys, it->y, it->s, n);
        vecdot(&yy, it->y, it->y, n);
        it->ys = ys;

        /*
            Recursive formula to compute dir = -(H \cdot g).
            This is described in page 779 of:
                Jorge Nocedal.
                Updating Quasi-Newton Matrices with Limited Storage.
                Mathematics of Computation, Vol. 35, No. 151,
                pp. 773--782, 1980.
         */
        bound = (m <= k) ? m : k;
        ++k;
        end = (end + 1) % m;

        /* Compute the steepest direction. */
        if (param.orthantwise_c == 0.) {
            /* Compute the negative of gradients. */
            vecncpy(d, g, n);
        } else {
            vecncpy(d, pg, n);
        }

        j = end;
        for (i = 0;i < bound;++i) {
            j = (j + m - 1) % m;    /* if (--j == -1) j = m-1; */
            it = &lm[j];
            /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */
            vecdot(&it->alpha, it->s, d, n);
            it->alpha /= it->ys;
            /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */
            vecadd(d, it->y, -it->alpha, n);
        }

        vecscale(d, ys / yy, n);

        for (i = 0;i < bound;++i) {
            it = &lm[j];
            /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */
            vecdot(&beta, it->y, d, n);
            beta /= it->ys;
            /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */
            vecadd(d, it->s, it->alpha - beta, n);
            j = (j + 1) % m;        /* if (++j == m) j = 0; */
        }

        /* Constrain the search direction for orthant-wise updates. */
        if (param.orthantwise_c != 0.) {
            for (i = param.orthantwise_start;i < param.orthantwise_end;++i) {
                if (d[i] * pg[i] >= 0) {
                    d[i] = 0;
                }
            }
        }

        /* Now the search direction d is ready. We try step = 1 first. */
        step = 1.0;
    }

lbfgs_exit:
    /* Return the final value of the objective function. */
    if (ptr_fx != NULL) {
        *ptr_fx = fx;
    }

    vecfree(pf);

    /* Free memory blocks used by this function. */
    if (lm != NULL) {
        for (i = 0;i < m;++i) {
            vecfree(lm[i].s);
            vecfree(lm[i].y);
        }
        vecfree(lm);
    }
    vecfree(pg);
    vecfree(w);
    vecfree(d);
    vecfree(gp);
    vecfree(g);
    vecfree(xp);

    return ret;
}
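/*
    Illustrative usage sketch (not part of the original library source):
    minimizing f(x) = \sum_i (x_i - 1)^2 with the lbfgs() driver above.
    This assumes the usual public entry points declared in lbfgs.h
    (lbfgs_malloc, lbfgs_free, lbfgs_parameter_init, lbfgs); the names
    example_evaluate and example_main are ours, chosen for this sketch.
 */
static lbfgsfloatval_t example_evaluate(
    void *instance,
    const lbfgsfloatval_t *x,
    lbfgsfloatval_t *g,
    const int n,
    const lbfgsfloatval_t step
    )
{
    int i;
    lbfgsfloatval_t fx = 0.;

    /* f(x) = sum_i (x_i - 1)^2, so g_i = 2 (x_i - 1). */
    for (i = 0;i < n;++i) {
        fx += (x[i] - 1.) * (x[i] - 1.);
        g[i] = 2. * (x[i] - 1.);
    }
    return fx;
}

static int example_main(void)
{
    int i, ret;
    lbfgsfloatval_t fx;
    lbfgsfloatval_t *x = lbfgs_malloc(100);
    lbfgs_parameter_t param;

    if (x == NULL) {
        return 1;
    }
    /* Starting point: the origin. */
    for (i = 0;i < 100;++i) {
        x[i] = 0.;
    }

    /* Default parameters; setting param.orthantwise_c > 0 would enable
       the OWL-QN branch seen in the main loop above. */
    lbfgs_parameter_init(&param);

    /* NULL progress callback and instance: run silently. */
    ret = lbfgs(100, x, &fx, example_evaluate, NULL, NULL, &param);

    lbfgs_free(x);
    return ret;
}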
static int line_search_backtracking(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *f,
    lbfgsfloatval_t *g,
    lbfgsfloatval_t *s,
    lbfgsfloatval_t *stp,
    const lbfgsfloatval_t* xp,
    const lbfgsfloatval_t* gp,
    lbfgsfloatval_t *wp,
    callback_data_t *cd,
    const lbfgs_parameter_t *param
    )
{
    int ret = 0, count = 0;
    lbfgsfloatval_t width, dg, norm = 0.;
    lbfgsfloatval_t finit, dginit = 0., dgtest;
    const lbfgsfloatval_t wolfe = 0.9, dec = 0.5, inc = 2.1;

    /* Check the input parameters for errors. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Compute the initial gradient in the search direction. */
    vecdot(&dginit, g, s, n);

    /* Make sure that s points to a descent direction. */
    if (0 < dginit) {
        return LBFGSERR_INCREASEGRADIENT;
    }

    /* The initial value of the objective function. */
    finit = *f;
    dgtest = param->ftol * dginit;

    for (;;) {
        veccpy(x, xp, n);
        vecadd(x, s, *stp, n);

        /* Evaluate the function and gradient values. */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);

        ++count;

        if (*f <= finit + *stp * dgtest) {
            /* The sufficient decrease condition. */
            if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG) {
                /* Check the strong Wolfe condition. */
                vecdot(&dg, g, s, n);
                if (dg > -wolfe * dginit) {
                    width = dec;
                } else if (dg < wolfe * dginit) {
                    width = inc;
                } else {
                    /* The strong Wolfe condition holds. */
                    return count;
                }
            } else {
                /* Exit with the loose Wolfe condition. */
                return count;
            }
        } else {
            width = dec;
        }

        if (*stp < param->min_step) {
            /* The step is the minimum value. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (*stp > param->max_step) {
            /* The step is the maximum value. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (param->max_linesearch <= count) {
            /* Maximum number of iterations. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }

        (*stp) *= width;
    }
}

static int line_search_backtracking_owlqn(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *f,
    lbfgsfloatval_t *g,
    lbfgsfloatval_t *s,
    lbfgsfloatval_t *stp,
    const lbfgsfloatval_t* xp,
    const lbfgsfloatval_t* gp,
    lbfgsfloatval_t *wp,
    callback_data_t *cd,
    const lbfgs_parameter_t *param
    )
{
    int i, ret = 0, count = 0;
    lbfgsfloatval_t width = 0.5, norm = 0.;
    lbfgsfloatval_t finit = *f, dgtest;

    /* Check the input parameters for errors. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Choose the orthant for the new point. */
    for (i = 0;i < n;++i) {
        wp[i] = (xp[i] == 0.) ? -gp[i] : xp[i];
    }

    for (;;) {
        /* Update the current point. */
        veccpy(x, xp, n);
        vecadd(x, s, *stp, n);

        /* The current point is projected onto the orthant. */
        owlqn_project(x, wp, param->orthantwise_start, param->orthantwise_end);

        /* Evaluate the function and gradient values. */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);

        /* Compute the L1 norm of the variables and add it to the objective value. */
        norm = owlqn_x1norm(x, param->orthantwise_start, param->orthantwise_end);
        *f += norm * param->orthantwise_c;

        ++count;

        dgtest = 0.;
        for (i = 0;i < n;++i) {
            dgtest += (x[i] - xp[i]) * gp[i];
        }

        if (*f <= finit + param->ftol * dgtest) {
            /* The sufficient decrease condition. */
            return count;
        }

        if (*stp < param->min_step) {
            /* The step is the minimum value. */
            return LBFGSERR_MINIMUMSTEP;
        }
        if (*stp > param->max_step) {
            /* The step is the maximum value. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (param->max_linesearch <= count) {
            /* Maximum number of iterations. */
            return LBFGSERR_MAXIMUMLINESEARCH;
        }

        (*stp) *= width;
    }
}
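/*
    A minimal standalone sketch (ours, not library code) of the
    sufficient-decrease (Armijo) test that drives the two backtracking
    searches above, applied to the 1-D quadratic f(x) = x^2. It halves the
    step with the same dec = 0.5 factor until
        f(x + step * d) <= f(x) + ftol * step * f'(x) * d
    holds, and returns how many halvings were needed.
 */
static int armijo_demo(void)
{
    lbfgsfloatval_t x = 3., d = -6.;                    /* d = -f'(x): a descent direction. */
    lbfgsfloatval_t f0 = x * x, dginit = 2. * x * d;    /* f(x) and f'(x) * d. */
    lbfgsfloatval_t ftol = 1e-4, step = 1.0;
    int count;

    for (count = 0;count < 20;++count) {
        lbfgsfloatval_t xt = x + step * d;
        lbfgsfloatval_t ft = xt * xt;

        /* Sufficient decrease test; dginit < 0, so the bound sits below f(x). */
        if (ft <= f0 + ftol * step * dginit) {
            return count;   /* Step accepted after `count` halvings. */
        }
        step *= 0.5;        /* Same `dec` factor as line_search_backtracking. */
    }
    return -1;              /* No acceptable step within 20 trials. */
}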
static int line_search_morethuente(
    int n,
    lbfgsfloatval_t *x,
    lbfgsfloatval_t *f,
    lbfgsfloatval_t *g,
    lbfgsfloatval_t *s,
    lbfgsfloatval_t *stp,
    const lbfgsfloatval_t* xp,
    const lbfgsfloatval_t* gp,
    lbfgsfloatval_t *wa,
    callback_data_t *cd,
    const lbfgs_parameter_t *param
    )
{
    int count = 0;
    int brackt, stage1, uinfo = 0;
    lbfgsfloatval_t dg;
    lbfgsfloatval_t stx, fx, dgx;
    lbfgsfloatval_t sty, fy, dgy;
    lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm;
    lbfgsfloatval_t finit, ftest1, dginit, dgtest;
    lbfgsfloatval_t width, prev_width;
    lbfgsfloatval_t stmin, stmax;

    /* Check the input parameters for errors. */
    if (*stp <= 0.) {
        return LBFGSERR_INVALIDPARAMETERS;
    }

    /* Compute the initial gradient in the search direction. */
    vecdot(&dginit, g, s, n);

    /* Make sure that s points to a descent direction. */
    if (0 < dginit) {
        return LBFGSERR_INCREASEGRADIENT;
    }

    /* Initialize local variables. */
    brackt = 0;
    stage1 = 1;
    finit = *f;
    dgtest = param->ftol * dginit;
    width = param->max_step - param->min_step;
    prev_width = 2.0 * width;

    /*
        The variables stx, fx, dgx contain the values of the step,
        function, and directional derivative at the best step.
        The variables sty, fy, dgy contain the values of the step,
        function, and derivative at the other endpoint of
        the interval of uncertainty.
        The variables stp, f, dg contain the values of the step,
        function, and derivative at the current step.
     */
    stx = sty = 0.;
    fx = fy = finit;
    dgx = dgy = dginit;

    for (;;) {
        /*
            Set the minimum and maximum steps to correspond to the
            present interval of uncertainty.
         */
        if (brackt) {
            stmin = min2(stx, sty);
            stmax = max2(stx, sty);
        } else {
            stmin = stx;
            stmax = *stp + 4.0 * (*stp - stx);
        }

        /* Clip the step in the range of [stpmin, stpmax]. */
        if (*stp < param->min_step) *stp = param->min_step;
        if (param->max_step < *stp) *stp = param->max_step;

        /*
            If an unusual termination is to occur then let
            stp be the lowest point obtained so far.
         */
        if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) ||
            (brackt && (stmax - stmin <= param->xtol * stmax))) {
            *stp = stx;
        }

        /*
            Compute the current value of x:
                x <- x + (*stp) * s.
         */
        veccpy(x, xp, n);
        vecadd(x, s, *stp, n);

        /* Evaluate the function and gradient values. */
        *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
        vecdot(&dg, g, s, n);

        ftest1 = finit + *stp * dgtest;
        ++count;

        /* Test for errors and convergence. */
        if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) {
            /* Rounding errors prevent further progress. */
            return LBFGSERR_ROUNDING_ERROR;
        }
        if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) {
            /* The step is the maximum value. */
            return LBFGSERR_MAXIMUMSTEP;
        }
        if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) {
            /* The step is the minimum value. */
            return LBFGSERR_MINIMUMSTEP;
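/*
    For reference: the More-Thuente search above seeks a step stp satisfying
    the strong Wolfe conditions, with ftol and gtol taken from param and
    dginit = g^T s at the starting point (dginit < 0 for a descent direction):

        f(x + stp * s) <= f(x) + ftol * stp * dginit      (sufficient decrease)
        |g(x + stp * s)^T s| <= gtol * |dginit|           (curvature)

    Below is our own hedged sketch of that acceptance test, not library code.
    It assumes f, dg, finit, dginit, and the tolerances are computed as in the
    loop above, and that <math.h> is available for fabs().
 */
static int wolfe_accept(
    lbfgsfloatval_t f, lbfgsfloatval_t dg,
    lbfgsfloatval_t finit, lbfgsfloatval_t dginit,
    lbfgsfloatval_t stp, lbfgsfloatval_t ftol, lbfgsfloatval_t gtol
    )
{
    /* Sufficient decrease (Armijo) condition. */
    if (f > finit + stp * ftol * dginit) {
        return 0;
    }
    /* Strong curvature condition: the directional derivative has shrunk. */
    if (fabs(dg) > gtol * fabs(dginit)) {
        return 0;
    }
    return 1;
}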