1 #ifndef STAN__OPTIMIZATION__BFGS_HPP
2 #define STAN__OPTIMIZATION__BFGS_HPP
9 #include <boost/math/special_functions/fpclassify.hpp>
20 namespace optimization {
32 template<
typename Scalar =
double>
55 template<
typename Scalar =
double>
71 template<
typename FunctorType,
typename QNUpdateType,
72 typename Scalar = double,
int DimAtCompile = Eigen::Dynamic>
75 typedef Eigen::Matrix<Scalar,DimAtCompile,1>
VectorT;
76 typedef Eigen::Matrix<Scalar,DimAtCompile,DimAtCompile>
HessianT;
123 return std::string(
"Successful step completed");
125 return std::string(
"Convergence detected: absolute change in objective function was below tolerance");
127 return std::string(
"Convergence detected: relative change in objective function was below tolerance");
129 return std::string(
"Convergence detected: gradient norm is below tolerance");
131 return std::string(
"Convergence detected: relative gradient magnitude is below tolerance");
133 return std::string(
"Convergence detected: absolute parameter change was below tolerance");
135 return std::string(
"Maximum number of iterations hit, may not be at an optima");
137 return std::string(
"Line search failed to achieve a sufficient decrease, no more progress can be made");
139 return std::string(
"Unknown termination code");
150 throw std::runtime_error(
"Error evaluating initial BFGS point.");
159 Scalar gradNorm, stepNorm;
182 if (
_itNum > 1 && resetB != 2) {
212 _note +=
"LS failed, Hessian reset";
230 gradNorm =
_gk.norm();
231 stepNorm = sk.norm();
237 Scalar B0fact =
_qn.update(yk,sk,
true);
279 while (!(retcode =
step()));
287 template <
typename M>
289 std::vector<double>& params_r,
290 std::vector<int>& params_i,
291 std::ostream* o = 0) {
300 std::vector<int> _params_i;
302 std::vector<double> _x, _g;
307 const std::vector<int>& params_i,
309 : _model(model), _params_i(params_i), _msgs(msgs), _fevals(0) {}
311 size_t fevals()
const {
return _fevals; }
312 int operator()(
const Eigen::Matrix<double,Eigen::Dynamic,1> &x,
315 using Eigen::Dynamic;
318 typedef typename index_type<Matrix<double,Dynamic,1> >::type idx_t;
321 for (idx_t i = 0; i < x.size(); i++)
325 f = - log_prob_propto<false>(_model, _x, _params_i, _msgs);
326 }
catch (
const std::exception&
e) {
328 (*_msgs) << e.what() << std::endl;
336 *_msgs <<
"Error evaluating model log probability: "
337 "Non-finite function evaluation." << std::endl;
341 int operator()(
const Eigen::Matrix<double,Eigen::Dynamic,1> &x,
343 Eigen::Matrix<double,Eigen::Dynamic,1> &g) {
345 using Eigen::Dynamic;
348 typedef typename index_type<Matrix<double,Dynamic,1> >::type idx_t;
351 for (idx_t i = 0; i < x.size(); i++)
357 f = - log_prob_grad<true,false>(_model, _x, _params_i, _g, _msgs);
358 }
catch (
const std::exception&
e) {
360 (*_msgs) << e.what() << std::endl;
365 for (
size_t i = 0; i < _g.size(); i++) {
368 *_msgs <<
"Error evaluating model log probability: "
369 "Non-finite gradient." << std::endl;
379 *_msgs <<
"Error evaluating model log probability: "
380 "Non-finite function evaluation."
385 int df(
const Eigen::Matrix<double,Eigen::Dynamic,1> &x,
386 Eigen::Matrix<double,Eigen::Dynamic,1> &g) {
388 return (*
this)(x,f,g);
392 template<
typename M,
typename QNUpdateType,
typename Scalar = double,
393 int DimAtCompile = Eigen::Dynamic>
403 const std::vector<double>&
params_r,
404 const std::vector<int>& params_i,
405 std::ostream* msgs = 0)
407 _adaptor(model,params_i,msgs)
413 Eigen::Matrix<double,Eigen::Dynamic,1> x;
414 x.resize(params_r.size());
415 for (
size_t i = 0; i < params_r.size(); i++)
423 void grad(std::vector<double>& g) {
426 for (
idx_t i = 0; i < cg.size(); i++)
432 for (
idx_t i = 0; i < cx.size(); i++)
const size_t iter_num() const
fvar< T > fabs(const fvar< T > &x)
void params_r(std::vector< double > &x)
QNUpdateType & get_qnupdate()
const Scalar & alpha0() const
ModelAdaptor(M &model, const std::vector< int > ¶ms_i, std::ostream *msgs)
double log_prob_grad(const M &model, std::vector< double > ¶ms_r, std::vector< int > ¶ms_i, std::vector< double > &gradient, std::ostream *msgs=0)
Compute the gradient using reverse-mode automatic differentiation, writing the result into the specif...
const VectorT & curr_x() const
ConvergenceOptions< Scalar > _conv_opts
const VectorT & prev_g() const
BFGSMinimizer< ModelAdaptor< M >, QNUpdateType, Scalar, DimAtCompile > BFGSBase
BFGSMinimizer(FunctorType &f)
bool isfinite(const stan::agrad::var &v)
Checks if the given number has finite value.
int WolfeLineSearch(FunctorType &func, Scalar &alpha, XType &x1, Scalar &f1, XType &gradx1, const XType &p, const XType &x0, const Scalar &f0, const XType &gradx0, const Scalar &c1, const Scalar &c2, const Scalar &minAlpha)
Perform a line search which finds an approximate solution to: satisfying the strong Wolfe conditions...
stan::math::index_type< vector_t >::type idx_t
double max(const double a, const double b)
const Scalar & prev_f() const
Eigen::Matrix< Scalar, DimAtCompile, 1 > VectorT
int minimize(VectorT &x0)
const std::string & note() const
std::string get_code_string(int retCode)
void grad(std::vector< double > &g)
const VectorT & prev_p() const
void initialize(const VectorT &x0)
Primary template class for the metaprogram to compute the index type of a container.
int df(const Eigen::Matrix< double, Eigen::Dynamic, 1 > &x, Eigen::Matrix< double, Eigen::Dynamic, 1 > &g)
Scalar rel_obj_decrease() const
BFGSBase::VectorT vector_t
const QNUpdateType & get_qnupdate() const
Scalar prev_step_size() const
int operator()(const Eigen::Matrix< double, Eigen::Dynamic, 1 > &x, double &f, Eigen::Matrix< double, Eigen::Dynamic, 1 > &g)
const Scalar & alpha() const
Eigen::Matrix< Scalar, DimAtCompile, DimAtCompile > HessianT
Scalar CubicInterp(const Scalar &df0, const Scalar &x1, const Scalar &f1, const Scalar &df1, const Scalar &loX, const Scalar &hiX)
Find the minima in an interval [loX, hiX] of a cubic function which interpolates the points...
LSOptions< Scalar > _ls_opts
double e()
Return the base of the natural logarithm.
const VectorT & prev_x() const
BFGSLineSearch(M &model, const std::vector< double > ¶ms_r, const std::vector< int > ¶ms_i, std::ostream *msgs=0)
const VectorT & curr_p() const
const Scalar & curr_f() const
double min(const double a, const double b)
void initialize(const std::vector< double > ¶ms_r)
int operator()(const Eigen::Matrix< double, Eigen::Dynamic, 1 > &x, double &f)
Scalar rel_grad_norm() const
const VectorT & curr_g() const
double lp_no_jacobian(const M &model, std::vector< double > ¶ms_r, std::vector< int > ¶ms_i, std::ostream *o=0)
double log_prob_propto(const M &model, std::vector< double > ¶ms_r, std::vector< int > ¶ms_i, std::ostream *msgs=0)
Helper function to calculate log probability for double scalars up to a proportion.