Stan  2.5.0
probability, sampling & optimization
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
log_softmax.hpp
Go to the documentation of this file.
1 #ifndef STAN__AGRAD__REV__MATRIX__LOG_SOFTMAX_HPP
2 #define STAN__AGRAD__REV__MATRIX__LOG_SOFTMAX_HPP
3 
4 #include <cmath>
5 #include <vector>
6 #include <stdexcept>
10 #include <stan/agrad/rev/var.hpp>
12 
13 namespace stan {
14  namespace agrad {
15 
16  namespace {
17 
18  class log_softmax_elt_vari : public vari {
19  private:
20  vari** alpha_;
21  const double* softmax_alpha_;
22  const int size_; // array sizes
23  const int idx_; // in in softmax output
24  public:
25  log_softmax_elt_vari(double val,
26  vari** alpha,
27  const double* softmax_alpha,
28  int size,
29  int idx)
30  : vari(val),
31  alpha_(alpha),
32  softmax_alpha_(softmax_alpha),
33  size_(size),
34  idx_(idx) {
35  }
36  void chain() {
37  for (int m = 0; m < size_; ++m) {
38  if (m == idx_)
39  alpha_[m]->adj_ += adj_ * (1 - softmax_alpha_[m]);
40  else
41  alpha_[m]->adj_ -= adj_ * softmax_alpha_[m];
42  }
43  }
44  };
45 
46  }
47 
48 
59  inline Eigen::Matrix<var,Eigen::Dynamic,1>
60  log_softmax(const Eigen::Matrix<var,Eigen::Dynamic,1>& alpha) {
61  using Eigen::Matrix;
62  using Eigen::Dynamic;
63 
64  stan::math::check_nonzero_size("log_softmax(%1%)",alpha,"alpha",(double*)0);
65 
66  if (alpha.size() == 0)
67  throw std::domain_error("arg vector to log_softmax() must have size > 0");
68  if (alpha.size() == 0)
69  throw std::domain_error("arg vector to log_softmax() must have size > 0");
70 
71  if (alpha.size() == 0)
72  throw std::domain_error("arg vector to log_softmax() must have size > 0");
73 
74  vari** alpha_vi_array
75  = (vari**) agrad::chainable::operator new(sizeof(vari*) * alpha.size());
76  for (int i = 0; i < alpha.size(); ++i)
77  alpha_vi_array[i] = alpha(i).vi_;
78 
79 
80  Matrix<double,Dynamic,1> alpha_d(alpha.size());
81  for (int i = 0; i < alpha_d.size(); ++i)
82  alpha_d(i) = alpha(i).val();
83 
84  // fold logic of math::softmax() and math::log_softmax() to save computations
85 
86  Matrix<double,Dynamic,1> softmax_alpha_d(alpha_d.size());
87  Matrix<double,Dynamic,1> log_softmax_alpha_d(alpha_d.size());
88 
89  double max_v = alpha_d.maxCoeff();
90 
91  double sum = 0.0;
92  for (int i = 0; i < alpha_d.size(); ++i) {
93  softmax_alpha_d(i) = std::exp(alpha_d(i) - max_v);
94  sum += softmax_alpha_d(i);
95  }
96 
97  for (int i = 0; i < alpha_d.size(); ++i)
98  softmax_alpha_d(i) /= sum;
99  double log_sum = std::log(sum);
100 
101  for (int i = 0; i < alpha_d.size(); ++i)
102  log_softmax_alpha_d(i) = (alpha_d(i) - max_v) - log_sum;
103 
104  // end fold
105 
106  double* softmax_alpha_d_array
107  = (double*) agrad::chainable::operator new(sizeof(double) * alpha_d.size());
108 
109  for (int i = 0; i < alpha_d.size(); ++i)
110  softmax_alpha_d_array[i] = softmax_alpha_d(i);
111 
112  Matrix<var,Dynamic,1> log_softmax_alpha(alpha.size());
113  for (int k = 0; k < log_softmax_alpha.size(); ++k)
114  log_softmax_alpha(k) = var(new log_softmax_elt_vari(log_softmax_alpha_d[k],
115  alpha_vi_array,
116  softmax_alpha_d_array,
117  alpha.size(),
118  k));
119  return log_softmax_alpha;
120  }
121 
122 
123  }
124 }
125 
126 #endif
const int idx_
Definition: log_softmax.hpp:23
Eigen::Matrix< fvar< T >, Eigen::Dynamic, 1 > log_softmax(const Eigen::Matrix< fvar< T >, Eigen::Dynamic, 1 > &alpha)
Definition: log_softmax.hpp:16
fvar< T > sum(const Eigen::Matrix< fvar< T >, R, C > &m)
Definition: sum.hpp:14
The variable implementation base class.
Definition: vari.hpp:28
vari ** alpha_
Definition: log_softmax.hpp:20
const int size_
Definition: log_softmax.hpp:22
Independent (input) and dependent (output) variables for gradients.
Definition: var.hpp:27
int size(const std::vector< T > &x)
Definition: size.hpp:11
const double * softmax_alpha_
Definition: log_softmax.hpp:21
fvar< T > log(const fvar< T > &x)
Definition: log.hpp:15
fvar< T > exp(const fvar< T > &x)
Definition: exp.hpp:16
bool check_nonzero_size(const char *function, const T_y &y, const char *name, T_result *result)
Return true if the specified matrix/vector is of non-zero size.

     [ Stan Home Page ] © 2011–2014, Stan Development Team.