Stan  2.5.0
probability, sampling & optimization
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
json_parser.hpp
Go to the documentation of this file.
1 #ifndef STAN__IO__JSON__JSON_PARSER_HPP
2 #define STAN__IO__JSON__JSON_PARSER_HPP
3 
4 #include <stdexcept>
5 #include <iostream>
6 #include <istream>
7 #include <sstream>
8 #include <string>
9 
10 #include <boost/lexical_cast.hpp>
11 
13 
14 namespace stan {
15 
16  namespace json {
17 
18  namespace {
19 
// Bounds of the UTF-16 high (lead) surrogate range.
const unsigned int MIN_HIGH_SURROGATE(0xD800u);
const unsigned int MAX_HIGH_SURROGATE(0xDBFFu);

// Bounds of the UTF-16 low (trail) surrogate range.
const unsigned int MIN_LOW_SURROGATE(0xDC00u);
const unsigned int MAX_LOW_SURROGATE(0xDFFFu);

// Offset added when a surrogate pair is combined into a single code point.
const unsigned int MIN_SUPPLEMENTARY_CODE_POINT(0x10000u);
25 
26  inline bool is_high_surrogate(unsigned int cp) {
27  return (cp >= MIN_HIGH_SURROGATE && cp <= MAX_HIGH_SURROGATE);
28  }
29 
30  inline bool is_low_surrogate(unsigned int cp) {
31  return (cp >= MIN_LOW_SURROGATE && cp <= MAX_LOW_SURROGATE);
32  }
33 
/**
 * Tests whether a character is one of the four whitespace characters
 * the parser skips between tokens: space, line feed, horizontal tab
 * and carriage return.
 *
 * @param c character to test
 * @return true iff c is ' ', '\n', '\t' or '\r'
 */
inline bool is_whitespace(char c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      return true;
    default:
      return false;
  }
}
37 
44  template <typename Handler, bool Validate_UTF_8>
45  class parser {
46 
47  public:
48 
49  parser(Handler& h,
50  std::istream& in)
51  : h_(h),
52  in_(in),
53  next_char_(0),
54  line_(0),
55  column_(0)
56  { }
57 
58  ~parser() {
59  }
60 
61  void parse() {
62  h_.start_text();
63  parse_text();
64  h_.end_text();
65  }
66 
67  private:
68 
69  json_error json_exception(const std::string& msg) const {
70  std::stringstream ss;
71  ss << "Error in JSON parsing at"
72  << " line=" << line_ << " column=" << column_
73  << std::endl
74  << msg
75  << std::endl;
76  return json_error(ss.str());
77  }
78 
79  // JSON-text = object / array
80  void parse_text() {
81  char c = get_non_ws_char();
82  if (c == '{') { // begin-object
83  h_.start_object();
84  parse_object_members_end_object();
85  h_.end_object();
86  } else if (c == '[') { // begin-array
87  // array
88  h_.start_array();
89  parse_array_values_end_array();
90  h_.end_array();
91  } else {
92  throw json_exception("expecting start of object ({) or array ([)");
93  }
94  }
95 
96  // value = false / null / true / object / array / number / string
97  void parse_value() {
98  // value
99  char c = get_non_ws_char();
100  if (c == 'f') {
101  // false
102  parse_false_literal();
103  } else if (c == 'n') {
104  // null
105  parse_null_literal();
106  } else if (c == 't') {
107  // true
108  parse_true_literal();
109  } else if (c == '"') {
110  // string
111  h_.string(parse_string_chars_quotation_mark());
112  } else if (c == '{' || c == '[') {
113  // object / array
114  unget_char();
115  parse_text();
116  } else if (c == '-' ||
117  (c >= '0' && c <= '9') ) {
118  unget_char();
119  parse_number();
120  } else {
121  throw json_exception("illegal value, expecting object, array, number, string, or literal true/false/null");
122  }
123  }
124 
125  void parse_number() {
126  bool is_positive = true;
127 
128  std::stringstream ss;
129  char c = get_non_ws_char();
130  // minus
131  if (c == '-') {
132  is_positive = false;
133  ss << c;
134  c = get_char();
135  }
136 
137  // int
138  // zero / digit1-9
139  if (c < '0' || c > '9')
140  throw json_exception("expecting int part of number");
141  ss << c;
142 
143  // *DIGIT
144  bool leading_zero = (c == '0');
145  c = get_char();
146  if (leading_zero && (c == '0'))
147  throw json_exception("zero padded numbers not allowed");
148  while (c >= '0' && c <= '9') {
149  ss << c;
150  c = get_char();
151  }
152 
153  // frac
154  bool is_integer = true;
155  if (c == '.') {
156  is_integer = false;
157  ss << '.';
158  c = get_char();
159  if (c < '0' || c > '9')
160  throw json_exception("expected digit after decimal");
161  ss << c;
162  c = get_char();
163  while (c >= '0' && c <= '9') {
164  ss << c;
165  c = get_char();
166  }
167  }
168 
169  // exp
170  if (c == 'e' || c == 'E') {
171  is_integer = false;
172  ss << c;
173  c = get_char();
174  // minus / plus
175  if (c == '+' || c == '-') {
176  ss << c;
177  c = get_char();
178  }
179  // 1*DIGIT
180  if (c < '0' || c > '9')
181  throw json_exception("expected digit after e/E");
182  while (c >= '0' && c <= '9') {
183  ss << c;
184  c = get_char();
185  }
186  }
187  unget_char();
188 
189  if (is_integer) {
190  if (is_positive) {
191  unsigned long n;
192  try {
193  n = boost::lexical_cast<unsigned long>(ss.str());
194  } catch (const boost::bad_lexical_cast & ) {
195  throw json_exception("number exceeds integer range");
196  }
197  ss >> n;
198  h_.number_unsigned_long(n);
199  } else {
200  long n;
201  try {
202  n = boost::lexical_cast<unsigned long>(ss.str());
203  } catch (const boost::bad_lexical_cast & ) {
204  throw json_exception("number exceeds integer range");
205  }
206  ss >> n;
207  h_.number_long(n);
208  }
209  } else {
210  double x;
211  try {
212  x = boost::lexical_cast<double>(ss.str());
213  } catch (const boost::bad_lexical_cast & ) {
214  throw json_exception("number exceeds double range");
215  }
216  ss >> x;
217  h_.number_double(x);
218  }
219  }
220 
221  std::string parse_string_chars_quotation_mark() {
222  std::stringstream s;
223  while (true) {
224  char c = get_char();
225  if (c == '"') {
226  return s.str();
227  } else if (c == '\\') {
228  c = get_char();
229  if (c == '\\' || c == '/' || c == '"') {
230  s << c;
231  } else if (c == 'b') {
232  s << '\b';
233  } else if (c == 'f') {
234  s << '\f';
235  } else if (c == 'n') {
236  s << '\n';
237  } else if (c == 'r') {
238  s << '\r';
239  } else if (c == 't') {
240  s << '\t';
241  } else if (c == 'u') {
242  get_escaped_unicode(s);
243  } else {
244  throw json_exception("expecting legal escape");
245  }
246  continue;
247  } else if (c > 0 && c < 0x20) { // ASCII control characters
248  throw json_exception("found control character, "
249  "char values less than U+0020 must be \\u escaped");
250  }
251  s << c;
252  }
253  }
254 
255  void parse_true_literal() {
256  get_chars("rue");
257  h_.boolean(true);
258  }
259 
260  void parse_false_literal() {
261  get_chars("alse");
262  h_.boolean(false);
263  }
264 
265  void parse_null_literal() {
266  get_chars("ull");
267  h_.null();
268  }
269 
270  void get_escaped_unicode(std::stringstream& s) {
271  unsigned int codepoint = get_int_as_hex_chars();
272  if (!(is_high_surrogate(codepoint) || is_low_surrogate(codepoint))) {
273  putCodepoint(s,codepoint);
274  } else if (!is_high_surrogate(codepoint)) {
275  throw json_exception("illegal unicode values, "
276  "found low-surrogate, missing high-surrogate");
277  } else {
278  char c = get_char();
279  if (! (c == '\\'))
280  throw json_exception("illegal unicode values, "
281  "found high-surrogate, expecting low-surrogate");
282  c = get_char();
283  if (! (c == 'u'))
284  throw json_exception("illegal unicode values, "
285  "found high-surrogate, expecting low-surrogate");
286  unsigned int codepoint2 = get_int_as_hex_chars();
287  unsigned int supplemental
288  = ((codepoint - MIN_HIGH_SURROGATE) << 10)
289  + (codepoint2 - MIN_LOW_SURROGATE)
290  + MIN_SUPPLEMENTARY_CODE_POINT;
291  putCodepoint(s,supplemental);
292  }
293  }
294 
295  unsigned int get_int_as_hex_chars() {
296  std::stringstream s;
297  s << std::hex;
298  for (int i=0; i<4; i++) {
299  char c = get_char();
300  if (! ((c >= 'a' && c<= 'f')
301  || (c >= 'A' && c<= 'F')
302  || (c >= '0' && c<= '9')))
303  throw json_exception("illegal unicode code point");
304  s << c;
305  }
306  unsigned int hex;
307  s >> hex;
308  return hex;
309  }
310 
311  void putCodepoint(std::stringstream& s, unsigned int codepoint){
312  if (codepoint <= 0x7f)
313  s.put(codepoint);
314  else if (codepoint <= 0x7ff) {
315  s.put(0xc0 | ((codepoint >> 6) & 0x1f));
316  s.put(0x80 | (codepoint & 0x3f));
317  } else if (codepoint <= 0xffff) {
318  s.put(0xe0 | ((codepoint >> 12) & 0x0f));
319  s.put(0x80 | ((codepoint >> 6) & 0x3f));
320  s.put(0x80 | (codepoint & 0x3f));
321  } else {
322  s.put(0xf0 | ((codepoint >> 18) & 0x07));
323  s.put(0x80 | ((codepoint >> 12) & 0x3f));
324  s.put(0x80 | ((codepoint >> 6) & 0x3f));
325  s.put(0x80 | (codepoint & 0x3f));
326  }
327  }
328 
329  void get_chars(const std::string& s) {
330  for (size_t i = 0; i < s.size(); ++i) {
331  char c = get_char();
332  if (c != s[i])
333  throw json_exception("expecting rest of literal: "
334  + s.substr(i));
335  }
336  }
337 
338  void parse_array_values_end_array() {
339  char c = get_non_ws_char();
340  if (c == ']') return;
341  unget_char();
342  while (true) {
343  parse_value();
344  char c = get_non_ws_char();
345  if (c == ']') return;
346  if (c != ',') {
347  throw json_exception("in array, expecting ] or ,");
348  }
349  c = get_non_ws_char();
350  if (c == ']')
351  throw json_exception("in array, expecting value");
352  unget_char();
353  }
354  }
355 
356  void parse_object_members_end_object() {
357  char c = get_non_ws_char();
358  if (c == '}') return;
359  while (true) {
360  // string (key)
361  if (c != '"')
362  throw json_exception("expecting member key"
363  " or end of object marker (})");
364  std::string key = parse_string_chars_quotation_mark();
365  h_.key(key);
366  // name-separator separator
367  c = get_non_ws_char();
368  if (c != ':')
369  throw json_exception("expecting key-value separator :");
370  // value
371  parse_value();
372 
373  // continuation
374  c = get_non_ws_char();
375  if (c == '}')
376  return;
377  if (c != ',')
378  throw json_exception("expecting end of object } or separator ,");
379  c = get_non_ws_char();
380  }
381  }
382 
383  char get_char() {
384  char c = in_.get();
385  if (!in_.good())
386  throw json_exception("unexpected end of stream");
387  if (c == '\n') {
388  ++line_;
389  column_ = 1;
390  } else {
391  ++column_;
392  }
393  return c;
394  }
395 
396  char get_non_ws_char() {
397  while (true) {
398  char c = get_char();
399  if (is_whitespace(c)) continue;
400  return c;
401  }
402  }
403 
404  void unget_char() {
405  in_.unget();
406  --column_;
407  }
408 
409  Handler& h_;
410  std::istream& in_;
412  size_t line_;
413  size_t column_;
414  };
415 
416  }
417 
428  template <bool Validate_UTF_8, typename Handler>
429  void parse(std::istream& in,
430  Handler& handler) {
431  parser<Handler,Validate_UTF_8>(handler,in).parse();
432  }
433 
/**
 * Parse the JSON text represented by the specified input stream,
 * sending events to the specified handler.  Equivalent to calling
 * the two-parameter template with Validate_UTF_8 set to false.
 *
 * @tparam Handler type of the event handler
 * @param in stream holding the JSON text
 * @param handler receiver of the parse events
 */
template <typename Handler>
void parse(std::istream& in,
           Handler& handler) {
  parse<false, Handler>(in, handler);
}
449 
450  }
451 }
452 
453 #endif
std::istream & in_
size_t line_
void parse(std::istream &in, Handler &handler)
Parse the JSON text represented by the specified input stream, sending events to the specified handle...
Handler & h_
char next_char_
size_t column_

     [ Stan Home Page ] © 2011–2014, Stan Development Team.