1 /*************************************************************************
6 * [2011] - [2016] Realm Inc
9 * NOTICE: All information contained herein is, and remains
10 * the property of Realm Incorporated and its suppliers,
11 * if any. The intellectual and technical concepts contained
12 * herein are proprietary to Realm Incorporated
13 * and its suppliers and may be covered by U.S. and Foreign Patents,
14 * patents in process, and are protected by trade secret or copyright law.
15 * Dissemination of this information or reproduction of this material
16 * is strictly forbidden unless prior written permission is obtained
17 * from Realm Incorporated.
19 **************************************************************************/
21 #ifndef REALM_UTIL_JSON_PARSER_HPP
22 #define REALM_UTIL_JSON_PARSER_HPP
24 #include <system_error>
29 #include <realm/string_data.hpp>
34 /// A JSON parser that neither allocates heap memory nor throws exceptions.
36 /// The parser takes as input a range of characters, and emits a stream of events
37 /// representing the structure of the JSON document.
39 /// Parser errors are represented as `std::error_condition`s.
/// Iterator type used to walk the input characters.
42 using InputIterator = const char*;
/// Kind of event reported to the callback passed to parse().
44 enum class EventType {
/// Span of input characters associated with an event.
55 using Range = StringData;
60 Event(EventType type): type(type) {}
/// Return the characters between the surrounding quotes of a string event,
/// with escape sequences left exactly as they appear in the input.
/// The type of this event must be EventType::string.
67 StringData escaped_string_value() const noexcept;
69 /// Unescape the string value into \a buffer.
70 /// The type of this event must be EventType::string.
72 /// \param buffer is a pointer to a buffer big enough to hold the
73 /// unescaped string value. The unescaped string is guaranteed to be
74 /// no longer than the escaped string, so escaped_string_value().size() can
75 /// be used as an upper bound. Unicode sequences of the form "\uXXXX"
76 /// will be converted to UTF-8 sequences. Note that the escaped form of
77 /// a unicode point takes exactly 6 bytes, which is also the maximum
78 /// possible length of a UTF-8 encoded codepoint.
79 StringData unescape_string(char* buffer) const noexcept;
84 unexpected_end_of_stream = 2
/// Construct a parser over the given character range. Only pointers into
/// the range are kept, so the data must stay valid while parsing.
87 JSONParser(StringData);
89 /// Parse the input data, and call f repeatedly with an argument of type Event
90 /// representing the token that the parser encountered.
92 /// The stream of events is "flat", which is to say that it is the responsibility
93 /// of the function f to keep track of any nested object structures as it deems
96 /// This function is guaranteed to never throw, as long as f never throws.
98 std::error_condition parse(F&& f) noexcept(noexcept(f(std::declval<Event>())));
/// Error category derived from std::error_category, overriding name() and
/// message().
100 class ErrorCategory: public std::error_category {
102 const char* name() const noexcept final;
103 std::string message(int) const final;
105 static const ErrorCategory error_category;
/// Current read position in the input.
123 InputIterator m_current;
// Per-construct parsers. Each takes the event callback \a f and returns an
// error condition; the noexcept specification mirrors that of calling \a f.
127 std::error_condition parse_object(F&& f) noexcept(noexcept(f(std::declval<Event>())));
129 std::error_condition parse_pair(F&& f) noexcept(noexcept(f(std::declval<Event>())));
131 std::error_condition parse_array(F&& f) noexcept(noexcept(f(std::declval<Event>())));
133 std::error_condition parse_number(F&& f) noexcept(noexcept(f(std::declval<Event>())));
135 std::error_condition parse_string(F&& f) noexcept(noexcept(f(std::declval<Event>())));
137 std::error_condition parse_value(F&& f) noexcept(noexcept(f(std::declval<Event>())));
139 std::error_condition parse_boolean(F&& f) noexcept(noexcept(f(std::declval<Event>())));
141 std::error_condition parse_null(F&& f) noexcept(noexcept(f(std::declval<Event>())));
// Check that the next character is the given token. On a match,
// \a out_range is set to the one-character range holding it.
143 std::error_condition expect_token(char, Range& out_range) noexcept;
144 std::error_condition expect_token(Token, Range& out_range) noexcept;
146 // Returns true unless EOF was reached.
147 bool peek_char(char& out_c) noexcept;
148 bool peek_token(Token& out_t) noexcept;
149 bool is_whitespace(Token t) noexcept;
150 void skip_whitespace() noexcept;
/// Build a std::error_condition from a JSONParser::Error value.
153 std::error_condition make_error_condition(JSONParser::Error e);
/// Marks JSONParser::Error as an error-condition enum, which is what lets
/// the parser functions return Error values where a std::error_condition
/// is expected.
160 struct is_error_condition_enum<realm::util::JSONParser::Error> {
161 static const bool value = true;
/// Point the cursor at the first character of \a input and the end marker
/// one past its last character. Only the pointers are stored here.
171 inline JSONParser::JSONParser(StringData input):
172 m_current(input.data()), m_end(input.data() + input.size())
/// Public entry point: parses one JSON value by delegating to parse_value()
/// and returns its result.
177 std::error_condition JSONParser::parse(F&& f) noexcept(noexcept(f(std::declval<Event>())))
179 return parse_value(f);
/// Parse a JSON object at the current position.
///
/// Expects Token::object_begin first. It then checks for Token::object_end;
/// when that token is found the event type is switched to
/// EventType::object_end. After a member, a Token::comma is consumed, a
/// Token::object_end is left in place for the next check, and any other
/// token yields Error::unexpected_token. Running out of input yields
/// Error::unexpected_end_of_stream.
///
/// \return a default-constructed std::error_condition on success.
183 std::error_condition JSONParser::parse_object(F&& f) noexcept(noexcept(f(std::declval<Event>())))
185 Event event{EventType::object_begin};
186 auto ec = expect_token(Token::object_begin, event.range);
// Probe for the closing token; event.range receives its position on a match.
194 ec = expect_token(Token::object_end, event.range);
197 event.type = EventType::object_end;
// Error::unexpected_token only means "not the closing token"; any other
// error from the probe is handled separately.
204 if (ec != Error::unexpected_token)
215 if (t == Token::object_end) {
216 // Fine, will terminate on next iteration
218 else if (t == Token::comma)
219 ++m_current; // OK, because peek_char returned true
221 return Error::unexpected_token;
224 return Error::unexpected_end_of_stream;
228 return std::error_condition{};
/// Parse one object member: a key read with parse_string(), a Token::colon
/// separator, then a value read with parse_value(). A token other than
/// Token::colon after the key yields Error::unexpected_token.
232 std::error_condition JSONParser::parse_pair(F&& f) noexcept(noexcept(f(std::declval<Event>())))
236 auto ec = parse_string(f);
244 if (t == Token::colon) {
248 return Error::unexpected_token;
252 return parse_value(f);
/// Parse a JSON array at the current position.
///
/// Expects Token::array_begin first. It then checks for Token::array_end;
/// when that token is found the event type is switched to
/// EventType::array_end. After an element, a Token::comma is consumed, a
/// Token::array_end is left in place for the next check, and any other
/// token yields Error::unexpected_token. Running out of input yields
/// Error::unexpected_end_of_stream.
///
/// \return a default-constructed std::error_condition on success.
256 std::error_condition JSONParser::parse_array(F&& f) noexcept(noexcept(f(std::declval<Event>())))
258 Event event{EventType::array_begin};
259 auto ec = expect_token(Token::array_begin, event.range);
// Probe for the closing token; event.range receives its position on a match.
267 ec = expect_token(Token::array_end, event.range);
270 event.type = EventType::array_end;
// Error::unexpected_token only means "not the closing token"; any other
// error from the probe is handled separately.
277 if (ec != Error::unexpected_token)
288 if (t == Token::array_end) {
289 // Fine, will terminate next iteration.
291 else if (t == Token::comma)
292 ++m_current; // OK, because peek_char returned true
294 return Error::unexpected_token;
297 return Error::unexpected_end_of_stream;
301 return std::error_condition{};
/// Parse a number at the current position using std::strtod().
///
/// Up to buffer_size - 1 bytes of the remaining input are copied into a
/// zero-initialized stack buffer, so strtod() always sees a NUL-terminated
/// string. Forms that strtod() accepts but that are rejected here (leading
/// whitespace, "NAN", "INF", hexadecimal) yield Error::unexpected_token, as
/// does input from which strtod() consumes nothing. With no input left the
/// result is Error::unexpected_end_of_stream. On success the cursor is
/// advanced by the number of bytes strtod() consumed.
///
/// NOTE(review): only the first buffer_size - 1 bytes are handed to
/// strtod(), so a longer numeric literal would be consumed only partially.
305 std::error_condition JSONParser::parse_number(F&& f) noexcept(noexcept(f(std::declval<Event>())))
307 static const size_t buffer_size = 64;
308 char buffer[buffer_size] = {0};
309 size_t bytes_to_copy = std::min<size_t>(m_end - m_current, buffer_size - 1);
310 if (bytes_to_copy == 0)
311 return Error::unexpected_end_of_stream;
// The cast matters: handing std::isspace() a negative char (any byte >= 0x80
// where char is signed) is undefined behavior, and arbitrary bytes can reach
// this function because parse_value() falls through to it.
313 if (std::isspace(static_cast<unsigned char>(*m_current))) {
314 // JSON has a different idea of what constitutes whitespace than isspace(),
315 // but strtod() uses isspace() to skip initial whitespace. We have already
316 // skipped whitespace that JSON considers valid, so if there is any whitespace
317 // at m_current now, it is invalid according to JSON, and so is an error.
318 return Error::unexpected_token;
321 switch (m_current[0]) {
323 // strtod() parses "NAN", JSON does not.
325 // strtod() parses "INF", JSON does not.
328 // strtod() may parse exponent notation, JSON does not.
329 return Error::unexpected_token;
331 if (bytes_to_copy > 2 && (m_current[1] == 'x' || m_current[1] == 'X')) {
332 // strtod() parses hexadecimal, JSON does not.
333 return Error::unexpected_token;
// buffer was zero-filled and at most buffer_size - 1 bytes are copied, so the
// copy is always NUL-terminated.
337 std::copy(m_current, m_current + bytes_to_copy, buffer);
339 char* endp = nullptr;
340 Event event{EventType::number};
341 event.number = std::strtod(buffer, &endp);
// strtod() leaves endp at the start of the buffer when nothing was converted.
343 if (endp == buffer) {
344 return Error::unexpected_token;
346 size_t num_bytes_consumed = endp - buffer;
347 m_current += num_bytes_consumed;
/// Parse a double-quoted string at the current position.
///
/// The closing quote is the first Token::dquote that is preceded by an even
/// number of consecutive Token::escape characters (an odd count means the
/// quote itself is escaped). The resulting event's range spans both quote
/// characters, and the cursor is moved just past the closing quote.
///
/// \return Error::unexpected_end_of_stream if no unescaped closing quote is
/// found before the end of input, or Error::unexpected_token if the current
/// character is not Token::dquote.
352 std::error_condition JSONParser::parse_string(F&& f) noexcept(noexcept(f(std::declval<Event>())))
354 InputIterator p = m_current;
356 return Error::unexpected_end_of_stream;
// Counts the run of escape characters that ends at p, walking towards (but
// never onto) begin. The step must be --p: stepping forward would land on
// the quote right away, so a quote preceded by an escaped backslash (\\")
// would wrongly be treated as an escaped quote.
358 auto count_num_escapes_backwards = [](const char* p, const char* begin) -> size_t {
360 for (; p > begin && *p == Token::escape; --p)
365 Token t = static_cast<Token>(*p);
366 InputIterator inner_end;
367 if (t == Token::dquote) {
368 inner_end = m_current;
// Look for the next quote after the previous candidate.
370 inner_end = std::find(inner_end + 1, m_end, Token::dquote);
371 if (inner_end == m_end)
372 return Error::unexpected_end_of_stream;
// An odd number of preceding escapes means this quote is escaped; keep going.
373 } while (count_num_escapes_backwards(inner_end - 1, m_current) % 2 == 1);
375 Event event{EventType::string};
// +1 so that the range includes the closing quote as well as the opening one.
376 event.range = Range(m_current, inner_end - m_current + 1);
377 m_current = inner_end + 1;
380 return Error::unexpected_token;
/// Parse the literal "true" or "false" at the current position.
///
/// The candidate word is the run of alphabetic characters starting at the
/// cursor. It is compared against "true" and "false"; anything else yields
/// Error::unexpected_token.
384 std::error_condition JSONParser::parse_boolean(F&& f) noexcept(noexcept(f(std::declval<Event>())))
// The cast matters: handing std::isalpha() a negative char (any byte >= 0x80
// where char is signed) is undefined behavior.
386 auto first_nonalpha = std::find_if_not(m_current, m_end, [](auto c) { return std::isalpha(static_cast<unsigned char>(c)); });
388 Event event{EventType::boolean};
389 event.range = Range(m_current, first_nonalpha - m_current);
390 if (event.range == "true") {
391 event.boolean = true;
395 else if (event.range == "false") {
396 event.boolean = false;
401 return Error::unexpected_token;
/// Parse the literal "null" at the current position.
///
/// The candidate word is the run of alphabetic characters starting at the
/// cursor. Anything other than "null" yields Error::unexpected_token.
405 std::error_condition JSONParser::parse_null(F&& f) noexcept(noexcept(f(std::declval<Event>())))
// The cast matters: handing std::isalpha() a negative char (any byte >= 0x80
// where char is signed) is undefined behavior.
407 auto first_nonalpha = std::find_if_not(m_current, m_end, [](auto c) { return std::isalpha(static_cast<unsigned char>(c)); });
409 Event event{EventType::null};
410 event.range = Range(m_current, first_nonalpha - m_current);
411 if (event.range == "null") {
416 return Error::unexpected_token;
/// Parse one JSON value, choosing the sub-parser from the character at the
/// cursor: Token::object_begin -> parse_object(), Token::array_begin ->
/// parse_array(), 't' or 'f' -> parse_boolean(), 'n' -> parse_null(),
/// Token::dquote -> parse_string(), anything else -> parse_number().
///
/// \return Error::unexpected_end_of_stream if the cursor is at or past the
/// end of input, otherwise the result of the chosen sub-parser.
420 std::error_condition JSONParser::parse_value(F&& f) noexcept(noexcept(f(std::declval<Event>())))
424 if (m_current >= m_end)
425 return Error::unexpected_end_of_stream;
427 if (*m_current == Token::object_begin)
428 return parse_object(f);
430 if (*m_current == Token::array_begin)
431 return parse_array(f);
433 if (*m_current == 't' || *m_current == 'f')
434 return parse_boolean(f);
436 if (*m_current == 'n')
437 return parse_null(f);
439 if (*m_current == Token::dquote)
440 return parse_string(f);
// Every remaining first character is handed to the number parser, which
// rejects what it cannot parse.
442 return parse_number(f);
/// Predicate used by skip_whitespace() to decide whether the token \a t is
/// whitespace.
446 bool JSONParser::is_whitespace(Token t) noexcept
/// Skip whitespace at the cursor: loops for as long as the cursor is before
/// the end of input and the character under it satisfies is_whitespace().
460 void JSONParser::skip_whitespace() noexcept
462 while (m_current < m_end && is_whitespace(static_cast<Token>(*m_current)))
/// Check that the character at the cursor equals \a c.
///
/// \param out_range on a match, set to the one-character range at the cursor.
/// \return a default-constructed std::error_condition on a match,
/// Error::unexpected_end_of_stream if the cursor is at the end of input,
/// and Error::unexpected_token if the character differs.
467 std::error_condition JSONParser::expect_token(char c, Range& out_range) noexcept
470 if (m_current == m_end)
471 return Error::unexpected_end_of_stream;
472 if (*m_current == c) {
473 out_range = Range(m_current, 1);
475 return std::error_condition{};
477 return Error::unexpected_token;
/// Token overload: converts \a t to its char value and forwards to the
/// char overload of expect_token().
481 std::error_condition JSONParser::expect_token(Token t, Range& out_range) noexcept
483 return expect_token(static_cast<char>(t), out_range);
/// Look at the character at the cursor; the read is guarded by a check that
/// the cursor is before the end of input.
487 bool JSONParser::peek_char(char& out_c) noexcept
489 if (m_current < m_end) {
/// Look at the character at the cursor as a Token: when the cursor is
/// before the end of input, \a out_t receives that character cast to Token.
497 bool JSONParser::peek_token(Token& out_t) noexcept
499 if (m_current < m_end) {
500 out_t = static_cast<Token>(*m_current);
/// Return the event's range without its first and last character, i.e. the
/// string contents without the surrounding quotes and with escape sequences
/// untouched. Asserts that this is a string event and that the range holds
/// at least the two quote characters.
507 StringData JSONParser::Event::escaped_string_value() const noexcept
509 REALM_ASSERT(type == EventType::string);
510 REALM_ASSERT(range.size() >= 2);
511 return StringData(range.data() + 1, range.size() - 2);
/// Stream a short textual form of \a type: the words "number", "string",
/// "boolean" or "null" for scalar events, and the bracket or brace
/// character for the array/object begin and end events. Returns \a os.
515 OS& operator<<(OS& os, JSONParser::EventType type)
518 case JSONParser::EventType::number: os << "number"; return os;
519 case JSONParser::EventType::string: os << "string"; return os;
520 case JSONParser::EventType::boolean: os << "boolean"; return os;
521 case JSONParser::EventType::null: os << "null"; return os;
522 case JSONParser::EventType::array_begin: os << "["; return os;
523 case JSONParser::EventType::array_end: os << "]"; return os;
524 case JSONParser::EventType::object_begin: os << "{"; return os;
525 case JSONParser::EventType::object_end: os << "}"; return os;
/// Stream an event. For number, string and boolean events the payload
/// (e.number, e.range or e.boolean respectively) is written in parentheses.
531 OS& operator<<(OS& os, const JSONParser::Event& e) {
534 case JSONParser::EventType::number: return os << "(" << e.number << ")";
535 case JSONParser::EventType::string: return os << "(" << e.range << ")";
536 case JSONParser::EventType::boolean: return os << "(" << e.boolean << ")";
544 #endif // REALM_UTIL_JSON_PARSER_HPP