// Copyright 2017 Daniel Parker // Distributed under the Boost license, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // See https://github.com/danielaparker/jsoncons for latest version #ifndef JSONCONS_BSON_BSON_PARSER_HPP #define JSONCONS_BSON_BSON_PARSER_HPP #include #include #include #include // std::move #include #include #include #include #include #include #include #include #include namespace jsoncons { namespace bson { enum class parse_mode {root,accept,document,array,value}; struct parse_state { parse_mode mode; std::size_t length; std::size_t pos; uint8_t type; std::size_t index; parse_state(parse_mode mode_, std::size_t length_, std::size_t pos_, uint8_t type_ = 0) noexcept : mode(mode_), length(length_), pos(pos_), type(type_), index(0) { } parse_state(const parse_state&) = default; parse_state(parse_state&&) = default; parse_state& operator=(const parse_state&) = default; parse_state& operator=(parse_state&&) = default; }; template > class basic_bson_parser : public ser_context { using char_type = char; using char_traits_type = std::char_traits; using temp_allocator_type = Allocator; using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; using byte_allocator_type = typename std::allocator_traits:: template rebind_alloc; using parse_state_allocator_type = typename std::allocator_traits:: template rebind_alloc; Source source_; bson_decode_options options_; bool more_; bool done_; std::vector bytes_buffer_; std::basic_string,char_allocator_type> text_buffer_; std::vector state_stack_; public: template basic_bson_parser(Sourceable&& source, const bson_decode_options& options = bson_decode_options(), const Allocator alloc = Allocator()) : source_(std::forward(source)), options_(options), more_(true), done_(false), text_buffer_(alloc), state_stack_(alloc) { state_stack_.emplace_back(parse_mode::root,0,0); } void restart() { more_ = true; } void reset() { more_ = true; done_ = false; bytes_buffer_.clear(); text_buffer_.clear(); state_stack_.clear(); state_stack_.emplace_back(parse_mode::root,0,0); } template void reset(Sourceable&& source) { source_ = std::forward(source); reset(); } bool done() const { return done_; } bool stopped() const { return !more_; } std::size_t line() const override { return 0; } std::size_t column() const override { return source_.position(); } void array_expected(json_visitor& visitor, std::error_code& ec) { if (state_stack_.size() == 2 && state_stack_.back().mode == parse_mode::document) { state_stack_.back().mode = parse_mode::array; more_ = visitor.begin_array(semantic_tag::none, *this, ec); } } void parse(json_visitor& visitor, std::error_code& ec) { if (JSONCONS_UNLIKELY(source_.is_error())) { ec = bson_errc::source_error; more_ = false; return; } while (!done_ && more_) { switch (state_stack_.back().mode) { case parse_mode::root: state_stack_.back().mode = parse_mode::accept; begin_document(visitor, ec); break; case parse_mode::document: { uint8_t type; std::size_t n = source_.read(&type, 1); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != 1)) { ec = bson_errc::unexpected_eof; more_ = false; return; } if (type != 0x00) { read_e_name(visitor,jsoncons::bson::bson_container_type::document,ec); state_stack_.back().mode = parse_mode::value; state_stack_.back().type = type; } else { end_document(visitor,ec); } break; } case parse_mode::array: { uint8_t type; std::size_t n = source_.read(&type, 1); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != 1)) { ec = bson_errc::unexpected_eof; more_ = false; return; } if (type != 0x00) { read_e_name(visitor,jsoncons::bson::bson_container_type::array,ec); read_value(visitor, type, ec); } else { end_array(visitor,ec); } break; } case parse_mode::value: state_stack_.back().mode = parse_mode::document; read_value(visitor,state_stack_.back().type,ec); break; case parse_mode::accept: { JSONCONS_ASSERT(state_stack_.size() == 1); state_stack_.clear(); more_ = false; done_ = true; visitor.flush(); break; } } } } private: void begin_document(json_visitor& visitor, std::error_code& ec) { if (JSONCONS_UNLIKELY(static_cast(state_stack_.size()) > options_.max_nesting_depth())) { ec = bson_errc::max_nesting_depth_exceeded; more_ = false; return; } uint8_t buf[sizeof(int32_t)]; size_t n = source_.read(buf, sizeof(int32_t)); if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) { ec = bson_errc::unexpected_eof; more_ = false; return; } auto length = binary::little_to_native(buf, sizeof(buf)); more_ = visitor.begin_object(semantic_tag::none, *this, ec); state_stack_.emplace_back(parse_mode::document,length,n); } void end_document(json_visitor& visitor, std::error_code& ec) { JSONCONS_ASSERT(state_stack_.size() >= 2); more_ = visitor.end_object(*this,ec); if (JSONCONS_UNLIKELY(state_stack_.back().pos != state_stack_.back().length)) { ec = bson_errc::size_mismatch; more_ = false; return; } std::size_t pos = state_stack_.back().pos; state_stack_.pop_back(); state_stack_.back().pos += pos; } void begin_array(json_visitor& visitor, std::error_code& ec) { if (JSONCONS_UNLIKELY(static_cast(state_stack_.size()) > options_.max_nesting_depth())) { ec = bson_errc::max_nesting_depth_exceeded; more_ = false; return; } uint8_t buf[sizeof(int32_t)]; std::size_t n = source_.read(buf, sizeof(int32_t)); if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) { ec = bson_errc::unexpected_eof; more_ = false; return; } auto length = binary::little_to_native(buf, sizeof(buf)); more_ = visitor.begin_array(semantic_tag::none, *this, ec); if (ec) { return; } state_stack_.emplace_back(parse_mode::array, length, n); } void end_array(json_visitor& visitor, std::error_code& ec) { JSONCONS_ASSERT(state_stack_.size() >= 2); more_ = visitor.end_array(*this, ec); if (JSONCONS_UNLIKELY(state_stack_.back().pos != state_stack_.back().length)) { ec = bson_errc::size_mismatch; more_ = false; return; } std::size_t pos = state_stack_.back().pos; state_stack_.pop_back(); state_stack_.back().pos += pos; } void read_e_name(json_visitor& visitor, jsoncons::bson::bson_container_type type, std::error_code& ec) { text_buffer_.clear(); read_cstring(ec); if (ec) { return; } if (type == jsoncons::bson::bson_container_type::document) { auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) { ec = bson_errc::invalid_utf8_text_string; more_ = false; return; } more_ = visitor.key(jsoncons::basic_string_view(text_buffer_.data(),text_buffer_.length()), *this, ec); } } void read_value(json_visitor& visitor, uint8_t type, std::error_code& ec) { switch (type) { case jsoncons::bson::bson_type::double_type: { uint8_t buf[sizeof(double)]; std::size_t n = source_.read(buf, sizeof(double)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(double))) { ec = bson_errc::unexpected_eof; more_ = false; return; } double res = binary::little_to_native(buf, sizeof(buf)); more_ = visitor.double_value(res, semantic_tag::none, *this, ec); break; } case jsoncons::bson::bson_type::symbol_type: case jsoncons::bson::bson_type::min_key_type: case jsoncons::bson::bson_type::max_key_type: case jsoncons::bson::bson_type::string_type: { text_buffer_.clear(); read_string(ec); if (ec) { return; } auto result = unicode_traits::validate(text_buffer_.data(), text_buffer_.size()); if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) { ec = bson_errc::invalid_utf8_text_string; more_ = false; return; } more_ = visitor.string_value(text_buffer_, semantic_tag::none, *this, ec); break; } case jsoncons::bson::bson_type::javascript_type: { text_buffer_.clear(); read_string(ec); if (ec) { return; } auto result = unicode_traits::validate(text_buffer_.data(), text_buffer_.size()); if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) { ec = bson_errc::invalid_utf8_text_string; more_ = false; return; } more_ = visitor.string_value(text_buffer_, semantic_tag::code, *this, ec); break; } case jsoncons::bson::bson_type::regex_type: { text_buffer_.clear(); text_buffer_.push_back('/'); read_cstring(ec); if (ec) { return; } text_buffer_.push_back('/'); read_cstring(ec); if (ec) { return; } more_ = visitor.string_value(text_buffer_, semantic_tag::regex, *this, ec); break; } case jsoncons::bson::bson_type::document_type: { begin_document(visitor,ec); break; } case jsoncons::bson::bson_type::array_type: { begin_array(visitor,ec); break; } case jsoncons::bson::bson_type::undefined_type: { more_ = visitor.null_value(semantic_tag::undefined, *this, ec); break; } case jsoncons::bson::bson_type::null_type: { more_ = visitor.null_value(semantic_tag::none, *this, ec); break; } case jsoncons::bson::bson_type::bool_type: { uint8_t c; std::size_t n = source_.read(&c, 1); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != 1)) { ec = bson_errc::unexpected_eof; more_ = false; return; } more_ = visitor.bool_value(c != 0, semantic_tag::none, *this, ec); break; } case jsoncons::bson::bson_type::int32_type: { uint8_t buf[sizeof(int32_t)]; std::size_t n = source_.read(buf, sizeof(int32_t)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) { ec = bson_errc::unexpected_eof; more_ = false; return; } auto val = binary::little_to_native(buf, sizeof(buf)); more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); break; } case jsoncons::bson::bson_type::timestamp_type: { uint8_t buf[sizeof(uint64_t)]; std::size_t n = source_.read(buf, sizeof(uint64_t)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(uint64_t))) { ec = bson_errc::unexpected_eof; more_ = false; return; } auto val = binary::little_to_native(buf, sizeof(buf)); more_ = visitor.uint64_value(val, semantic_tag::none, *this, ec); break; } case jsoncons::bson::bson_type::int64_type: { uint8_t buf[sizeof(int64_t)]; std::size_t n = source_.read(buf, sizeof(int64_t)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(int64_t))) { ec = bson_errc::unexpected_eof; more_ = false; return; } auto val = binary::little_to_native(buf, sizeof(buf)); more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); break; } case jsoncons::bson::bson_type::datetime_type: { uint8_t buf[sizeof(int64_t)]; std::size_t n = source_.read(buf, sizeof(int64_t)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(int64_t))) { ec = bson_errc::unexpected_eof; more_ = false; return; } auto val = binary::little_to_native(buf, sizeof(buf)); more_ = visitor.int64_value(val, semantic_tag::epoch_milli, *this, ec); break; } case jsoncons::bson::bson_type::binary_type: { uint8_t buf[sizeof(int32_t)]; std::size_t n = source_.read(buf, sizeof(int32_t)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) { ec = bson_errc::unexpected_eof; more_ = false; return; } const auto len = binary::little_to_native(buf, sizeof(buf)); if (JSONCONS_UNLIKELY(len < 0)) { ec = bson_errc::length_is_negative; more_ = false; return; } uint8_t subtype; n = source_.read(&subtype, 1); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != 1)) { ec = bson_errc::unexpected_eof; more_ = false; return; } bytes_buffer_.clear(); n = source_reader::read(source_, bytes_buffer_, len); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != static_cast(len))) { ec = bson_errc::unexpected_eof; more_ = false; return; } more_ = visitor.byte_string_value(bytes_buffer_, subtype, *this, ec); break; } case jsoncons::bson::bson_type::decimal128_type: { uint8_t buf[sizeof(uint64_t)*2]; std::size_t n = source_.read(buf, sizeof(buf)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(buf))) { ec = bson_errc::unexpected_eof; more_ = false; return; } decimal128_t dec; dec.low = binary::little_to_native(buf, sizeof(uint64_t)); dec.high = binary::little_to_native(buf+sizeof(uint64_t), sizeof(uint64_t)); text_buffer_.clear(); text_buffer_.resize(bson::decimal128_limits::buf_size); auto r = bson::decimal128_to_chars(&text_buffer_[0], &text_buffer_[0]+text_buffer_.size(), dec); more_ = visitor.string_value(string_view(text_buffer_.data(),static_cast(r.ptr-text_buffer_.data())), semantic_tag::float128, *this, ec); break; } case jsoncons::bson::bson_type::object_id_type: { uint8_t buf[12]; std::size_t n = source_.read(buf, sizeof(buf)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(buf))) { ec = bson_errc::unexpected_eof; more_ = false; return; } oid_t oid(buf); to_string(oid, text_buffer_); more_ = visitor.string_value(text_buffer_, semantic_tag::id, *this, ec); break; } default: { ec = bson_errc::unknown_type; more_ = false; return; } } } void read_cstring(std::error_code& ec) { uint8_t c = 0xff; while (true) { std::size_t n = source_.read(&c, 1); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != 1)) { ec = bson_errc::unexpected_eof; more_ = false; return; } if (c == 0) { break; } text_buffer_.push_back(c); } } void read_string(std::error_code& ec) { uint8_t buf[sizeof(int32_t)]; std::size_t n = source_.read(buf, sizeof(int32_t)); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) { ec = bson_errc::unexpected_eof; more_ = false; return; } auto len = binary::little_to_native(buf, sizeof(buf)); if (JSONCONS_UNLIKELY(len < 1)) { ec = bson_errc::string_length_is_non_positive; more_ = false; return; } std::size_t size = static_cast(len) - static_cast(1); n = source_reader::read(source_, text_buffer_, size); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != size)) { ec = bson_errc::unexpected_eof; more_ = false; return; } uint8_t c; n = source_.read(&c, 1); state_stack_.back().pos += n; if (JSONCONS_UNLIKELY(n != 1)) { ec = bson_errc::unexpected_eof; more_ = false; return; } } }; }} #endif