diff options
Diffstat (limited to 'include/jsoncons_ext/bson/bson_parser.hpp')
-rw-r--r-- | include/jsoncons_ext/bson/bson_parser.hpp | 645 |
1 files changed, 645 insertions, 0 deletions
diff --git a/include/jsoncons_ext/bson/bson_parser.hpp b/include/jsoncons_ext/bson/bson_parser.hpp new file mode 100644 index 0000000..2dc6e75 --- /dev/null +++ b/include/jsoncons_ext/bson/bson_parser.hpp @@ -0,0 +1,645 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_BSON_PARSER_HPP +#define JSONCONS_BSON_BSON_PARSER_HPP + +#include <string> +#include <vector> +#include <memory> +#include <utility> // std::move +#include <jsoncons/json.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/bson/bson_type.hpp> +#include <jsoncons_ext/bson/bson_decimal128.hpp> +#include <jsoncons_ext/bson/bson_error.hpp> +#include <jsoncons_ext/bson/bson_options.hpp> +#include <jsoncons_ext/bson/bson_oid.hpp> + +namespace jsoncons { namespace bson { + +enum class parse_mode {root,accept,document,array,value}; + +struct parse_state +{ + parse_mode mode; + std::size_t length; + std::size_t pos; + uint8_t type; + std::size_t index; + + parse_state(parse_mode mode_, std::size_t length_, std::size_t pos_, uint8_t type_ = 0) noexcept + : mode(mode_), length(length_), pos(pos_), type(type_), index(0) + { + } + + parse_state(const parse_state&) = default; + parse_state(parse_state&&) = default; + parse_state& operator=(const parse_state&) = default; + parse_state& operator=(parse_state&&) = default; +}; + +template <class Source,class Allocator=std::allocator<char>> +class basic_bson_parser : public ser_context +{ + using char_type = char; + using char_traits_type = std::char_traits<char>; + using temp_allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<char_type>; + using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>; + using parse_state_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_state>; + + Source source_; + bson_decode_options options_; + bool more_; + bool done_; + std::vector<uint8_t,byte_allocator_type> bytes_buffer_; + std::basic_string<char,std::char_traits<char>,char_allocator_type> text_buffer_; + std::vector<parse_state,parse_state_allocator_type> state_stack_; +public: + template <class Sourceable> + basic_bson_parser(Sourceable&& source, + const bson_decode_options& options = bson_decode_options(), + const Allocator alloc = Allocator()) + : source_(std::forward<Sourceable>(source)), + options_(options), + more_(true), + done_(false), + text_buffer_(alloc), + state_stack_(alloc) + { + state_stack_.emplace_back(parse_mode::root,0,0); + } + + void restart() + { + more_ = true; + } + + void reset() + { + more_ = true; + done_ = false; + bytes_buffer_.clear(); + text_buffer_.clear(); + state_stack_.clear(); + state_stack_.emplace_back(parse_mode::root,0,0); + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + source_ = std::forward<Sourceable>(source); + reset(); + } + + bool done() const + { + return done_; + } + + bool stopped() const + { + return !more_; + } + + std::size_t line() const override + { + return 0; + } + + std::size_t column() const override + { + return source_.position(); + } + + void array_expected(json_visitor& visitor, std::error_code& ec) + { + if (state_stack_.size() == 2 && state_stack_.back().mode == parse_mode::document) + { + state_stack_.back().mode = parse_mode::array; + more_ = visitor.begin_array(semantic_tag::none, *this, ec); + } + } + + void parse(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(source_.is_error())) + { + ec = bson_errc::source_error; + more_ = false; + return; + } + + while (!done_ && more_) + { + switch (state_stack_.back().mode) + { + case parse_mode::root: + state_stack_.back().mode = parse_mode::accept; + begin_document(visitor, ec); + break; + case parse_mode::document: + { + uint8_t type; + std::size_t n = source_.read(&type, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + if (type != 0x00) + { + read_e_name(visitor,jsoncons::bson::bson_container_type::document,ec); + state_stack_.back().mode = parse_mode::value; + state_stack_.back().type = type; + } + else + { + end_document(visitor,ec); + } + break; + } + case parse_mode::array: + { + uint8_t type; + std::size_t n = source_.read(&type, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + if (type != 0x00) + { + read_e_name(visitor,jsoncons::bson::bson_container_type::array,ec); + read_value(visitor, type, ec); + } + else + { + end_array(visitor,ec); + } + break; + } + case parse_mode::value: + state_stack_.back().mode = parse_mode::document; + read_value(visitor,state_stack_.back().type,ec); + break; + case parse_mode::accept: + { + JSONCONS_ASSERT(state_stack_.size() == 1); + state_stack_.clear(); + more_ = false; + done_ = true; + visitor.flush(); + break; + } + } + } + } + +private: + + void begin_document(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(static_cast<int>(state_stack_.size()) > options_.max_nesting_depth())) + { + ec = bson_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + + uint8_t buf[sizeof(int32_t)]; + size_t n = source_.read(buf, sizeof(int32_t)); + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + auto length = binary::little_to_native<int32_t>(buf, sizeof(buf)); + + more_ = visitor.begin_object(semantic_tag::none, *this, ec); + state_stack_.emplace_back(parse_mode::document,length,n); + } + + void end_document(json_visitor& visitor, std::error_code& ec) + { + JSONCONS_ASSERT(state_stack_.size() >= 2); + + more_ = visitor.end_object(*this,ec); + if (JSONCONS_UNLIKELY(state_stack_.back().pos != state_stack_.back().length)) + { + ec = bson_errc::size_mismatch; + more_ = false; + return; + } + std::size_t pos = state_stack_.back().pos; + state_stack_.pop_back(); + state_stack_.back().pos += pos; + } + + void begin_array(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(static_cast<int>(state_stack_.size()) > options_.max_nesting_depth())) + { + ec = bson_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + uint8_t buf[sizeof(int32_t)]; + std::size_t n = source_.read(buf, sizeof(int32_t)); + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto length = binary::little_to_native<int32_t>(buf, sizeof(buf)); + + more_ = visitor.begin_array(semantic_tag::none, *this, ec); + if (ec) + { + return; + } + state_stack_.emplace_back(parse_mode::array, length, n); + } + + void end_array(json_visitor& visitor, std::error_code& ec) + { + JSONCONS_ASSERT(state_stack_.size() >= 2); + + more_ = visitor.end_array(*this, ec); + if (JSONCONS_UNLIKELY(state_stack_.back().pos != state_stack_.back().length)) + { + ec = bson_errc::size_mismatch; + more_ = false; + return; + } + std::size_t pos = state_stack_.back().pos; + state_stack_.pop_back(); + state_stack_.back().pos += pos; + } + + void read_e_name(json_visitor& visitor, jsoncons::bson::bson_container_type type, std::error_code& ec) + { + text_buffer_.clear(); + read_cstring(ec); + if (ec) + { + return; + } + if (type == jsoncons::bson::bson_container_type::document) + { + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) + { + ec = bson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.key(jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()), *this, ec); + } + } + + void read_value(json_visitor& visitor, uint8_t type, std::error_code& ec) + { + switch (type) + { + case jsoncons::bson::bson_type::double_type: + { + uint8_t buf[sizeof(double)]; + std::size_t n = source_.read(buf, sizeof(double)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(double))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + double res = binary::little_to_native<double>(buf, sizeof(buf)); + more_ = visitor.double_value(res, semantic_tag::none, *this, ec); + break; + } + case jsoncons::bson::bson_type::symbol_type: + case jsoncons::bson::bson_type::min_key_type: + case jsoncons::bson::bson_type::max_key_type: + case jsoncons::bson::bson_type::string_type: + { + text_buffer_.clear(); + read_string(ec); + if (ec) + { + return; + } + auto result = unicode_traits::validate(text_buffer_.data(), text_buffer_.size()); + if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) + { + ec = bson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::none, *this, ec); + break; + } + case jsoncons::bson::bson_type::javascript_type: + { + text_buffer_.clear(); + read_string(ec); + if (ec) + { + return; + } + auto result = unicode_traits::validate(text_buffer_.data(), text_buffer_.size()); + if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) + { + ec = bson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::code, *this, ec); + break; + } + case jsoncons::bson::bson_type::regex_type: + { + text_buffer_.clear(); + text_buffer_.push_back('/'); + read_cstring(ec); + if (ec) + { + return; + } + text_buffer_.push_back('/'); + read_cstring(ec); + if (ec) + { + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::regex, *this, ec); + break; + } + case jsoncons::bson::bson_type::document_type: + { + begin_document(visitor,ec); + break; + } + + case jsoncons::bson::bson_type::array_type: + { + begin_array(visitor,ec); + break; + } + case jsoncons::bson::bson_type::undefined_type: + { + more_ = visitor.null_value(semantic_tag::undefined, *this, ec); + break; + } + case jsoncons::bson::bson_type::null_type: + { + more_ = visitor.null_value(semantic_tag::none, *this, ec); + break; + } + case jsoncons::bson::bson_type::bool_type: + { + uint8_t c; + std::size_t n = source_.read(&c, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + more_ = visitor.bool_value(c != 0, semantic_tag::none, *this, ec); + break; + } + case jsoncons::bson::bson_type::int32_type: + { + uint8_t buf[sizeof(int32_t)]; + std::size_t n = source_.read(buf, sizeof(int32_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto val = binary::little_to_native<int32_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::bson::bson_type::timestamp_type: + { + uint8_t buf[sizeof(uint64_t)]; + std::size_t n = source_.read(buf, sizeof(uint64_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(uint64_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto val = binary::little_to_native<uint64_t>(buf, sizeof(buf)); + more_ = visitor.uint64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::bson::bson_type::int64_type: + { + uint8_t buf[sizeof(int64_t)]; + std::size_t n = source_.read(buf, sizeof(int64_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int64_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto val = binary::little_to_native<int64_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::bson::bson_type::datetime_type: + { + uint8_t buf[sizeof(int64_t)]; + std::size_t n = source_.read(buf, sizeof(int64_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int64_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto val = binary::little_to_native<int64_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::epoch_milli, *this, ec); + break; + } + case jsoncons::bson::bson_type::binary_type: + { + uint8_t buf[sizeof(int32_t)]; + std::size_t n = source_.read(buf, sizeof(int32_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + const auto len = binary::little_to_native<int32_t>(buf, sizeof(buf)); + if (JSONCONS_UNLIKELY(len < 0)) + { + ec = bson_errc::length_is_negative; + more_ = false; + return; + } + uint8_t subtype; + n = source_.read(&subtype, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + bytes_buffer_.clear(); + n = source_reader<Source>::read(source_, bytes_buffer_, len); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != static_cast<std::size_t>(len))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + more_ = visitor.byte_string_value(bytes_buffer_, + subtype, + *this, + ec); + break; + } + case jsoncons::bson::bson_type::decimal128_type: + { + uint8_t buf[sizeof(uint64_t)*2]; + std::size_t n = source_.read(buf, sizeof(buf)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(buf))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + decimal128_t dec; + dec.low = binary::little_to_native<uint64_t>(buf, sizeof(uint64_t)); + dec.high = binary::little_to_native<uint64_t>(buf+sizeof(uint64_t), sizeof(uint64_t)); + + text_buffer_.clear(); + text_buffer_.resize(bson::decimal128_limits::buf_size); + auto r = bson::decimal128_to_chars(&text_buffer_[0], &text_buffer_[0]+text_buffer_.size(), dec); + more_ = visitor.string_value(string_view(text_buffer_.data(),static_cast<std::size_t>(r.ptr-text_buffer_.data())), semantic_tag::float128, *this, ec); + break; + } + case jsoncons::bson::bson_type::object_id_type: + { + uint8_t buf[12]; + std::size_t n = source_.read(buf, sizeof(buf)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(buf))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + oid_t oid(buf); + to_string(oid, text_buffer_); + + more_ = visitor.string_value(text_buffer_, semantic_tag::id, *this, ec); + break; + } + default: + { + ec = bson_errc::unknown_type; + more_ = false; + return; + } + } + } + + void read_cstring(std::error_code& ec) + { + uint8_t c = 0xff; + while (true) + { + std::size_t n = source_.read(&c, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + if (c == 0) + { + break; + } + text_buffer_.push_back(c); + } + } + + void read_string(std::error_code& ec) + { + uint8_t buf[sizeof(int32_t)]; + std::size_t n = source_.read(buf, sizeof(int32_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto len = binary::little_to_native<int32_t>(buf, sizeof(buf)); + if (JSONCONS_UNLIKELY(len < 1)) + { + ec = bson_errc::string_length_is_non_positive; + more_ = false; + return; + } + + std::size_t size = static_cast<std::size_t>(len) - static_cast<std::size_t>(1); + n = source_reader<Source>::read(source_, text_buffer_, size); + state_stack_.back().pos += n; + + if (JSONCONS_UNLIKELY(n != size)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + uint8_t c; + n = source_.read(&c, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + } +}; + +}} + +#endif |