From 1d055261b4144dbf86b2658437015b15d4dd9bff Mon Sep 17 00:00:00 2001 From: Richard Date: Sun, 4 Sep 2022 00:32:56 +0100 Subject: initial --- include/jsoncons_ext/ubjson/ubjson_parser.hpp | 880 ++++++++++++++++++++++++++ 1 file changed, 880 insertions(+) create mode 100644 include/jsoncons_ext/ubjson/ubjson_parser.hpp (limited to 'include/jsoncons_ext/ubjson/ubjson_parser.hpp') diff --git a/include/jsoncons_ext/ubjson/ubjson_parser.hpp b/include/jsoncons_ext/ubjson/ubjson_parser.hpp new file mode 100644 index 0000000..468f139 --- /dev/null +++ b/include/jsoncons_ext/ubjson/ubjson_parser.hpp @@ -0,0 +1,880 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_PARSER_HPP +#define JSONCONS_UBJSON_UBJSON_PARSER_HPP + +#include +#include +#include // std::move +#include +#include +#include +#include +#include +#include +#include + +namespace jsoncons { namespace ubjson { + +enum class parse_mode {root,accept,array,indefinite_array,strongly_typed_array,map_key,map_value,strongly_typed_map_key,strongly_typed_map_value,indefinite_map_key,indefinite_map_value}; + +struct parse_state +{ + parse_mode mode; + std::size_t length; + uint8_t type; + std::size_t index; + + parse_state(parse_mode mode, std::size_t length, uint8_t type = 0) noexcept + : mode(mode), length(length), type(type), index(0) + { + } + + parse_state(const parse_state&) = default; + parse_state(parse_state&&) = default; +}; + +template > +class basic_ubjson_parser : public ser_context +{ + using char_type = char; + using char_traits_type = std::char_traits; + using temp_allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; + using byte_allocator_type = typename std::allocator_traits:: template rebind_alloc; + using parse_state_allocator_type = typename std::allocator_traits:: template rebind_alloc; + + Source source_; + ubjson_decode_options options_; + bool more_; + bool done_; + std::basic_string,char_allocator_type> text_buffer_; + std::vector state_stack_; + int nesting_depth_; +public: + template + basic_ubjson_parser(Sourceable&& source, + const ubjson_decode_options& options = ubjson_decode_options(), + const Allocator alloc = Allocator()) + : source_(std::forward(source)), + options_(options), + more_(true), + done_(false), + text_buffer_(alloc), + state_stack_(alloc), + nesting_depth_(0) + { + state_stack_.emplace_back(parse_mode::root,0); + } + + void restart() + { + more_ = true; + } + + void reset() + { + more_ = true; + done_ = false; + text_buffer_.clear(); + state_stack_.clear(); + state_stack_.emplace_back(parse_mode::root,0,0); + nesting_depth_ = 0; + } + + template + void reset(Sourceable&& source) + { + source_ = std::forward(source); + reset(); + } + + bool done() const + { + return done_; + } + + bool stopped() const + { + return !more_; + } + + std::size_t line() const override + { + return 0; + } + + std::size_t column() const override + { + return source_.position(); + } + + void parse(json_visitor& visitor, std::error_code& ec) + { + while (!done_ && more_) + { + switch (state_stack_.back().mode) + { + case parse_mode::array: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + } + else + { + end_array(visitor, ec); + } + break; + } + case parse_mode::strongly_typed_array: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_value(visitor, state_stack_.back().type, ec); + if (ec) + { + return; + } + } + else + { + end_array(visitor, ec); + } + break; + } + case parse_mode::indefinite_array: + { + auto c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::end_array_marker) + { + source_.ignore(1); + end_array(visitor, ec); + if (ec) + { + return; + } + } + else + { + if (++state_stack_.back().index > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + } + break; + } + case parse_mode::map_key: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_key(visitor, ec); + if (ec) + { + return; + } + state_stack_.back().mode = parse_mode::map_value; + } + else + { + end_object(visitor, ec); + } + break; + } + case parse_mode::map_value: + { + state_stack_.back().mode = parse_mode::map_key; + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::strongly_typed_map_key: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_key(visitor, ec); + if (ec) + { + return; + } + state_stack_.back().mode = parse_mode::strongly_typed_map_value; + } + else + { + end_object(visitor, ec); + } + break; + } + case parse_mode::strongly_typed_map_value: + { + state_stack_.back().mode = parse_mode::strongly_typed_map_key; + read_value(visitor, state_stack_.back().type, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::indefinite_map_key: + { + auto c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::end_object_marker) + { + source_.ignore(1); + end_object(visitor, ec); + if (ec) + { + return; + } + } + else + { + if (++state_stack_.back().index > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + read_key(visitor, ec); + if (ec) + { + return; + } + state_stack_.back().mode = parse_mode::indefinite_map_value; + } + break; + } + case parse_mode::indefinite_map_value: + { + state_stack_.back().mode = parse_mode::indefinite_map_key; + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::root: + { + state_stack_.back().mode = parse_mode::accept; + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::accept: + { + JSONCONS_ASSERT(state_stack_.size() == 1); + state_stack_.clear(); + more_ = false; + done_ = true; + visitor.flush(); + break; + } + } + } + } +private: + void read_type_and_value(json_visitor& visitor, std::error_code& ec) + { + if (source_.is_error()) + { + ec = ubjson_errc::source_error; + more_ = false; + return; + } + + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + read_value(visitor, b, ec); + } + + void read_value(json_visitor& visitor, uint8_t type, std::error_code& ec) + { + switch (type) + { + case jsoncons::ubjson::ubjson_type::null_type: + { + more_ = visitor.null_value(semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::no_op_type: + { + break; + } + case jsoncons::ubjson::ubjson_type::true_type: + { + more_ = visitor.bool_value(true, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::false_type: + { + more_ = visitor.bool_value(false, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::int8_type: + { + uint8_t buf[sizeof(int8_t)]; + if (source_.read(buf, sizeof(int8_t)) != sizeof(int8_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + int8_t val = binary::big_to_native(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::uint8_type: + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + more_ = visitor.uint64_value(b, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::int16_type: + { + uint8_t buf[sizeof(int16_t)]; + if (source_.read(buf, sizeof(int16_t)) != sizeof(int16_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + int16_t val = binary::big_to_native(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::int32_type: + { + uint8_t buf[sizeof(int32_t)]; + if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + int32_t val = binary::big_to_native(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::int64_type: + { + uint8_t buf[sizeof(int64_t)]; + if (source_.read(buf, sizeof(int64_t)) != sizeof(int64_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + int64_t val = binary::big_to_native(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::float32_type: + { + uint8_t buf[sizeof(float)]; + if (source_.read(buf, sizeof(float)) != sizeof(float)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + float val = binary::big_to_native(buf, sizeof(buf)); + more_ = visitor.double_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::float64_type: + { + uint8_t buf[sizeof(double)]; + if (source_.read(buf, sizeof(double)) != sizeof(double)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + double val = binary::big_to_native(buf, sizeof(buf)); + more_ = visitor.double_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::char_type: + { + text_buffer_.clear(); + if (source_reader::read(source_,text_buffer_,1) != 1) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = ubjson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::string_type: + { + std::size_t length = get_length(ec); + if (ec) + { + return; + } + text_buffer_.clear(); + if (source_reader::read(source_,text_buffer_,length) != length) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = ubjson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(jsoncons::basic_string_view(text_buffer_.data(),text_buffer_.length()), semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::high_precision_number_type: + { + std::size_t length = get_length(ec); + if (ec) + { + return; + } + text_buffer_.clear(); + if (source_reader::read(source_,text_buffer_,length) != length) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (jsoncons::detail::is_base10(text_buffer_.data(),text_buffer_.length())) + { + more_ = visitor.string_value(jsoncons::basic_string_view(text_buffer_.data(),text_buffer_.length()), semantic_tag::bigint, *this, ec); + } + else + { + more_ = visitor.string_value(jsoncons::basic_string_view(text_buffer_.data(),text_buffer_.length()), semantic_tag::bigdec, *this, ec); + } + break; + } + case jsoncons::ubjson::ubjson_type::start_array_marker: + { + begin_array(visitor,ec); + break; + } + case jsoncons::ubjson::ubjson_type::start_object_marker: + { + begin_object(visitor, ec); + break; + } + default: + { + ec = ubjson_errc::unknown_type; + break; + } + } + if (ec) + { + more_ = false; + } + } + + void begin_array(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = ubjson_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + + auto c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::type_marker) + { + source_.ignore(1); + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::count_marker) + { + source_.ignore(1); + std::size_t length = get_length(ec); + if (ec) + { + return; + } + if (length > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + state_stack_.emplace_back(parse_mode::strongly_typed_array,length,b); + more_ = visitor.begin_array(length, semantic_tag::none, *this, ec); + } + else + { + ec = ubjson_errc::count_required_after_type; + more_ = false; + return; + } + } + else if (c.value == jsoncons::ubjson::ubjson_type::count_marker) + { + source_.ignore(1); + std::size_t length = get_length(ec); + if (ec) + { + return; + } + if (length > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + state_stack_.emplace_back(parse_mode::array,length); + more_ = visitor.begin_array(length, semantic_tag::none, *this, ec); + } + else + { + state_stack_.emplace_back(parse_mode::indefinite_array,0); + more_ = visitor.begin_array(semantic_tag::none, *this, ec); + } + } + + void end_array(json_visitor& visitor, std::error_code& ec) + { + --nesting_depth_; + + more_ = visitor.end_array(*this, ec); + state_stack_.pop_back(); + } + + void begin_object(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = ubjson_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + + auto c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::type_marker) + { + source_.ignore(1); + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::count_marker) + { + source_.ignore(1); + std::size_t length = get_length(ec); + if (ec) + { + return; + } + if (length > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + state_stack_.emplace_back(parse_mode::strongly_typed_map_key,length,b); + more_ = visitor.begin_object(length, semantic_tag::none, *this, ec); + } + else + { + ec = ubjson_errc::count_required_after_type; + more_ = false; + return; + } + } + else + { + c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::count_marker) + { + source_.ignore(1); + std::size_t length = get_length(ec); + if (ec) + { + return; + } + if (length > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + state_stack_.emplace_back(parse_mode::map_key,length); + more_ = visitor.begin_object(length, semantic_tag::none, *this, ec); + } + else + { + state_stack_.emplace_back(parse_mode::indefinite_map_key,0); + more_ = visitor.begin_object(semantic_tag::none, *this, ec); + } + } + } + + void end_object(json_visitor& visitor, std::error_code& ec) + { + --nesting_depth_; + more_ = visitor.end_object(*this, ec); + state_stack_.pop_back(); + } + + std::size_t get_length(std::error_code& ec) + { + std::size_t length = 0; + uint8_t type; + if (source_.read(&type, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + switch (type) + { + case jsoncons::ubjson::ubjson_type::int8_type: + { + uint8_t buf[sizeof(int8_t)]; + if (source_.read(buf, sizeof(int8_t)) != sizeof(int8_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + int8_t val = binary::big_to_native(buf, sizeof(buf)); + if (val >= 0) + { + length = val; + } + else + { + ec = ubjson_errc::length_is_negative; + more_ = false; + return length; + } + break; + } + case jsoncons::ubjson::ubjson_type::uint8_type: + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + length = b; + break; + } + case jsoncons::ubjson::ubjson_type::int16_type: + { + uint8_t buf[sizeof(int16_t)]; + if (source_.read(buf, sizeof(int16_t)) != sizeof(int16_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + int16_t val = binary::big_to_native(buf, sizeof(buf)); + if (val >= 0) + { + length = val; + } + else + { + ec = ubjson_errc::length_is_negative; + more_ = false; + return length; + } + break; + } + case jsoncons::ubjson::ubjson_type::int32_type: + { + uint8_t buf[sizeof(int32_t)]; + if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + int32_t val = binary::big_to_native(buf, sizeof(buf)); + if (val >= 0) + { + length = static_cast(val); + } + else + { + ec = ubjson_errc::length_is_negative; + more_ = false; + return length; + } + break; + } + case jsoncons::ubjson::ubjson_type::int64_type: + { + uint8_t buf[sizeof(int64_t)]; + if (source_.read(buf, sizeof(int64_t)) != sizeof(int64_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + int64_t val = binary::big_to_native(buf, sizeof(buf)); + if (val >= 0) + { + length = (std::size_t)val; + if (length != (uint64_t)val) + { + ec = ubjson_errc::number_too_large; + more_ = false; + return length; + } + } + else + { + ec = ubjson_errc::length_is_negative; + more_ = false; + return length; + } + break; + } + default: + { + ec = ubjson_errc::length_must_be_integer; + more_ = false; + return length; + } + } + return length; + } + + void read_key(json_visitor& visitor, std::error_code& ec) + { + std::size_t length = get_length(ec); + if (ec) + { + ec = ubjson_errc::key_expected; + more_ = false; + return; + } + text_buffer_.clear(); + if (source_reader::read(source_,text_buffer_,length) != length) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = ubjson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.key(jsoncons::basic_string_view(text_buffer_.data(),text_buffer_.length()), *this, ec); + } +}; + +}} + +#endif -- cgit v1.2.3