diff options
Diffstat (limited to 'include/jsoncons_ext')
82 files changed, 40225 insertions, 0 deletions
diff --git a/include/jsoncons_ext/LICENSE b/include/jsoncons_ext/LICENSE new file mode 100644 index 0000000..ecf46ab --- /dev/null +++ b/include/jsoncons_ext/LICENSE @@ -0,0 +1,28 @@ +// Copyright Daniel Parker 2013 - 2020. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/include/jsoncons_ext/bson/bson.hpp b/include/jsoncons_ext/bson/bson.hpp new file mode 100644 index 0000000..ec3192d --- /dev/null +++ b/include/jsoncons_ext/bson/bson.hpp @@ -0,0 +1,23 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_BSON_HPP +#define JSONCONS_BSON_BSON_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/bson/bson_encoder.hpp> +#include <jsoncons_ext/bson/bson_reader.hpp> +#include <jsoncons_ext/bson/bson_cursor.hpp> +#include <jsoncons_ext/bson/encode_bson.hpp> +#include <jsoncons_ext/bson/decode_bson.hpp> + +#endif diff --git a/include/jsoncons_ext/bson/bson_cursor.hpp b/include/jsoncons_ext/bson/bson_cursor.hpp new file mode 100644 index 0000000..8baee53 --- /dev/null +++ b/include/jsoncons_ext/bson/bson_cursor.hpp @@ -0,0 +1,320 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_BSON_CURSOR_HPP +#define JSONCONS_BSON_BSON_CURSOR_HPP + +#include <memory> // std::allocator +#include <string> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <ios> +#include <istream> // std::basic_istream +#include <jsoncons/byte_string.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/staj_cursor.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons_ext/bson/bson_parser.hpp> + +namespace jsoncons { +namespace bson { + +template<class Source=jsoncons::binary_stream_source,class Allocator=std::allocator<char>> +class basic_bson_cursor : public basic_staj_cursor<char>, private virtual ser_context +{ + using super_type = basic_staj_cursor<char>; +public: + using source_type = Source; + using char_type = char; + using allocator_type = Allocator; +private: + basic_bson_parser<Source,Allocator> parser_; + basic_staj_visitor<char_type> cursor_visitor_; + bool eof_; + + // Noncopyable and nonmoveable + basic_bson_cursor(const basic_bson_cursor&) = delete; + basic_bson_cursor& operator=(const basic_bson_cursor&) = delete; + +public: + using string_view_type = string_view; + + template <class Sourceable> + basic_bson_cursor(Sourceable&& source, + const bson_decode_options& options = bson_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + eof_(false) + { + if (!done()) + { + next(); + } + } + + // Constructors that set parse error codes + + template <class Sourceable> + basic_bson_cursor(Sourceable&& source, + std::error_code& ec) + : basic_bson_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + bson_decode_options(), + ec) + { + 
} + + template <class Sourceable> + basic_bson_cursor(Sourceable&& source, + const bson_decode_options& options, + std::error_code& ec) + : basic_bson_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + ec) + { + } + + template <class Sourceable> + basic_bson_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const bson_decode_options& options, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + void reset() + { + parser_.reset(); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + void reset(std::error_code& ec) + { + parser_.reset(); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + void reset(Sourceable&& source, std::error_code& ec) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + bool done() const override + { + return parser_.done(); + } + + void array_expected(std::error_code& ec) override + { + if (cursor_visitor_.event().event_type() == staj_event_type::begin_object) + { + parser_.array_expected(cursor_visitor_, ec); + } + else + { + super_type::array_expected(ec); + } + } + + const staj_event& current() const override + { + return cursor_visitor_.event(); + } + + void read_to(basic_json_visitor<char_type>& visitor) override + { + std::error_code ec; + read_to(visitor, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read_to(basic_json_visitor<char_type>& visitor, + std::error_code& ec) override + { + if 
(staj_to_saj_event(cursor_visitor_.event(), visitor, *this, ec)) + { + read_next(visitor, ec); + } + } + + void next() override + { + std::error_code ec; + next(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void next(std::error_code& ec) override + { + read_next(ec); + } + + const ser_context& context() const override + { + return *this; + } + + bool eof() const + { + return eof_; + } + + std::size_t line() const override + { + return parser_.line(); + } + + std::size_t column() const override + { + return parser_.column(); + } + + friend + staj_filter_view operator|(basic_bson_cursor& cursor, + std::function<bool(const staj_event&, const ser_context&)> pred) + { + return staj_filter_view(cursor, pred); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_bson_cursor(Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + std::error_code& ec) + : basic_bson_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), filter, ec) + { + } + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_bson_cursor(Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + const bson_decode_options& options = bson_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(filter), + eof_(false) + { + if (!done()) + { + next(); + } + } + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_bson_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source),alloc), + cursor_visitor_(filter), + eof_(false) + { + if (!done()) + { + 
next(ec); + } + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<char_type>&)") + void read(basic_json_visitor<char_type>& visitor) + { + read_to(visitor); + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<char_type>&, std::error_code&)") + void read(basic_json_visitor<char_type>& visitor, + std::error_code& ec) + { + read_to(visitor, ec); + } +#endif +private: + static bool accept_all(const staj_event&, const ser_context&) + { + return true; + } + + void read_next(std::error_code& ec) + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(cursor_visitor_, ec); + if (ec) return; + } + } + + void read_next(basic_json_visitor<char_type>& visitor, std::error_code& ec) + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(visitor, ec); + if (ec) return; + } + } +}; + +using bson_stream_cursor = basic_bson_cursor<jsoncons::binary_stream_source>; +using bson_bytes_cursor = basic_bson_cursor<jsoncons::bytes_source>; + +} // namespace bson +} // namespace jsoncons + +#endif + diff --git a/include/jsoncons_ext/bson/bson_decimal128.hpp b/include/jsoncons_ext/bson/bson_decimal128.hpp new file mode 100644 index 0000000..b487a04 --- /dev/null +++ b/include/jsoncons_ext/bson/bson_decimal128.hpp @@ -0,0 +1,865 @@ +#ifndef JSONCONS_BSON_BSON_DECIMAL128_HPP +#define JSONCONS_BSON_BSON_DECIMAL128_HPP + +/* + * Implements decimal128_to_chars and decimal128_from_chars + * + * Based on the libjson functions bson_decimal128_to_string + * and bson_decimal128_from_string_w_len, available at + * https://github.com/mongodb/mongo-c-driver/blob/master/src/libbson/src/bson/bson-decimal128.h + * and https://github.com/mongodb/mongo-c-driver/blob/master/src/libbson/src/bson/bson-decimal128.c + * +*/ + +/* + * Copyright 2015 MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> +#include <string.h> +#include <cstring> +#include <ctype.h> +#include <system_error> +#include <algorithm> +#include <jsoncons/config/jsoncons_config.hpp> + +namespace jsoncons { namespace bson { + + struct decimal128_to_chars_result + { + char* ptr; + std::errc ec; + }; + + struct decimal128_from_chars_result + { + const char* ptr; + std::errc ec; + }; + +/** + * BSON_DECIMAL128_STRING: + * + * The length of a decimal128 string (with null terminator). + * + * 1 for the sign + * 35 for digits and radix + * 2 for exponent indicator and sign + * 4 for exponent digits + */ +#define BSON_DECIMAL128_STRING 43 + + struct TP1 + { + uint64_t low; + uint64_t high; + + constexpr TP1() : low(0), high(0) {} + constexpr TP1(uint64_t hi, uint64_t lo) : low(lo), high(hi) {} + }; + struct TP2 + { + uint64_t high; + uint64_t low; + + constexpr TP2() : high(0), low(0) {} + constexpr TP2(uint64_t hi, uint64_t lo) : high(hi), low(lo) {} + }; + + typedef std::conditional< + jsoncons::endian::native == jsoncons::endian::little, + TP1, + TP2 + >::type decimal128_t; + + inline + bool operator==(const decimal128_t& lhs, const decimal128_t& rhs) + { + return lhs.high == rhs.high && lhs.low == rhs.low; + } + + inline + bool operator!=(const decimal128_t& lhs, const decimal128_t& rhs) + { + return !(lhs == rhs); + } + + struct decimal128_limits + { + // The length of a decimal128 string (without null terminator). 
+ // + // 1 for the sign + // 35 for digits and radix + // 2 for exponent indicator and sign + // 4 for exponent digits + static constexpr int buf_size = 42; + static constexpr int exponent_max = 6111; + static constexpr int exponent_min = -6176; + static constexpr int exponent_bias = 6176; + static constexpr int max_digits = 34; + + static constexpr decimal128_t nan() {return decimal128_t(0x7c00000000000000ull, 0);} + static constexpr decimal128_t infinity() {return decimal128_t(0x7800000000000000ull, 0);} + static constexpr decimal128_t neg_infinity() {return decimal128_t(0x7800000000000000ull + 0x8000000000000000ull, 0);} + }; + + inline + bool is_nan(decimal128_t dec) { return dec == decimal128_limits::nan(); } + + inline + bool is_inf(decimal128_t dec) { return dec == decimal128_limits::infinity(); } + + inline + bool is_neg_inf(decimal128_t dec) { return dec == decimal128_limits::neg_infinity(); } + + /** + * bson_uint128_t: + * + * This struct represents a 128 bit integer. + */ + typedef struct { + uint32_t parts[4]; /* 32-bit words stored high to low. */ + } bson_uint128_t; + + typedef struct { + uint64_t high, low; + } bson_uint128_6464_t; + + namespace detail { + + /** + *------------------------------------------------------------------------------ + * + * bson_uint128_divide1B -- + * + * This function divides a #bson_uint128_t by 1000000000 (1 billion) and + * computes the quotient and remainder. + * + * The remainder will contain 9 decimal digits for conversion to string. + * + * @value The #bson_uint128_t operand. + * @quotient A pointer to store the #bson_uint128_t quotient. + * @rem A pointer to store the #uint64_t remainder. + * + * Returns: + * The quotient at @quotient and the remainder at @rem. + * + * Side effects: + * None. 
+ * + *------------------------------------------------------------------------------ + */ + + inline + void bson_uint128_divide1B (bson_uint128_t value, /* IN */ + bson_uint128_t *quotient, /* OUT */ + uint32_t *rem) /* OUT */ + { + const uint32_t DIVISOR = 1000 * 1000 * 1000; + uint64_t _rem = 0; + int i = 0; + + if (!value.parts[0] && !value.parts[1] && !value.parts[2] && + !value.parts[3]) { + *quotient = value; + *rem = 0; + return; + } + + for (i = 0; i <= 3; i++) { + _rem <<= 32; /* Adjust remainder to match value of next dividend */ + _rem += value.parts[i]; /* Add the divided to _rem */ + value.parts[i] = (uint32_t) (_rem / DIVISOR); + _rem %= DIVISOR; /* Store the remainder */ + } + + *quotient = value; + *rem = (uint32_t) _rem; + } + + /** + *------------------------------------------------------------------------- + * + * mul64x64 -- + * + * This function multiplies two &uint64_t into a &bson_uint128_6464_t. + * + * Returns: + * The product of @left and @right. + * + * Side Effects: + * None. 
+ * + *------------------------------------------------------------------------- + */ + + inline + void mul_64x64 (uint64_t left, /* IN */ + uint64_t right, /* IN */ + bson_uint128_6464_t *product) /* OUT */ + { + uint64_t left_high, left_low, right_high, right_low, product_high, + product_mid, product_mid2, product_low; + bson_uint128_6464_t rt = {0,0}; + + if (!left && !right) { + *product = rt; + return; + } + + left_high = left >> 32; + left_low = (uint32_t) left; + right_high = right >> 32; + right_low = (uint32_t) right; + + product_high = left_high * right_high; + product_mid = left_high * right_low; + product_mid2 = left_low * right_high; + product_low = left_low * right_low; + + product_high += product_mid >> 32; + product_mid = (uint32_t) product_mid + product_mid2 + (product_low >> 32); + + product_high = product_high + (product_mid >> 32); + product_low = (product_mid << 32) + (uint32_t) product_low; + + rt.high = product_high; + rt.low = product_low; + *product = rt; + } + + /** + *------------------------------------------------------------------------------ + * + * dec128_tolower -- + * + * This function converts the ASCII character @c to lowercase. It is locale + * insensitive (unlike the stdlib tolower). + * + * Returns: + * The lowercased character. + */ + + inline + char dec128_tolower (char c) + { + if (isupper (c)) { + c += 32; + } + + return c; + } + + /** + *------------------------------------------------------------------------------ + * + * dec128_istreq -- + * + * This function compares the null-terminated *ASCII* strings @a and @b + * for case-insensitive equality. + * + * Returns: + * true if the strings are equal, false otherwise. 
namespace jsoncons { namespace bson { namespace detail {

    /**
     * dec128_istreq --
     *
     *    Compares the character ranges [a, lasta) and [b, lastb) for
     *    equality, ignoring the case of the ASCII letters 'A'..'Z'.
     *    The ranges do not need to be null-terminated.
     *
     *    Case folding is done with a direct range test rather than through
     *    the C library, so the comparison is locale-independent and safe for
     *    bytes with the high bit set (which are compared unchanged).
     *
     * Returns:
     *    true if both ranges have the same length and match character by
     *    character after folding, false otherwise.
     */
    inline
    bool dec128_istreq (const char* a, const char* lasta,
                        const char* b, const char* lastb)
    {
        const auto fold = [](char c) -> char
        {
            return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c;
        };

        // strings are different lengths
        if ((lasta - a) != (lastb - b))
        {
            return false;
        }

        for (; a != lasta; ++a, ++b)
        {
            if (fold(*a) != fold(*b))
            {
                return false;
            }
        }

        return true;
    }

} // namespace detail
} // namespace bson
} // namespace jsoncons
*/ + uint32_t high; /* bits 0 - 31 */ + uint32_t midh; /* bits 32 - 63 */ + uint32_t midl; /* bits 64 - 95 */ + uint32_t low; /* bits 96 - 127 */ + uint32_t combination; /* bits 1 - 5 */ + uint32_t biased_exponent; /* decoded biased exponent (14 bits) */ + uint32_t significand_digits = 0; /* the number of significand digits */ + uint32_t significand[36] = {0}; /* the base-10 digits in the significand */ + uint32_t *significand_read = significand; /* read pointer into significand */ + int32_t exponent; /* unbiased exponent */ + int32_t scientific_exponent; /* the exponent if scientific notation is + * used */ + bool is_zero = false; /* true if the number is zero */ + + uint8_t significand_msb; /* the most signifcant significand bits (50-46) */ + bson_uint128_t + significand128; /* temporary storage for significand decoding */ + + memset (significand_str, 0, sizeof (significand_str)); + + if ((int64_t) dec.high < 0) { /* negative */ + *(str_out++) = '-'; + } + + low = (uint32_t) dec.low, midl = (uint32_t) (dec.low >> 32), + midh = (uint32_t) dec.high, high = (uint32_t) (dec.high >> 32); + + /* Decode combination field and exponent */ + combination = (high >> 26) & combination_mask; + + if (JSONCONS_UNLIKELY ((combination >> 3) == 3)) { + /* Check for 'special' values */ + if (combination == combination_infinity) { /* Infinity */ + if (last-str_out >= static_cast<ptrdiff_t >(bson_decimal128_inf.size())) + { + std::memcpy(str_out, bson_decimal128_inf.data(), bson_decimal128_inf.size()); + str_out += bson_decimal128_inf.size(); + } + *str_out = 0; + //strcpy_s (str_out, last-str_out, bson_decimal128_inf.c_str()); + return decimal128_to_chars_result{str_out, std::errc()}; + } else if (combination == combination_nan) { /* NaN */ + /* first, not str_out, to erase the sign */ + str_out = first; + if (last-str_out >= static_cast<ptrdiff_t >(bson_decimal128_nan.size())) + { + std::memcpy(str_out, bson_decimal128_nan.data(), bson_decimal128_nan.size()); + str_out += 
bson_decimal128_nan.size(); + } + *str_out = 0; + //strcpy_s (first, last-first, bson_decimal128_nan.c_str()); + /* we don't care about the NaN payload. */ + return decimal128_to_chars_result{str_out, std::errc()}; + } else { + biased_exponent = (high >> 15) & exponent_mask; + significand_msb = 0x8 + ((high >> 14) & 0x1); + } + } else { + significand_msb = (high >> 14) & 0x7; + biased_exponent = (high >> 17) & exponent_mask; + } + + exponent = biased_exponent - exponent_bias; + /* Create string of significand digits */ + + /* Convert the 114-bit binary number represented by */ + /* (high, midh, midl, low) to at most 34 decimal */ + /* digits through modulo and division. */ + significand128.parts[0] = (high & 0x3fff) + ((significand_msb & 0xf) << 14); + significand128.parts[1] = midh; + significand128.parts[2] = midl; + significand128.parts[3] = low; + + if (significand128.parts[0] == 0 && significand128.parts[1] == 0 && + significand128.parts[2] == 0 && significand128.parts[3] == 0) { + is_zero = true; + } else if (significand128.parts[0] >= (1 << 17)) { + /* The significand is non-canonical or zero. + * In order to preserve compatibility with the densely packed decimal + * format, the maximum value for the significand of decimal128 is + * 1e34 - 1. If the value is greater than 1e34 - 1, the IEEE 754 + * standard dictates that the significand is interpreted as zero. + */ + is_zero = true; + } else { + for (int k = 3; k >= 0; k--) { + uint32_t least_digits = 0; + detail::bson_uint128_divide1B ( + significand128, &significand128, &least_digits); + + /* We now have the 9 least significant digits (in base 2). */ + /* Convert and output to string. 
*/ + if (!least_digits) { + continue; + } + + for (int j = 8; j >= 0; j--) { + significand[k * 9 + j] = least_digits % 10; + least_digits /= 10; + } + } + } + + /* Output format options: */ + /* Scientific - [-]d.dddE(+/-)dd or [-]dE(+/-)dd */ + /* Regular - ddd.ddd */ + + if (is_zero) { + significand_digits = 1; + *significand_read = 0; + } else { + significand_digits = 36; + while (!(*significand_read)) { + significand_digits--; + significand_read++; + } + } + + scientific_exponent = significand_digits - 1 + exponent; + + /* The scientific exponent checks are dictated by the string conversion + * specification and are somewhat arbitrary cutoffs. + * + * We must check exponent > 0, because if this is the case, the number + * has trailing zeros. However, we *cannot* output these trailing zeros, + * because doing so would change the precision of the value, and would + * change stored data if the string converted number is round tripped. + */ + if (scientific_exponent < -6 || exponent > 0) { + /* Scientific format */ + *(str_out++) = char(*(significand_read++)) + '0'; + significand_digits--; + + if (significand_digits) { + *(str_out++) = '.'; + } + + for (std::size_t i = 0; i < significand_digits && (str_out - first) < 36; i++) { + *(str_out++) = char(*(significand_read++)) + '0'; + } + /* Exponent */ + *(str_out++) = 'E'; + + std::string s; + if (scientific_exponent >= 0) { + s.push_back('+'); + } + jsoncons::detail::from_integer(scientific_exponent, s); + if (str_out + s.size() < last) + { + std::memcpy(str_out, s.data(), s.size()); + } + else + { + return decimal128_to_chars_result{str_out, std::errc::value_too_large}; + } + str_out += s.size(); + } else { + /* Regular format with no decimal place */ + if (exponent >= 0) { + for (std::size_t i = 0; i < significand_digits && (str_out - first) < 36; i++) { + *(str_out++) = char(*(significand_read++)) + '0'; + } + } else { + int32_t radix_position = significand_digits + exponent; + + if (radix_position > 0) { /* 
non-zero digits before radix */ + for (int32_t i = 0; + i < radix_position && (str_out < last); + i++) { + *(str_out++) = char(*(significand_read++)) + '0'; + } + } else { /* leading zero before radix point */ + *(str_out++) = '0'; + } + + *(str_out++) = '.'; + while (radix_position++ < 0) { /* add leading zeros after radix */ + *(str_out++) = '0'; + } + + for (std::size_t i = 0; + (i < significand_digits - (std::max) (radix_position - 1, 0)) && + (str_out < last); + i++) { + *(str_out++) = char(*(significand_read++)) + '0'; + } + } + } + return decimal128_to_chars_result{str_out, std::errc()}; + } + + + + /** + *------------------------------------------------------------------------------ + * + * bson_decimal128_from_string_w_len -- + * + * This function converts @string in the format [+-]ddd[.]ddd[E][+-]dddd to + * decimal128. Out of range values are converted to +/-Infinity. Invalid + * strings are converted to NaN. @len is the length of the string, or -1 + * meaning the string is null-terminated. + * + * If more digits are provided than the available precision allows, + * round to the nearest expressable decimal128 with ties going to even will + * occur. + * + * Note: @string must be ASCII only! + * + * Returns: + * true on success, or false on failure. @dec will be NaN if @str was invalid + * The &decimal128_t converted from @string at @dec. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + + inline + decimal128_from_chars_result decimal128_from_chars(const char* first, const char* last, decimal128_t& dec) + { + const string_view inf_str = "inf"; + const string_view infinity_str = "infinity"; + const string_view nan_str = "nan"; + + ptrdiff_t len = last - first; + + bson_uint128_6464_t significand = {0,0}; + + const char* str_read = first; /* Read pointer for consuming str. 
*/ + + /* Parsing state tracking */ + bool is_negative = false; + bool saw_radix = false; + bool includes_sign = false; /* True if the input first contains a sign. */ + bool found_nonzero = false; + + size_t significant_digits = 0; /* Total number of significant digits + * (no leading or trailing zero) */ + size_t ndigits_read = 0; /* Total number of significand digits read */ + size_t ndigits = 0; /* Total number of digits (no leading zeros) */ + size_t radix_position = 0; /* The number of the digits after radix */ + size_t first_nonzero = 0; /* The index of the first non-zero in *str* */ + + uint16_t digits[decimal128_limits::max_digits] = {0}; + uint16_t ndigits_stored = 0; /* The number of digits in digits */ + uint16_t *digits_insert = digits; /* Insertion pointer for digits */ + size_t first_digit = 0; /* The index of the first non-zero digit */ + size_t last_digit = 0; /* The index of the last digit */ + + int32_t exponent = 0; + uint64_t significand_high = 0; /* The high 17 digits of the significand */ + uint64_t significand_low = 0; /* The low 17 digits of the significand */ + uint16_t biased_exponent = 0; /* The biased exponent */ + + dec.high = 0; + dec.low = 0; + + if (*str_read == '+' || *str_read == '-') { + is_negative = *(str_read++) == '-'; + includes_sign = true; + } + + /* Check for Infinity or NaN */ + if (!isdigit (*str_read) && *str_read != '.') { + if (detail::dec128_istreq (str_read, last, inf_str.data(), inf_str.data()+inf_str.length()) || + detail::dec128_istreq (str_read, last, infinity_str.data(), infinity_str.data()+infinity_str.length())) + { + dec = is_negative ? 
decimal128_limits::neg_infinity() : decimal128_limits::infinity(); + return decimal128_from_chars_result{str_read,std::errc()}; + } else if (detail::dec128_istreq (str_read, last, nan_str.data(), nan_str.data()+nan_str.length())) { + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc()}; + } + + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + + /* Read digits */ + while (((isdigit (*str_read) || *str_read == '.')) && + (len == -1 || str_read < first + len)) { + if (*str_read == '.') { + if (saw_radix) { + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + + saw_radix = true; + str_read++; + continue; + } + + if (ndigits_stored < 34) { + if (*str_read != '0' || found_nonzero) { + if (!found_nonzero) { + first_nonzero = ndigits_read; + } + + found_nonzero = true; + *(digits_insert++) = *(str_read) - '0'; /* Only store 34 digits */ + ndigits_stored++; + } + } + + if (found_nonzero) { + ndigits++; + } + + if (saw_radix) { + radix_position++; + } + + ndigits_read++; + str_read++; + } + + if (saw_radix && !ndigits_read) { + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + + /* Read exponent if exists */ + if (*str_read == 'e' || *str_read == 'E') { + ++str_read; + if (*str_read == '+') { + ++str_read; + } + auto result = jsoncons::detail::to_integer(str_read, last - str_read, exponent); + if (result.ec != jsoncons::detail::to_integer_errc()) + { + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + str_read = result.ptr; + } + + if ((len == -1 || str_read < first + len) && *str_read) { + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + + /* Done reading input. 
*/ + /* Find first non-zero digit in digits */ + first_digit = 0; + + if (!ndigits_stored) { /* value is zero */ + first_digit = 0; + last_digit = 0; + digits[0] = 0; + ndigits = 1; + ndigits_stored = 1; + significant_digits = 0; + } else { + last_digit = ndigits_stored - 1; + significant_digits = ndigits; + /* Mark trailing zeros as non-significant */ + while (first[first_nonzero + significant_digits - 1 + includes_sign + + saw_radix] == '0') { + significant_digits--; + } + } + + + /* Normalization of exponent */ + /* Correct exponent based on radix position, and shift significand as needed + */ + /* to represent user input */ + + /* Overflow prevention */ + if (exponent <= static_cast<int32_t>(radix_position) && static_cast<int32_t>(radix_position) - exponent > (1 << 14)) { + exponent = decimal128_limits::exponent_min; + } else { + exponent -= static_cast<int32_t>(radix_position); + } + + /* Attempt to normalize the exponent */ + while (exponent > decimal128_limits::exponent_max) { + /* Shift exponent to significand and decrease */ + last_digit++; + + if (last_digit - first_digit > decimal128_limits::max_digits) { + /* The exponent is too great to shift into the significand. */ + if (significant_digits == 0) { + /* Value is zero, we are allowed to clamp the exponent. */ + exponent = decimal128_limits::exponent_max; + break; + } + + /* Overflow is not permitted, error. 
*/ + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + + exponent--; + } + + while (exponent < decimal128_limits::exponent_min || ndigits_stored < ndigits) { + /* Shift last digit */ + if (last_digit == 0) { + /* underflow is not allowed, but zero clamping is */ + if (significant_digits == 0) { + exponent = decimal128_limits::exponent_min; + break; + } + + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + + if (ndigits_stored < ndigits) { + if (first[ndigits - 1 + includes_sign + saw_radix] - '0' != 0 && + significant_digits != 0) { + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + + ndigits--; /* adjust to match digits not stored */ + } else { + if (digits[last_digit] != 0) { + /* Inexact rounding is not allowed. */ + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + + + last_digit--; /* adjust to round */ + } + + if (exponent < decimal128_limits::exponent_max) { + exponent++; + } else { + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + } + + /* Round */ + /* We've normalized the exponent, but might still need to round. */ + if (last_digit - first_digit + 1 < significant_digits) { + uint8_t round_digit; + + /* There are non-zero digits after last_digit that need rounding. 
*/ + /* We round to nearest, ties to even */ + round_digit = + first[first_nonzero + last_digit + includes_sign + saw_radix + 1] - + '0'; + + if (round_digit != 0) { + /* Inexact (non-zero) rounding is not allowed */ + dec = decimal128_limits::nan(); + return decimal128_from_chars_result{str_read,std::errc::invalid_argument}; + } + } + + /* Encode significand */ + significand_high = 0, /* The high 17 digits of the significand */ + significand_low = 0; /* The low 17 digits of the significand */ + + if (significant_digits == 0) { /* read a zero */ + significand_high = 0; + significand_low = 0; + } else if (last_digit - first_digit < 17) { + size_t d_idx = first_digit; + significand_low = digits[d_idx++]; + + for (; d_idx <= last_digit; d_idx++) { + significand_low *= 10; + significand_low += digits[d_idx]; + significand_high = 0; + } + } else { + size_t d_idx = first_digit; + significand_high = digits[d_idx++]; + + for (; d_idx <= last_digit - 17; d_idx++) { + significand_high *= 10; + significand_high += digits[d_idx]; + } + + significand_low = digits[d_idx++]; + + for (; d_idx <= last_digit; d_idx++) { + significand_low *= 10; + significand_low += digits[d_idx]; + } + } + + detail::mul_64x64 (significand_high, 100000000000000000ull, &significand); + significand.low += significand_low; + + if (significand.low < significand_low) { + significand.high += 1; + } + + + biased_exponent = static_cast<uint16_t>(exponent + static_cast<int32_t>(decimal128_limits::exponent_bias)); + + /* Encode combination, exponent, and significand. 
*/ + if ((significand.high >> 49) & 1) { + /* Encode '11' into bits 1 to 3 */ + dec.high |= (0x3ull << 61); + dec.high |= (biased_exponent & 0x3fffull) << 47; + dec.high |= significand.high & 0x7fffffffffffull; + } else { + dec.high |= (biased_exponent & 0x3fffull) << 49; + dec.high |= significand.high & 0x1ffffffffffffull; + } + + dec.low = significand.low; + + /* Encode sign */ + if (is_negative) { + dec.high |= 0x8000000000000000ull; + } + + return decimal128_from_chars_result{str_read,std::errc()}; + } + +} // namespace bson +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/bson/bson_decimal128.hpp.bak b/include/jsoncons_ext/bson/bson_decimal128.hpp.bak new file mode 100644 index 0000000..8e27ee1 --- /dev/null +++ b/include/jsoncons_ext/bson/bson_decimal128.hpp.bak @@ -0,0 +1,816 @@ +#ifndef JSONCONS_BSON_BSON_DECIMAL128_HPP +#define JSONCONS_BSON_BSON_DECIMAL128_HPP + +/* + * Copyright 2015 MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <system_error> +#include <jsoncons/config/jsoncons_config.hpp> + +namespace jsoncons { namespace bson { + + struct decimal128_to_chars_result + { + char* ptr; + std::errc ec; + }; + + struct decimal128_from_chars_result + { + const char* ptr; + std::errc ec; + }; + +/** + * BSON_DECIMAL128_STRING: + * + * The length of a decimal128 string (with null terminator). 
+ * + * 1 for the sign + * 35 for digits and radix + * 2 for exponent indicator and sign + * 4 for exponent digits + */ +#define BSON_DECIMAL128_STRING 43 +#define BSON_DECIMAL128_INF "Infinity" +#define BSON_DECIMAL128_NAN "NaN" + + struct TP1 + { + uint64_t low; + uint64_t high; + + constexpr TP1() : low(0), high(0) {} + constexpr TP1(uint64_t hi, uint64_t lo) : low(lo), high(hi) {} + }; + struct TP2 + { + uint64_t high; + uint64_t low; + + constexpr TP2() : high(0), low(0) {} + constexpr TP2(uint64_t hi, uint64_t lo) : high(hi), low(lo) {} + }; + + typedef typename std::conditional< + jsoncons::endian::native == jsoncons::endian::little, + TP1, + TP2 + >::type decimal128_t; + + inline + bool operator==(const decimal128_t& lhs, const decimal128_t& rhs) + { + return lhs.high == rhs.high && lhs.low == rhs.low; + } + + inline + bool operator!=(const decimal128_t& lhs, const decimal128_t& rhs) + { + return !(lhs == rhs); + } + + struct decimal128_limits + { + // The length of a decimal128 string (without null terminator). + // + // 1 for the sign + // 35 for digits and radix + // 2 for exponent indicator and sign + // 4 for exponent digits + static constexpr int recommended_buffer_size = 42; + static constexpr decimal128_t nan = decimal128_t(0x7c00000000000000ull, 0); + static constexpr decimal128_t infinity = decimal128_t(0x7800000000000000ull, 0); + static constexpr decimal128_t neg_infinity = decimal128_t(0x7800000000000000ull + 0x8000000000000000ull, 0); + static constexpr int exponent_max = 6111; + static constexpr int exponent_min = -6176; + static constexpr int exponent_bias = 6176; + static constexpr int max_digits = 34; + }; + + /** + * bson_uint128_t: + * + * This struct represents a 128 bit integer. + */ + typedef struct { + uint32_t parts[4]; /* 32-bit words stored high to low. 
*/ + } bson_uint128_t; + + typedef struct { + uint64_t high, low; + } bson_uint128_6464_t; + + namespace detail { + + /** + *------------------------------------------------------------------------------ + * + * bson_uint128_divide1B -- + * + * This function divides a #bson_uint128_t by 1000000000 (1 billion) and + * computes the quotient and remainder. + * + * The remainder will contain 9 decimal digits for conversion to string. + * + * @value The #bson_uint128_t operand. + * @quotient A pointer to store the #bson_uint128_t quotient. + * @rem A pointer to store the #uint64_t remainder. + * + * Returns: + * The quotient at @quotient and the remainder at @rem. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + + inline + void bson_uint128_divide1B (bson_uint128_t value, /* IN */ + bson_uint128_t *quotient, /* OUT */ + uint32_t *rem) /* OUT */ + { + const uint32_t DIVISOR = 1000 * 1000 * 1000; + uint64_t _rem = 0; + int i = 0; + + if (!value.parts[0] && !value.parts[1] && !value.parts[2] && + !value.parts[3]) { + *quotient = value; + *rem = 0; + return; + } + + for (i = 0; i <= 3; i++) { + _rem <<= 32; /* Adjust remainder to match value of next dividend */ + _rem += value.parts[i]; /* Add the divided to _rem */ + value.parts[i] = (uint32_t) (_rem / DIVISOR); + _rem %= DIVISOR; /* Store the remainder */ + } + + *quotient = value; + *rem = (uint32_t) _rem; + } + + /** + *------------------------------------------------------------------------- + * + * mul64x64 -- + * + * This function multiplies two &uint64_t into a &bson_uint128_6464_t. + * + * Returns: + * The product of @left and @right. + * + * Side Effects: + * None. 
/**
 *------------------------------------------------------------------------------
 *
 * dec128_tolower --
 *
 *    Converts the ASCII character @c to lowercase. Only 'A'..'Z' are
 *    mapped; every other value (digits, punctuation, bytes outside the
 *    ASCII range) is returned unchanged. The C locale is never consulted.
 *
 * Returns:
 *    The lowercased character.
 */

inline
char dec128_tolower (char c)
{
    /* An explicit range test is used instead of isupper(): isupper() is
     * locale dependent and its behaviour is undefined for negative char
     * values, which contradicts the locale-insensitive contract above. */
    if (c >= 'A' && c <= 'Z') {
        c = static_cast<char>(c + ('a' - 'A'));
    }

    return c;
}
*/ + if (*a == '\0' || *b == '\0') { + return false; + } + + if (dec128_tolower (*a) != dec128_tolower (*b)) { + return false; + } + + a++; + b++; + } + + return true; + } + + } // namespace detail + + + /** + *------------------------------------------------------------------------------ + * + * decimal128_to_chars -- + * + * This function converts a BID formatted decimal128 value to string, + * accepting a &decimal128_t as @dec. The string is stored at @str. + * + * @dec : The BID formatted decimal to convert. + * @str : The output decimal128 string. At least %BSON_DECIMAL128_STRING + *characters. + * + * Returns: + * None. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + + inline + void decimal128_to_chars(char* first, char* last, const decimal128_t& dec) + { + uint32_t COMBINATION_MASK = 0x1f; /* Extract least significant 5 bits */ + uint32_t EXPONENT_MASK = 0x3fff; /* Extract least significant 14 bits */ + uint32_t COMBINATION_INFINITY = 30; /* Value of combination field for Inf */ + uint32_t COMBINATION_NAN = 31; /* Value of combination field for NaN */ + uint32_t EXPONENT_BIAS = 6176; /* decimal128 exponent bias */ + + char* str_out = first; /* output pointer in string */ + char significand_str[35]; /* decoded significand digits */ + + + /* Note: bits in this routine are referred to starting at 0, */ + /* from the sign bit, towards the coefficient. 
*/ + uint32_t high; /* bits 0 - 31 */ + uint32_t midh; /* bits 32 - 63 */ + uint32_t midl; /* bits 64 - 95 */ + uint32_t low; /* bits 96 - 127 */ + uint32_t combination; /* bits 1 - 5 */ + uint32_t biased_exponent; /* decoded biased exponent (14 bits) */ + uint32_t significand_digits = 0; /* the number of significand digits */ + uint32_t significand[36] = {0}; /* the base-10 digits in the significand */ + uint32_t *significand_read = significand; /* read pointer into significand */ + int32_t exponent; /* unbiased exponent */ + int32_t scientific_exponent; /* the exponent if scientific notation is + * used */ + bool is_zero = false; /* true if the number is zero */ + + uint8_t significand_msb; /* the most signifcant significand bits (50-46) */ + bson_uint128_t + significand128; /* temporary storage for significand decoding */ + size_t i; /* indexing variables */ + int j, k; + + memset (significand_str, 0, sizeof (significand_str)); + + if ((int64_t) dec.high < 0) { /* negative */ + *(str_out++) = '-'; + } + + low = (uint32_t) dec.low, midl = (uint32_t) (dec.low >> 32), + midh = (uint32_t) dec.high, high = (uint32_t) (dec.high >> 32); + + /* Decode combination field and exponent */ + combination = (high >> 26) & COMBINATION_MASK; + + if (JSONCONS_UNLIKELY ((combination >> 3) == 3)) { + /* Check for 'special' values */ + if (combination == COMBINATION_INFINITY) { /* Infinity */ + strcpy (str_out, BSON_DECIMAL128_INF); + return; + } else if (combination == COMBINATION_NAN) { /* NaN */ + /* first, not str_out, to erase the sign */ + strcpy (first, BSON_DECIMAL128_NAN); + /* we don't care about the NaN payload. 
*/ + return; + } else { + biased_exponent = (high >> 15) & EXPONENT_MASK; + significand_msb = 0x8 + ((high >> 14) & 0x1); + } + } else { + significand_msb = (high >> 14) & 0x7; + biased_exponent = (high >> 17) & EXPONENT_MASK; + } + + exponent = biased_exponent - EXPONENT_BIAS; + /* Create string of significand digits */ + + /* Convert the 114-bit binary number represented by */ + /* (high, midh, midl, low) to at most 34 decimal */ + /* digits through modulo and division. */ + significand128.parts[0] = (high & 0x3fff) + ((significand_msb & 0xf) << 14); + significand128.parts[1] = midh; + significand128.parts[2] = midl; + significand128.parts[3] = low; + + if (significand128.parts[0] == 0 && significand128.parts[1] == 0 && + significand128.parts[2] == 0 && significand128.parts[3] == 0) { + is_zero = true; + } else if (significand128.parts[0] >= (1 << 17)) { + /* The significand is non-canonical or zero. + * In order to preserve compatibility with the densely packed decimal + * format, the maximum value for the significand of decimal128 is + * 1e34 - 1. If the value is greater than 1e34 - 1, the IEEE 754 + * standard dictates that the significand is interpreted as zero. + */ + is_zero = true; + } else { + for (k = 3; k >= 0; k--) { + uint32_t least_digits = 0; + detail::bson_uint128_divide1B ( + significand128, &significand128, &least_digits); + + /* We now have the 9 least significant digits (in base 2). */ + /* Convert and output to string. 
*/ + if (!least_digits) { + continue; + } + + for (j = 8; j >= 0; j--) { + significand[k * 9 + j] = least_digits % 10; + least_digits /= 10; + } + } + } + + /* Output format options: */ + /* Scientific - [-]d.dddE(+/-)dd or [-]dE(+/-)dd */ + /* Regular - ddd.ddd */ + + if (is_zero) { + significand_digits = 1; + *significand_read = 0; + } else { + significand_digits = 36; + while (!(*significand_read)) { + significand_digits--; + significand_read++; + } + } + + scientific_exponent = significand_digits - 1 + exponent; + + /* The scientific exponent checks are dictated by the string conversion + * specification and are somewhat arbitrary cutoffs. + * + * We must check exponent > 0, because if this is the case, the number + * has trailing zeros. However, we *cannot* output these trailing zeros, + * because doing so would change the precision of the value, and would + * change stored data if the string converted number is round tripped. + */ + if (scientific_exponent < -6 || exponent > 0) { + /* Scientific format */ + *(str_out++) = *(significand_read++) + '0'; + significand_digits--; + + if (significand_digits) { + *(str_out++) = '.'; + } + + for (i = 0; i < significand_digits && (str_out - first) < 36; i++) { + *(str_out++) = *(significand_read++) + '0'; + } + /* Exponent */ + *(str_out++) = 'E'; + snprintf (str_out, 6, "%+d", scientific_exponent); + } else { + /* Regular format with no decimal place */ + if (exponent >= 0) { + for (i = 0; i < significand_digits && (str_out - first) < 36; i++) { + *(str_out++) = *(significand_read++) + '0'; + } + *str_out = '\0'; + } else { + int32_t radix_position = significand_digits + exponent; + + if (radix_position > 0) { /* non-zero digits before radix */ + for (i = 0; + i < radix_position && (str_out < last); + i++) { + *(str_out++) = *(significand_read++) + '0'; + } + } else { /* leading zero before radix point */ + *(str_out++) = '0'; + } + + *(str_out++) = '.'; + while (radix_position++ < 0) { /* add leading zeros after 
radix */ + *(str_out++) = '0'; + } + + for (i = 0; + (i < significand_digits - (std::max) (radix_position - 1, 0)) && + (str_out < last); + i++) { + *(str_out++) = *(significand_read++) + '0'; + } + *str_out = '\0'; + } + } + } + + + + /** + *------------------------------------------------------------------------------ + * + * bson_decimal128_from_string_w_len -- + * + * This function converts @string in the format [+-]ddd[.]ddd[E][+-]dddd to + * decimal128. Out of range values are converted to +/-Infinity. Invalid + * strings are converted to NaN. @len is the length of the string, or -1 + * meaning the string is null-terminated. + * + * If more digits are provided than the available precision allows, + * round to the nearest expressable decimal128 with ties going to even will + * occur. + * + * Note: @string must be ASCII only! + * + * Returns: + * true on success, or false on failure. @dec will be NaN if @str was invalid + * The &decimal128_t converted from @string at @dec. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------------ + */ + + inline + bool decimal128_from_chars(const char* first, const char* last, decimal128_t& dec) + { + int len = last - first; + + bson_uint128_6464_t significand = {0}; + + const char* str_read = first; /* Read pointer for consuming str. */ + + /* Parsing state tracking */ + bool is_negative = false; + bool saw_radix = false; + bool includes_sign = false; /* True if the input first contains a sign. 
*/ + bool found_nonzero = false; + + size_t significant_digits = 0; /* Total number of significant digits + * (no leading or trailing zero) */ + size_t ndigits_read = 0; /* Total number of significand digits read */ + size_t ndigits = 0; /* Total number of digits (no leading zeros) */ + size_t radix_position = 0; /* The number of the digits after radix */ + size_t first_nonzero = 0; /* The index of the first non-zero in *str* */ + + uint16_t digits[decimal128_limits::max_digits] = {0}; + uint16_t ndigits_stored = 0; /* The number of digits in digits */ + uint16_t *digits_insert = digits; /* Insertion pointer for digits */ + size_t first_digit = 0; /* The index of the first non-zero digit */ + size_t last_digit = 0; /* The index of the last digit */ + + int32_t exponent = 0; + uint64_t significand_high = 0; /* The high 17 digits of the significand */ + uint64_t significand_low = 0; /* The low 17 digits of the significand */ + uint16_t biased_exponent = 0; /* The biased exponent */ + + dec.high = 0; + dec.low = 0; + + if (*str_read == '+' || *str_read == '-') { + is_negative = *(str_read++) == '-'; + includes_sign = true; + } + + /* Check for Infinity or NaN */ + if (!isdigit (*str_read) && *str_read != '.') { + if (detail::dec128_istreq (str_read, "inf") || + detail::dec128_istreq (str_read, "infinity")) { + dec = is_negative ? 
decimal128_limits::neg_infinity : decimal128_limits::infinity; + return true; + } else if (detail::dec128_istreq (str_read, "nan")) { + dec = decimal128_limits::nan; + return true; + } + + dec = decimal128_limits::nan; + return false; + } + + /* Read digits */ + while (((isdigit (*str_read) || *str_read == '.')) && + (len == -1 || str_read < first + len)) { + if (*str_read == '.') { + if (saw_radix) { + dec = decimal128_limits::nan; + return false; + } + + saw_radix = true; + str_read++; + continue; + } + + if (ndigits_stored < 34) { + if (*str_read != '0' || found_nonzero) { + if (!found_nonzero) { + first_nonzero = ndigits_read; + } + + found_nonzero = true; + *(digits_insert++) = *(str_read) - '0'; /* Only store 34 digits */ + ndigits_stored++; + } + } + + if (found_nonzero) { + ndigits++; + } + + if (saw_radix) { + radix_position++; + } + + ndigits_read++; + str_read++; + } + + if (saw_radix && !ndigits_read) { + dec = decimal128_limits::nan; + return false; + } + + /* Read exponent if exists */ + if (*str_read == 'e' || *str_read == 'E') { + int nread = 0; + #ifdef _MSC_VER + #define SSCANF sscanf_s + #else + #define SSCANF sscanf + #endif + int read_exponent = SSCANF (++str_read, "%d%n", &exponent, &nread); + str_read += nread; + + if (!read_exponent || nread == 0) { + dec = decimal128_limits::nan; + return false; + } + + #undef SSCANF + } + + if ((len == -1 || str_read < first + len) && *str_read) { + dec = decimal128_limits::nan; + return false; + } + + /* Done reading input. 
*/ + /* Find first non-zero digit in digits */ + first_digit = 0; + + if (!ndigits_stored) { /* value is zero */ + first_digit = 0; + last_digit = 0; + digits[0] = 0; + ndigits = 1; + ndigits_stored = 1; + significant_digits = 0; + } else { + last_digit = ndigits_stored - 1; + significant_digits = ndigits; + /* Mark trailing zeros as non-significant */ + while (first[first_nonzero + significant_digits - 1 + includes_sign + + saw_radix] == '0') { + significant_digits--; + } + } + + + /* Normalization of exponent */ + /* Correct exponent based on radix position, and shift significand as needed + */ + /* to represent user input */ + + /* Overflow prevention */ + if (exponent <= radix_position && radix_position - exponent > (1 << 14)) { + exponent = decimal128_limits::exponent_min; + } else { + exponent -= radix_position; + } + + /* Attempt to normalize the exponent */ + while (exponent > decimal128_limits::exponent_max) { + /* Shift exponent to significand and decrease */ + last_digit++; + + if (last_digit - first_digit > decimal128_limits::max_digits) { + /* The exponent is too great to shift into the significand. */ + if (significant_digits == 0) { + /* Value is zero, we are allowed to clamp the exponent. */ + exponent = decimal128_limits::exponent_max; + break; + } + + /* Overflow is not permitted, error. 
*/ + dec = decimal128_limits::nan; + return false; + } + + exponent--; + } + + while (exponent < decimal128_limits::exponent_min || ndigits_stored < ndigits) { + /* Shift last digit */ + if (last_digit == 0) { + /* underflow is not allowed, but zero clamping is */ + if (significant_digits == 0) { + exponent = decimal128_limits::exponent_min; + break; + } + + dec = decimal128_limits::nan; + return false; + } + + if (ndigits_stored < ndigits) { + if (first[ndigits - 1 + includes_sign + saw_radix] - '0' != 0 && + significant_digits != 0) { + dec = decimal128_limits::nan; + return false; + } + + ndigits--; /* adjust to match digits not stored */ + } else { + if (digits[last_digit] != 0) { + /* Inexact rounding is not allowed. */ + dec = decimal128_limits::nan; + return false; + } + + + last_digit--; /* adjust to round */ + } + + if (exponent < decimal128_limits::exponent_max) { + exponent++; + } else { + dec = decimal128_limits::nan; + return false; + } + } + + /* Round */ + /* We've normalized the exponent, but might still need to round. */ + if (last_digit - first_digit + 1 < significant_digits) { + uint8_t round_digit; + + /* There are non-zero digits after last_digit that need rounding. 
*/ + /* We round to nearest, ties to even */ + round_digit = + first[first_nonzero + last_digit + includes_sign + saw_radix + 1] - + '0'; + + if (round_digit != 0) { + /* Inexact (non-zero) rounding is not allowed */ + dec = decimal128_limits::nan; + return false; + } + } + + /* Encode significand */ + significand_high = 0, /* The high 17 digits of the significand */ + significand_low = 0; /* The low 17 digits of the significand */ + + if (significant_digits == 0) { /* read a zero */ + significand_high = 0; + significand_low = 0; + } else if (last_digit - first_digit < 17) { + size_t d_idx = first_digit; + significand_low = digits[d_idx++]; + + for (; d_idx <= last_digit; d_idx++) { + significand_low *= 10; + significand_low += digits[d_idx]; + significand_high = 0; + } + } else { + size_t d_idx = first_digit; + significand_high = digits[d_idx++]; + + for (; d_idx <= last_digit - 17; d_idx++) { + significand_high *= 10; + significand_high += digits[d_idx]; + } + + significand_low = digits[d_idx++]; + + for (; d_idx <= last_digit; d_idx++) { + significand_low *= 10; + significand_low += digits[d_idx]; + } + } + + detail::mul_64x64 (significand_high, 100000000000000000ull, &significand); + significand.low += significand_low; + + if (significand.low < significand_low) { + significand.high += 1; + } + + + biased_exponent = (exponent + (int16_t) decimal128_limits::exponent_bias); + + /* Encode combination, exponent, and significand. 
*/ + if ((significand.high >> 49) & 1) { + /* Encode '11' into bits 1 to 3 */ + dec.high |= (0x3ull << 61); + dec.high |= (biased_exponent & 0x3fffull) << 47; + dec.high |= significand.high & 0x7fffffffffffull; + } else { + dec.high |= (biased_exponent & 0x3fffull) << 49; + dec.high |= significand.high & 0x1ffffffffffffull; + } + + dec.low = significand.low; + + /* Encode sign */ + if (is_negative) { + dec.high |= 0x8000000000000000ull; + } + + return true; + } + +} // namespace bson +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/bson/bson_encoder.hpp b/include/jsoncons_ext/bson/bson_encoder.hpp new file mode 100644 index 0000000..4569f06 --- /dev/null +++ b/include/jsoncons_ext/bson/bson_encoder.hpp @@ -0,0 +1,585 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_BSON_ENCODER_HPP +#define JSONCONS_BSON_BSON_ENCODER_HPP + +#include <string> +#include <vector> +#include <limits> // std::numeric_limits +#include <memory> +#include <utility> // std::move +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/sink.hpp> +#include <jsoncons/detail/parse_number.hpp> +#include <jsoncons_ext/bson/bson_type.hpp> +#include <jsoncons_ext/bson/bson_error.hpp> +#include <jsoncons_ext/bson/bson_options.hpp> +#include <jsoncons_ext/bson/bson_decimal128.hpp> +#include <jsoncons_ext/bson/bson_oid.hpp> + +namespace jsoncons { namespace bson { + +template<class Sink=jsoncons::binary_stream_sink,class Allocator=std::allocator<char>> +class basic_bson_encoder final : public basic_json_visitor<char> +{ + enum class decimal_parse_state { start, integer, exp1, exp2, fraction1 }; + static constexpr int64_t nanos_in_milli = 1000000; + static constexpr 
int64_t nanos_in_second = 1000000000; + static constexpr int64_t millis_in_second = 1000; +public: + using allocator_type = Allocator; + using char_type = char; + using typename basic_json_visitor<char>::string_view_type; + using sink_type = Sink; + +private: + struct stack_item + { + jsoncons::bson::bson_container_type type_; + std::size_t offset_; + std::size_t name_offset_; + std::size_t index_; + + stack_item(jsoncons::bson::bson_container_type type, std::size_t offset) noexcept + : type_(type), offset_(offset), name_offset_(0), index_(0) + { + } + + std::size_t offset() const + { + return offset_; + } + + std::size_t member_offset() const + { + return name_offset_; + } + + void member_offset(std::size_t offset) + { + name_offset_ = offset; + } + + std::size_t next_index() + { + return index_++; + } + + bool is_object() const + { + return type_ == jsoncons::bson::bson_container_type::document; + } + + + }; + + sink_type sink_; + const bson_encode_options options_; + allocator_type alloc_; + + std::vector<stack_item> stack_; + std::vector<uint8_t> buffer_; + int nesting_depth_; + + // Noncopyable and nonmoveable + basic_bson_encoder(const basic_bson_encoder&) = delete; + basic_bson_encoder& operator=(const basic_bson_encoder&) = delete; +public: + explicit basic_bson_encoder(Sink&& sink, + const Allocator& alloc = Allocator()) + : basic_bson_encoder(std::forward<Sink>(sink), + bson_encode_options(), + alloc) + { + } + + explicit basic_bson_encoder(Sink&& sink, + const bson_encode_options& options, + const Allocator& alloc = Allocator()) + : sink_(std::forward<Sink>(sink)), + options_(options), + alloc_(alloc), + nesting_depth_(0) + { + } + + ~basic_bson_encoder() noexcept + { + sink_.flush(); + } + + void reset() + { + stack_.clear(); + buffer_.clear(); + nesting_depth_ = 0; + } + + void reset(Sink&& sink) + { + sink_ = std::move(sink); + reset(); + } + +private: + // Implementing methods + + void visit_flush() override + { + sink_.flush(); + } + + bool 
visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = bson_errc::max_nesting_depth_exceeded; + return false; + } + if (buffer_.size() > 0) + { + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + before_value(jsoncons::bson::bson_type::document_type); + } + + stack_.emplace_back(jsoncons::bson::bson_container_type::document, buffer_.size()); + buffer_.insert(buffer_.end(), sizeof(int32_t), 0); + + return true; + } + + bool visit_end_object(const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + --nesting_depth_; + + buffer_.push_back(0x00); + + std::size_t length = buffer_.size() - stack_.back().offset(); + binary::native_to_little(static_cast<uint32_t>(length), buffer_.begin()+stack_.back().offset()); + + stack_.pop_back(); + if (stack_.empty()) + { + for (auto c : buffer_) + { + sink_.push_back(c); + } + } + return true; + } + + bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = bson_errc::max_nesting_depth_exceeded; + return false; + } + if (buffer_.size() > 0) + { + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + before_value(jsoncons::bson::bson_type::array_type); + } + stack_.emplace_back(jsoncons::bson::bson_container_type::array, buffer_.size()); + buffer_.insert(buffer_.end(), sizeof(int32_t), 0); + return true; + } + + bool visit_end_array(const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + --nesting_depth_; + + buffer_.push_back(0x00); + + std::size_t length = buffer_.size() - stack_.back().offset(); + binary::native_to_little(static_cast<uint32_t>(length), buffer_.begin()+stack_.back().offset()); + + stack_.pop_back(); + if (stack_.empty()) + { + for (auto c : buffer_) + 
{ + sink_.push_back(c); + } + } + return true; + } + + bool visit_key(const string_view_type& name, const ser_context&, std::error_code&) override + { + stack_.back().member_offset(buffer_.size()); + buffer_.push_back(0x00); // reserve space for code + for (auto c : name) + { + buffer_.push_back(c); + } + buffer_.push_back(0x00); + return true; + } + + bool visit_null(semantic_tag tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + switch (tag) + { + case semantic_tag::undefined: + before_value(jsoncons::bson::bson_type::undefined_type); + break; + default: + before_value(jsoncons::bson::bson_type::null_type); + break; + } + return true; + } + + bool visit_bool(bool val, semantic_tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + before_value(jsoncons::bson::bson_type::bool_type); + if (val) + { + buffer_.push_back(0x01); + } + else + { + buffer_.push_back(0x00); + } + + return true; + } + + bool visit_string(const string_view_type& sv, semantic_tag tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + + switch (tag) + { + case semantic_tag::float128: + { + before_value(jsoncons::bson::bson_type::decimal128_type); + decimal128_t dec; + auto rc = decimal128_from_chars(sv.data(), sv.data()+sv.size(), dec); + if (rc.ec != std::errc()) + { + ec = bson_errc::invalid_decimal128_string; + return false; + } + binary::native_to_little(dec.low,std::back_inserter(buffer_)); + binary::native_to_little(dec.high,std::back_inserter(buffer_)); + break; + } + case semantic_tag::id: + { + before_value(jsoncons::bson::bson_type::object_id_type); + oid_t oid(sv); + for (auto b : oid) + { + buffer_.push_back(b); + } + break; + } + case semantic_tag::regex: + { + 
before_value(jsoncons::bson::bson_type::regex_type); + std::size_t first = sv.find_first_of('/'); + std::size_t last = sv.find_last_of('/'); + if (first == string_view::npos || last == string_view::npos || first == last) + { + ec = bson_errc::invalid_regex_string; + return false; + } + string_view regex = sv.substr(first+1,last-1); + for (auto c : regex) + { + buffer_.push_back(c); + } + buffer_.push_back(0x00); + string_view options = sv.substr(last+1); + for (auto c : options) + { + buffer_.push_back(c); + } + buffer_.push_back(0x00); + break; + } + default: + switch (tag) + { + case semantic_tag::code: + before_value(jsoncons::bson::bson_type::javascript_type); + break; + default: + before_value(jsoncons::bson::bson_type::string_type); + break; + } + std::size_t offset = buffer_.size(); + buffer_.insert(buffer_.end(), sizeof(int32_t), 0); + std::size_t string_offset = buffer_.size(); + auto sink = unicode_traits::validate(sv.data(), sv.size()); + if (sink.ec != unicode_traits::conv_errc()) + { + ec = bson_errc::invalid_utf8_text_string; + return false; + } + for (auto c : sv) + { + buffer_.push_back(c); + } + buffer_.push_back(0x00); + std::size_t length = buffer_.size() - string_offset; + binary::native_to_little(static_cast<uint32_t>(length), buffer_.begin()+offset); + break; + } + + return true; + } + + bool visit_byte_string(const byte_string_view& b, + semantic_tag, + const ser_context&, + std::error_code& ec) override + { + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + before_value(jsoncons::bson::bson_type::binary_type); + + std::size_t offset = buffer_.size(); + buffer_.insert(buffer_.end(), sizeof(int32_t), 0); + std::size_t string_offset = buffer_.size(); + + buffer_.push_back(0x80); // default subtype + + for (auto c : b) + { + buffer_.push_back(c); + } + std::size_t length = buffer_.size() - string_offset - 1; + binary::native_to_little(static_cast<uint32_t>(length), buffer_.begin()+offset); + + return true; 
+ } + + bool visit_byte_string(const byte_string_view& b, + uint64_t ext_tag, + const ser_context&, + std::error_code& ec) override + { + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + before_value(jsoncons::bson::bson_type::binary_type); + + std::size_t offset = buffer_.size(); + buffer_.insert(buffer_.end(), sizeof(int32_t), 0); + std::size_t string_offset = buffer_.size(); + + buffer_.push_back(static_cast<uint8_t>(ext_tag)); // default subtype + + for (auto c : b) + { + buffer_.push_back(c); + } + std::size_t length = buffer_.size() - string_offset - 1; + binary::native_to_little(static_cast<uint32_t>(length), buffer_.begin()+offset); + + return true; + } + + bool visit_int64(int64_t val, + semantic_tag tag, + const ser_context&, + std::error_code& ec) override + { + static constexpr int64_t min_value_div_1000 = (std::numeric_limits<int64_t>::min)() / 1000; + static constexpr int64_t max_value_div_1000 = (std::numeric_limits<int64_t>::max)() / 1000; + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + + switch (tag) + { + case semantic_tag::epoch_second: + if (val < min_value_div_1000) + { + ec = bson_errc::datetime_too_small; + return false; + } + if (val > max_value_div_1000) + { + ec = bson_errc::datetime_too_large; + return false; + } + before_value(jsoncons::bson::bson_type::datetime_type); + binary::native_to_little(val*millis_in_second,std::back_inserter(buffer_)); + return true; + case semantic_tag::epoch_milli: + before_value(jsoncons::bson::bson_type::datetime_type); + binary::native_to_little(val,std::back_inserter(buffer_)); + return true; + case semantic_tag::epoch_nano: + before_value(jsoncons::bson::bson_type::datetime_type); + if (val != 0) + { + val /= nanos_in_milli; + } + binary::native_to_little(static_cast<int64_t>(val),std::back_inserter(buffer_)); + return true; + default: + { + if (val >= (std::numeric_limits<int32_t>::lowest)() && val <= 
(std::numeric_limits<int32_t>::max)()) + { + before_value(jsoncons::bson::bson_type::int32_type); + binary::native_to_little(static_cast<uint32_t>(val),std::back_inserter(buffer_)); + } + else + { + before_value(jsoncons::bson::bson_type::int64_type); + binary::native_to_little(static_cast<int64_t>(val),std::back_inserter(buffer_)); + } + return true; + } + } + } + + bool visit_uint64(uint64_t val, + semantic_tag tag, + const ser_context&, + std::error_code& ec) override + { + static constexpr uint64_t max_value_div_1000 = (std::numeric_limits<uint64_t>::max)() / 1000; + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + + switch (tag) + { + case semantic_tag::epoch_second: + if (val > max_value_div_1000) + { + ec = bson_errc::datetime_too_large; + return false; + } + before_value(jsoncons::bson::bson_type::datetime_type); + binary::native_to_little(static_cast<int64_t>(val*millis_in_second),std::back_inserter(buffer_)); + return true; + case semantic_tag::epoch_milli: + before_value(jsoncons::bson::bson_type::datetime_type); + binary::native_to_little(static_cast<int64_t>(val),std::back_inserter(buffer_)); + return true; + case semantic_tag::epoch_nano: + before_value(jsoncons::bson::bson_type::datetime_type); + if (val != 0) + { + val /= nanos_in_second; + } + binary::native_to_little(static_cast<int64_t>(val),std::back_inserter(buffer_)); + return true; + default: + { + bool more; + if (val <= static_cast<uint64_t>((std::numeric_limits<int32_t>::max)())) + { + before_value(jsoncons::bson::bson_type::int32_type); + binary::native_to_little(static_cast<uint32_t>(val),std::back_inserter(buffer_)); + more = true; + } + else if (val <= static_cast<uint64_t>((std::numeric_limits<int64_t>::max)())) + { + before_value(jsoncons::bson::bson_type::int64_type); + binary::native_to_little(static_cast<uint64_t>(val),std::back_inserter(buffer_)); + more = true; + } + else + { + ec = bson_errc::number_too_large; + more = false; + } + return 
more; + } + } + } + + bool visit_double(double val, + semantic_tag, + const ser_context&, + std::error_code& ec) override + { + if (stack_.empty()) + { + ec = bson_errc::expected_bson_document; + return false; + } + before_value(jsoncons::bson::bson_type::double_type); + binary::native_to_little(val,std::back_inserter(buffer_)); + return true; + } + + void before_value(uint8_t code) + { + JSONCONS_ASSERT(!stack_.empty()); + if (stack_.back().is_object()) + { + buffer_[stack_.back().member_offset()] = code; + } + else + { + buffer_.push_back(code); + std::string name = std::to_string(stack_.back().next_index()); + buffer_.insert(buffer_.end(), name.begin(), name.end()); + buffer_.push_back(0x00); + } + } +}; + +using bson_stream_encoder = basic_bson_encoder<jsoncons::binary_stream_sink>; +using bson_bytes_encoder = basic_bson_encoder<jsoncons::bytes_sink<std::vector<uint8_t>>>; + +#if !defined(JSONCONS_NO_DEPRECATED) +template<class Sink=jsoncons::binary_stream_sink> +using basic_bson_serializer = basic_bson_encoder<Sink>; + +JSONCONS_DEPRECATED_MSG("Instead, use bson_stream_encoder") typedef bson_stream_encoder bson_encoder; +JSONCONS_DEPRECATED_MSG("Instead, use bson_stream_encoder") typedef bson_stream_encoder bson_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use bson_bytes_encoder") typedef bson_bytes_encoder bson_buffer_serializer; + +#endif + +}} +#endif diff --git a/include/jsoncons_ext/bson/bson_error.hpp b/include/jsoncons_ext/bson/bson_error.hpp new file mode 100644 index 0000000..85cf24d --- /dev/null +++ b/include/jsoncons_ext/bson/bson_error.hpp @@ -0,0 +1,103 @@ +/// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
namespace jsoncons { namespace bson {

// Error codes reported by the BSON encoder and parser.
// Enumerator order determines the numeric values; do not reorder.
enum class bson_errc
{
    success = 0,
    unexpected_eof = 1,
    source_error,
    invalid_utf8_text_string,
    max_nesting_depth_exceeded,
    string_length_is_non_positive,
    length_is_negative,
    number_too_large,
    invalid_decimal128_string,
    datetime_too_small,
    datetime_too_large,
    expected_bson_document,
    invalid_regex_string,
    size_mismatch,
    unknown_type
};

// std::error_category that maps bson_errc values to readable messages.
class bson_error_category_impl
   : public std::error_category
{
public:
    // Name under which this category identifies itself.
    const char* name() const noexcept override
    {
        return "jsoncons/bson";
    }

    // Message for the given error value; any value without a dedicated
    // message (including success) yields the generic text.
    std::string message(int ev) const override
    {
        const char* text = "Unknown BSON parser error";
        switch (static_cast<bson_errc>(ev))
        {
            case bson_errc::unexpected_eof:
                text = "Unexpected end of file";
                break;
            case bson_errc::source_error:
                text = "Source error";
                break;
            case bson_errc::invalid_utf8_text_string:
                text = "Illegal UTF-8 encoding in text string";
                break;
            case bson_errc::max_nesting_depth_exceeded:
                text = "Data item nesting exceeds limit in options";
                break;
            case bson_errc::string_length_is_non_positive:
                text = "Request for the length of a string returned a non-positive result";
                break;
            case bson_errc::length_is_negative:
                text = "Request for the length of a binary returned a negative result";
                break;
            case bson_errc::number_too_large:
                text = "Number too large";
                break;
            case bson_errc::invalid_decimal128_string:
                text = "Invalid decimal128 string";
                break;
            case bson_errc::datetime_too_small:
                text = "datetime too small";
                break;
            case bson_errc::datetime_too_large:
                text = "datetime too large";
                break;
            case bson_errc::expected_bson_document:
                text = "Expected BSON document";
                break;
            case bson_errc::invalid_regex_string:
                text = "Invalid regex string";
                break;
            case bson_errc::size_mismatch:
                text = "Document or array size doesn't match bytes read";
                break;
            case bson_errc::unknown_type:
                text = "An unknown type was found in the stream";
                break;
            default:
                break;
        }
        return text;
    }
};

// Returns the single shared category instance.
inline
const std::error_category& bson_error_category()
{
    static bson_error_category_impl category;
    return category;
}

// Lets a bson_errc convert implicitly to std::error_code.
inline
std::error_code make_error_code(bson_errc result)
{
    const int value = static_cast<int>(result);
    return std::error_code(value, bson_error_category());
}

}}

namespace std {
    // Registers bson_errc as an error code enumeration.
    template<>
    struct is_error_code_enum<jsoncons::bson::bson_errc> : public true_type
    {
    };
}
+ */ + +#include <cstdlib> +#include <cstring> +#include <ctype.h> +#include <system_error> +#include <algorithm> +#include <string> +#include <type_traits> +#include <array> +#include <jsoncons/config/jsoncons_config.hpp> + +namespace jsoncons { namespace bson { + + class oid_t + { + std::array<uint8_t,12> bytes_; + public: + using iterator = std::array<uint8_t,12>::iterator; + using const_iterator = std::array<uint8_t,12>::const_iterator; + + oid_t(const std::array<uint8_t,12>& bytes) + : bytes_(bytes) + { + } + oid_t(uint8_t data[12]) + { + std::memcpy(bytes_.data(),data,12); + } + + oid_t(const string_view& str) + { + for (std::size_t i = 0; i < bytes_.size(); i++) + { + bytes_[i] = ((parse_hex_char (str[2 * i]) << 4) | + (parse_hex_char (str[2 * i + 1]))); + } + } + + const uint8_t* data() const + { + return bytes_.data(); + } + + std::size_t size() const + { + return bytes_.size(); + } + + iterator begin() + { + return bytes_.begin(); + } + + iterator end() + { + return bytes_.end(); + } + + const_iterator begin() const + { + return bytes_.begin(); + } + + const_iterator end() const + { + return bytes_.end(); + } + + private: + + static uint8_t parse_hex_char (char hex) + { + switch (hex) { + case '0': + return 0; + case '1': + return 1; + case '2': + return 2; + case '3': + return 3; + case '4': + return 4; + case '5': + return 5; + case '6': + return 6; + case '7': + return 7; + case '8': + return 8; + case '9': + return 9; + case 'a': + case 'A': + return 0xa; + case 'b': + case 'B': + return 0xb; + case 'c': + case 'C': + return 0xc; + case 'd': + case 'D': + return 0xd; + case 'e': + case 'E': + return 0xe; + case 'f': + case 'F': + return 0xf; + default: + return 0; + } + } + }; + + namespace detail { + + inline + const uint16_t* get_hex_char_pairs(std::true_type) // big endian + { + static const uint16_t hex_char_pairs[] = { + 12336, 12337, 12338, 12339, 12340, 12341, 12342, 12343, 12344, 12345, 12385, + 12386, 12387, 12388, 12389, 12390, 12592, 12593, 
12594, 12595, 12596, 12597, + 12598, 12599, 12600, 12601, 12641, 12642, 12643, 12644, 12645, 12646, 12848, + 12849, 12850, 12851, 12852, 12853, 12854, 12855, 12856, 12857, 12897, 12898, + 12899, 12900, 12901, 12902, 13104, 13105, 13106, 13107, 13108, 13109, 13110, + 13111, 13112, 13113, 13153, 13154, 13155, 13156, 13157, 13158, 13360, 13361, + 13362, 13363, 13364, 13365, 13366, 13367, 13368, 13369, 13409, 13410, 13411, + 13412, 13413, 13414, 13616, 13617, 13618, 13619, 13620, 13621, 13622, 13623, + 13624, 13625, 13665, 13666, 13667, 13668, 13669, 13670, 13872, 13873, 13874, + 13875, 13876, 13877, 13878, 13879, 13880, 13881, 13921, 13922, 13923, 13924, + 13925, 13926, 14128, 14129, 14130, 14131, 14132, 14133, 14134, 14135, 14136, + 14137, 14177, 14178, 14179, 14180, 14181, 14182, 14384, 14385, 14386, 14387, + 14388, 14389, 14390, 14391, 14392, 14393, 14433, 14434, 14435, 14436, 14437, + 14438, 14640, 14641, 14642, 14643, 14644, 14645, 14646, 14647, 14648, 14649, + 14689, 14690, 14691, 14692, 14693, 14694, 24880, 24881, 24882, 24883, 24884, + 24885, 24886, 24887, 24888, 24889, 24929, 24930, 24931, 24932, 24933, 24934, + 25136, 25137, 25138, 25139, 25140, 25141, 25142, 25143, 25144, 25145, 25185, + 25186, 25187, 25188, 25189, 25190, 25392, 25393, 25394, 25395, 25396, 25397, + 25398, 25399, 25400, 25401, 25441, 25442, 25443, 25444, 25445, 25446, 25648, + 25649, 25650, 25651, 25652, 25653, 25654, 25655, 25656, 25657, 25697, 25698, + 25699, 25700, 25701, 25702, 25904, 25905, 25906, 25907, 25908, 25909, 25910, + 25911, 25912, 25913, 25953, 25954, 25955, 25956, 25957, 25958, 26160, 26161, + 26162, 26163, 26164, 26165, 26166, 26167, 26168, 26169, 26209, 26210, 26211, + 26212, 26213, 26214}; + + return hex_char_pairs; + } + + inline + const uint16_t* get_hex_char_pairs(std::false_type) // little endian + { + static const uint16_t hex_char_pairs[] = { + 12336, 12592, 12848, 13104, 13360, 13616, 13872, 14128, 14384, 14640, 24880, + 25136, 25392, 25648, 25904, 26160, 12337, 
12593, 12849, 13105, 13361, 13617, + 13873, 14129, 14385, 14641, 24881, 25137, 25393, 25649, 25905, 26161, 12338, + 12594, 12850, 13106, 13362, 13618, 13874, 14130, 14386, 14642, 24882, 25138, + 25394, 25650, 25906, 26162, 12339, 12595, 12851, 13107, 13363, 13619, 13875, + 14131, 14387, 14643, 24883, 25139, 25395, 25651, 25907, 26163, 12340, 12596, + 12852, 13108, 13364, 13620, 13876, 14132, 14388, 14644, 24884, 25140, 25396, + 25652, 25908, 26164, 12341, 12597, 12853, 13109, 13365, 13621, 13877, 14133, + 14389, 14645, 24885, 25141, 25397, 25653, 25909, 26165, 12342, 12598, 12854, + 13110, 13366, 13622, 13878, 14134, 14390, 14646, 24886, 25142, 25398, 25654, + 25910, 26166, 12343, 12599, 12855, 13111, 13367, 13623, 13879, 14135, 14391, + 14647, 24887, 25143, 25399, 25655, 25911, 26167, 12344, 12600, 12856, 13112, + 13368, 13624, 13880, 14136, 14392, 14648, 24888, 25144, 25400, 25656, 25912, + 26168, 12345, 12601, 12857, 13113, 13369, 13625, 13881, 14137, 14393, 14649, + 24889, 25145, 25401, 25657, 25913, 26169, 12385, 12641, 12897, 13153, 13409, + 13665, 13921, 14177, 14433, 14689, 24929, 25185, 25441, 25697, 25953, 26209, + 12386, 12642, 12898, 13154, 13410, 13666, 13922, 14178, 14434, 14690, 24930, + 25186, 25442, 25698, 25954, 26210, 12387, 12643, 12899, 13155, 13411, 13667, + 13923, 14179, 14435, 14691, 24931, 25187, 25443, 25699, 25955, 26211, 12388, + 12644, 12900, 13156, 13412, 13668, 13924, 14180, 14436, 14692, 24932, 25188, + 25444, 25700, 25956, 26212, 12389, 12645, 12901, 13157, 13413, 13669, 13925, + 14181, 14437, 14693, 24933, 25189, 25445, 25701, 25957, 26213, 12390, 12646, + 12902, 13158, 13414, 13670, 13926, 14182, 14438, 14694, 24934, 25190, 25446, + 25702, 25958, 26214}; + + return hex_char_pairs; + } + + inline + void init_hex_char_pairs(const oid_t& oid, uint16_t* data) + { + const uint8_t* bytes = oid.data(); + const uint16_t* gHexCharPairs = get_hex_char_pairs(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>()); 
+ + data[0] = gHexCharPairs[bytes[0]]; + data[1] = gHexCharPairs[bytes[1]]; + data[2] = gHexCharPairs[bytes[2]]; + data[3] = gHexCharPairs[bytes[3]]; + data[4] = gHexCharPairs[bytes[4]]; + data[5] = gHexCharPairs[bytes[5]]; + data[6] = gHexCharPairs[bytes[6]]; + data[7] = gHexCharPairs[bytes[7]]; + data[8] = gHexCharPairs[bytes[8]]; + data[9] = gHexCharPairs[bytes[9]]; + data[10] = gHexCharPairs[bytes[10]]; + data[11] = gHexCharPairs[bytes[11]]; + } + + } // namsepace detail + + template <typename StringT> + inline + void to_string(const oid_t& oid, StringT& s) + { + s.resize(24); + detail::init_hex_char_pairs(oid, reinterpret_cast<uint16_t*>(&s[0])); + } + +} // namespace bson +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/bson/bson_options.hpp b/include/jsoncons_ext/bson/bson_options.hpp new file mode 100644 index 0000000..0e4620e --- /dev/null +++ b/include/jsoncons_ext/bson/bson_options.hpp @@ -0,0 +1,75 @@ +// Copyright 2019 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
namespace jsoncons { namespace bson {

class bson_options;

// State shared by the decode and encode option classes.
// Only derived classes can create or destroy it; bson_options is the one
// class allowed to modify the stored value.
class bson_options_common
{
    friend class bson_options;

    int max_nesting_depth_{1024};
protected:
    bson_options_common() = default;
    virtual ~bson_options_common() = default;

    bson_options_common(const bson_options_common&) = default;
    bson_options_common(bson_options_common&&) = default;
    bson_options_common& operator=(const bson_options_common&) = default;
    bson_options_common& operator=(bson_options_common&&) = default;
public:
    // Current nesting limit (1024 unless changed through bson_options).
    int max_nesting_depth() const
    {
        return max_nesting_depth_;
    }
};

// Read-only view of the options used when decoding.
class bson_decode_options : public virtual bson_options_common
{
    friend class bson_options;
public:
    bson_decode_options()
    {
    }
};

// Read-only view of the options used when encoding.
class bson_encode_options : public virtual bson_options_common
{
    friend class bson_options;
public:
    bson_encode_options()
    {
    }
};

// Mutable options object usable wherever decode or encode options are taken.
class bson_options final : public bson_decode_options, public bson_encode_options
{
public:
    // Keep the getter visible next to the setter overload below.
    using bson_options_common::max_nesting_depth;

    // Sets the nesting limit; returns *this so calls can be chained.
    bson_options& max_nesting_depth(int value)
    {
        max_nesting_depth_ = value;
        return *this;
    }
};

}}
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_BSON_PARSER_HPP +#define JSONCONS_BSON_BSON_PARSER_HPP + +#include <string> +#include <vector> +#include <memory> +#include <utility> // std::move +#include <jsoncons/json.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/bson/bson_type.hpp> +#include <jsoncons_ext/bson/bson_decimal128.hpp> +#include <jsoncons_ext/bson/bson_error.hpp> +#include <jsoncons_ext/bson/bson_options.hpp> +#include <jsoncons_ext/bson/bson_oid.hpp> + +namespace jsoncons { namespace bson { + +enum class parse_mode {root,accept,document,array,value}; + +struct parse_state +{ + parse_mode mode; + std::size_t length; + std::size_t pos; + uint8_t type; + std::size_t index; + + parse_state(parse_mode mode_, std::size_t length_, std::size_t pos_, uint8_t type_ = 0) noexcept + : mode(mode_), length(length_), pos(pos_), type(type_), index(0) + { + } + + parse_state(const parse_state&) = default; + parse_state(parse_state&&) = default; + parse_state& operator=(const parse_state&) = default; + parse_state& operator=(parse_state&&) = default; +}; + +template <class Source,class Allocator=std::allocator<char>> +class basic_bson_parser : public ser_context +{ + using char_type = char; + using char_traits_type = std::char_traits<char>; + using temp_allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<char_type>; + using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>; + using parse_state_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_state>; + + Source source_; + bson_decode_options options_; + bool more_; + bool done_; + 
std::vector<uint8_t,byte_allocator_type> bytes_buffer_; + std::basic_string<char,std::char_traits<char>,char_allocator_type> text_buffer_; + std::vector<parse_state,parse_state_allocator_type> state_stack_; +public: + template <class Sourceable> + basic_bson_parser(Sourceable&& source, + const bson_decode_options& options = bson_decode_options(), + const Allocator alloc = Allocator()) + : source_(std::forward<Sourceable>(source)), + options_(options), + more_(true), + done_(false), + text_buffer_(alloc), + state_stack_(alloc) + { + state_stack_.emplace_back(parse_mode::root,0,0); + } + + void restart() + { + more_ = true; + } + + void reset() + { + more_ = true; + done_ = false; + bytes_buffer_.clear(); + text_buffer_.clear(); + state_stack_.clear(); + state_stack_.emplace_back(parse_mode::root,0,0); + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + source_ = std::forward<Sourceable>(source); + reset(); + } + + bool done() const + { + return done_; + } + + bool stopped() const + { + return !more_; + } + + std::size_t line() const override + { + return 0; + } + + std::size_t column() const override + { + return source_.position(); + } + + void array_expected(json_visitor& visitor, std::error_code& ec) + { + if (state_stack_.size() == 2 && state_stack_.back().mode == parse_mode::document) + { + state_stack_.back().mode = parse_mode::array; + more_ = visitor.begin_array(semantic_tag::none, *this, ec); + } + } + + void parse(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(source_.is_error())) + { + ec = bson_errc::source_error; + more_ = false; + return; + } + + while (!done_ && more_) + { + switch (state_stack_.back().mode) + { + case parse_mode::root: + state_stack_.back().mode = parse_mode::accept; + begin_document(visitor, ec); + break; + case parse_mode::document: + { + uint8_t type; + std::size_t n = source_.read(&type, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = 
bson_errc::unexpected_eof; + more_ = false; + return; + } + if (type != 0x00) + { + read_e_name(visitor,jsoncons::bson::bson_container_type::document,ec); + state_stack_.back().mode = parse_mode::value; + state_stack_.back().type = type; + } + else + { + end_document(visitor,ec); + } + break; + } + case parse_mode::array: + { + uint8_t type; + std::size_t n = source_.read(&type, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + if (type != 0x00) + { + read_e_name(visitor,jsoncons::bson::bson_container_type::array,ec); + read_value(visitor, type, ec); + } + else + { + end_array(visitor,ec); + } + break; + } + case parse_mode::value: + state_stack_.back().mode = parse_mode::document; + read_value(visitor,state_stack_.back().type,ec); + break; + case parse_mode::accept: + { + JSONCONS_ASSERT(state_stack_.size() == 1); + state_stack_.clear(); + more_ = false; + done_ = true; + visitor.flush(); + break; + } + } + } + } + +private: + + void begin_document(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(static_cast<int>(state_stack_.size()) > options_.max_nesting_depth())) + { + ec = bson_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + + uint8_t buf[sizeof(int32_t)]; + size_t n = source_.read(buf, sizeof(int32_t)); + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + auto length = binary::little_to_native<int32_t>(buf, sizeof(buf)); + + more_ = visitor.begin_object(semantic_tag::none, *this, ec); + state_stack_.emplace_back(parse_mode::document,length,n); + } + + void end_document(json_visitor& visitor, std::error_code& ec) + { + JSONCONS_ASSERT(state_stack_.size() >= 2); + + more_ = visitor.end_object(*this,ec); + if (JSONCONS_UNLIKELY(state_stack_.back().pos != state_stack_.back().length)) + { + ec = bson_errc::size_mismatch; + more_ = false; + return; + } + std::size_t pos 
= state_stack_.back().pos; + state_stack_.pop_back(); + state_stack_.back().pos += pos; + } + + void begin_array(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(static_cast<int>(state_stack_.size()) > options_.max_nesting_depth())) + { + ec = bson_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + uint8_t buf[sizeof(int32_t)]; + std::size_t n = source_.read(buf, sizeof(int32_t)); + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto length = binary::little_to_native<int32_t>(buf, sizeof(buf)); + + more_ = visitor.begin_array(semantic_tag::none, *this, ec); + if (ec) + { + return; + } + state_stack_.emplace_back(parse_mode::array, length, n); + } + + void end_array(json_visitor& visitor, std::error_code& ec) + { + JSONCONS_ASSERT(state_stack_.size() >= 2); + + more_ = visitor.end_array(*this, ec); + if (JSONCONS_UNLIKELY(state_stack_.back().pos != state_stack_.back().length)) + { + ec = bson_errc::size_mismatch; + more_ = false; + return; + } + std::size_t pos = state_stack_.back().pos; + state_stack_.pop_back(); + state_stack_.back().pos += pos; + } + + void read_e_name(json_visitor& visitor, jsoncons::bson::bson_container_type type, std::error_code& ec) + { + text_buffer_.clear(); + read_cstring(ec); + if (ec) + { + return; + } + if (type == jsoncons::bson::bson_container_type::document) + { + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) + { + ec = bson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.key(jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()), *this, ec); + } + } + + void read_value(json_visitor& visitor, uint8_t type, std::error_code& ec) + { + switch (type) + { + case jsoncons::bson::bson_type::double_type: + { + uint8_t buf[sizeof(double)]; + std::size_t n = source_.read(buf, 
sizeof(double)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(double))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + double res = binary::little_to_native<double>(buf, sizeof(buf)); + more_ = visitor.double_value(res, semantic_tag::none, *this, ec); + break; + } + case jsoncons::bson::bson_type::symbol_type: + case jsoncons::bson::bson_type::min_key_type: + case jsoncons::bson::bson_type::max_key_type: + case jsoncons::bson::bson_type::string_type: + { + text_buffer_.clear(); + read_string(ec); + if (ec) + { + return; + } + auto result = unicode_traits::validate(text_buffer_.data(), text_buffer_.size()); + if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) + { + ec = bson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::none, *this, ec); + break; + } + case jsoncons::bson::bson_type::javascript_type: + { + text_buffer_.clear(); + read_string(ec); + if (ec) + { + return; + } + auto result = unicode_traits::validate(text_buffer_.data(), text_buffer_.size()); + if (JSONCONS_UNLIKELY(result.ec != unicode_traits::conv_errc())) + { + ec = bson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::code, *this, ec); + break; + } + case jsoncons::bson::bson_type::regex_type: + { + text_buffer_.clear(); + text_buffer_.push_back('/'); + read_cstring(ec); + if (ec) + { + return; + } + text_buffer_.push_back('/'); + read_cstring(ec); + if (ec) + { + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::regex, *this, ec); + break; + } + case jsoncons::bson::bson_type::document_type: + { + begin_document(visitor,ec); + break; + } + + case jsoncons::bson::bson_type::array_type: + { + begin_array(visitor,ec); + break; + } + case jsoncons::bson::bson_type::undefined_type: + { + more_ = visitor.null_value(semantic_tag::undefined, *this, ec); + break; + } + case 
jsoncons::bson::bson_type::null_type: + { + more_ = visitor.null_value(semantic_tag::none, *this, ec); + break; + } + case jsoncons::bson::bson_type::bool_type: + { + uint8_t c; + std::size_t n = source_.read(&c, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + more_ = visitor.bool_value(c != 0, semantic_tag::none, *this, ec); + break; + } + case jsoncons::bson::bson_type::int32_type: + { + uint8_t buf[sizeof(int32_t)]; + std::size_t n = source_.read(buf, sizeof(int32_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto val = binary::little_to_native<int32_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::bson::bson_type::timestamp_type: + { + uint8_t buf[sizeof(uint64_t)]; + std::size_t n = source_.read(buf, sizeof(uint64_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(uint64_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto val = binary::little_to_native<uint64_t>(buf, sizeof(buf)); + more_ = visitor.uint64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::bson::bson_type::int64_type: + { + uint8_t buf[sizeof(int64_t)]; + std::size_t n = source_.read(buf, sizeof(int64_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int64_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto val = binary::little_to_native<int64_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::bson::bson_type::datetime_type: + { + uint8_t buf[sizeof(int64_t)]; + std::size_t n = source_.read(buf, sizeof(int64_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int64_t))) + { + ec = bson_errc::unexpected_eof; + 
more_ = false; + return; + } + auto val = binary::little_to_native<int64_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::epoch_milli, *this, ec); + break; + } + case jsoncons::bson::bson_type::binary_type: + { + uint8_t buf[sizeof(int32_t)]; + std::size_t n = source_.read(buf, sizeof(int32_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + const auto len = binary::little_to_native<int32_t>(buf, sizeof(buf)); + if (JSONCONS_UNLIKELY(len < 0)) + { + ec = bson_errc::length_is_negative; + more_ = false; + return; + } + uint8_t subtype; + n = source_.read(&subtype, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + bytes_buffer_.clear(); + n = source_reader<Source>::read(source_, bytes_buffer_, len); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != static_cast<std::size_t>(len))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + more_ = visitor.byte_string_value(bytes_buffer_, + subtype, + *this, + ec); + break; + } + case jsoncons::bson::bson_type::decimal128_type: + { + uint8_t buf[sizeof(uint64_t)*2]; + std::size_t n = source_.read(buf, sizeof(buf)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(buf))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + decimal128_t dec; + dec.low = binary::little_to_native<uint64_t>(buf, sizeof(uint64_t)); + dec.high = binary::little_to_native<uint64_t>(buf+sizeof(uint64_t), sizeof(uint64_t)); + + text_buffer_.clear(); + text_buffer_.resize(bson::decimal128_limits::buf_size); + auto r = bson::decimal128_to_chars(&text_buffer_[0], &text_buffer_[0]+text_buffer_.size(), dec); + more_ = visitor.string_value(string_view(text_buffer_.data(),static_cast<std::size_t>(r.ptr-text_buffer_.data())), semantic_tag::float128, *this, ec); + break; + } + case 
jsoncons::bson::bson_type::object_id_type: + { + uint8_t buf[12]; + std::size_t n = source_.read(buf, sizeof(buf)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(buf))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + + oid_t oid(buf); + to_string(oid, text_buffer_); + + more_ = visitor.string_value(text_buffer_, semantic_tag::id, *this, ec); + break; + } + default: + { + ec = bson_errc::unknown_type; + more_ = false; + return; + } + } + } + + /* Reads bytes one at a time until a zero byte is seen, appending every non-zero byte to text_buffer_; the zero byte is consumed but not stored. Each read advances state_stack_.back().pos. A short read sets ec to bson_errc::unexpected_eof and clears more_. text_buffer_ is not cleared here. */ + void read_cstring(std::error_code& ec) + { + uint8_t c = 0xff; + while (true) + { + std::size_t n = source_.read(&c, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + if (c == 0) + { + break; + } + text_buffer_.push_back(c); + } + } + + /* Reads a length-prefixed string: a 4-byte little-endian int32 length, then length-1 bytes into text_buffer_ through source_reader<Source>::read, then one trailing byte whose value is not inspected (presumably the terminating NUL - TODO confirm against the BSON spec). A length below 1 sets ec to bson_errc::string_length_is_non_positive; any short read sets bson_errc::unexpected_eof. Every failure clears more_. */ + void read_string(std::error_code& ec) + { + uint8_t buf[sizeof(int32_t)]; + std::size_t n = source_.read(buf, sizeof(int32_t)); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != sizeof(int32_t))) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + auto len = binary::little_to_native<int32_t>(buf, sizeof(buf)); + if (JSONCONS_UNLIKELY(len < 1)) + { + ec = bson_errc::string_length_is_non_positive; + more_ = false; + return; + } + + std::size_t size = static_cast<std::size_t>(len) - static_cast<std::size_t>(1); + n = source_reader<Source>::read(source_, text_buffer_, size); + state_stack_.back().pos += n; + + if (JSONCONS_UNLIKELY(n != size)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + uint8_t c; + n = source_.read(&c, 1); + state_stack_.back().pos += n; + if (JSONCONS_UNLIKELY(n != 1)) + { + ec = bson_errc::unexpected_eof; + more_ = false; + return; + } + } +}; + +}} + +#endif diff --git a/include/jsoncons_ext/bson/bson_reader.hpp b/include/jsoncons_ext/bson/bson_reader.hpp new file mode 100644 index 0000000..0079cc9 --- /dev/null +++ b/include/jsoncons_ext/bson/bson_reader.hpp @@ -0,0 +1,92 @@ +// Copyright 
2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_BSON_READER_HPP +#define JSONCONS_BSON_BSON_READER_HPP + +#include <string> +#include <vector> +#include <memory> +#include <utility> // std::move, std::forward +#include <jsoncons/json.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/bson/bson_type.hpp> +#include <jsoncons_ext/bson/bson_error.hpp> +#include <jsoncons_ext/bson/bson_parser.hpp> + +namespace jsoncons { namespace bson { + +/* Runs a basic_bson_parser over a source and delivers the parse events to a json_visitor. The visitor is stored by reference, so it has to stay alive for as long as the reader is used. */ +template <class Source,class Allocator=std::allocator<char>> +class basic_bson_reader +{ + basic_bson_parser<Source,Allocator> parser_; + json_visitor& visitor_; +public: + /* Convenience constructor: default bson_decode_options plus an explicit allocator; delegates to the constructor below. */ + template <class Sourceable> + basic_bson_reader(Sourceable&& source, + json_visitor& visitor, + const Allocator alloc) + : basic_bson_reader(std::forward<Sourceable>(source), + visitor, + bson_decode_options(), + alloc) + { + } + + /* Forwards source, options and allocator to the parser and binds the visitor. */ + template <class Sourceable> + basic_bson_reader(Sourceable&& source, + json_visitor& visitor, + const bson_decode_options& options = bson_decode_options(), + const Allocator alloc=Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + visitor_(visitor) + { + } + + /* Throwing overload: calls read(ec) and raises ser_error carrying line() and column() when ec is set. */ + void read() + { + std::error_code ec; + read(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,line(),column())); + } + } + + /* Non-throwing overload: resets the parser, then parses into the visitor; any failure is reported through ec. */ + void read(std::error_code& ec) + { + parser_.reset(); + parser_.parse(visitor_, ec); + if (ec) + { + return; + } + } + + /* line() and column() simply report the parser's own position values. */ + std::size_t line() const + { + return parser_.line(); + } + + std::size_t column() const + { + return parser_.column(); + } +}; + +using bson_stream_reader = basic_bson_reader<jsoncons::binary_stream_source>; +using bson_bytes_reader = basic_bson_reader<jsoncons::bytes_source>; + +#if !defined(JSONCONS_NO_DEPRECATED) 
+JSONCONS_DEPRECATED_MSG("Instead, use bson_stream_reader") typedef bson_stream_reader bson_reader; +JSONCONS_DEPRECATED_MSG("Instead, use bson_bytes_reader") typedef bson_bytes_reader bson_buffer_reader; +#endif + +}} + +#endif diff --git a/include/jsoncons_ext/bson/bson_type.hpp b/include/jsoncons_ext/bson/bson_type.hpp new file mode 100644 index 0000000..cf12344 --- /dev/null +++ b/include/jsoncons_ext/bson/bson_type.hpp @@ -0,0 +1,44 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_BSON_TYPE_HPP +#define JSONCONS_BSON_BSON_TYPE_HPP + +#include <string> +#include <memory> +#include <jsoncons/config/jsoncons_config.hpp> + +namespace jsoncons { namespace bson { + + namespace bson_type + { + const uint8_t double_type = 0x01; + const uint8_t string_type = 0x02; // UTF-8 string + const uint8_t document_type = 0x03; + const uint8_t array_type = 0x04; + const uint8_t binary_type = 0x05; + const uint8_t undefined_type = 0x06; // map to null + const uint8_t object_id_type = 0x07; + const uint8_t bool_type = 0x08; + const uint8_t datetime_type = 0x09; + const uint8_t null_type = 0x0a; + const uint8_t regex_type = 0x0b; + const uint8_t javascript_type = 0x0d; + const uint8_t symbol_type = 0x0e; // deprecated, mapped to string + const uint8_t javascript_with_scope_type = 0x0f; // unsupported + const uint8_t int32_type = 0x10; + const uint8_t timestamp_type = 0x11; // MongoDB internal Timestamp, uint64 + const uint8_t int64_type = 0x12; + const uint8_t decimal128_type = 0x13; + const uint8_t min_key_type = 0xff; + const uint8_t max_key_type = 0x7f; + } + + enum class bson_container_type {document, array}; + +}} + +#endif diff --git a/include/jsoncons_ext/bson/decode_bson.hpp b/include/jsoncons_ext/bson/decode_bson.hpp new file mode 100644 index 
0000000..ad352f6 --- /dev/null +++ b/include/jsoncons_ext/bson/decode_bson.hpp @@ -0,0 +1,201 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_DECODE_BSON_HPP +#define JSONCONS_BSON_DECODE_BSON_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/bson/bson_reader.hpp> +#include <jsoncons_ext/bson/bson_cursor.hpp> + +namespace jsoncons { +namespace bson { + + template<class T, class Source> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_bson(const Source& v, + const bson_decode_options& options = bson_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_bson_reader<jsoncons::bytes_source> reader(v, adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class Source> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_bson(const Source& v, + const bson_decode_options& options = bson_decode_options()) + { + basic_bson_cursor<bytes_source> cursor(v, options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T> + typename 
std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_bson(std::istream& is, + const bson_decode_options& options = bson_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + bson_stream_reader reader(is, adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_bson(std::istream& is, + const bson_decode_options& options = bson_decode_options()) + { + basic_bson_cursor<binary_stream_source> cursor(is, options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T, class InputIt> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_bson(InputIt first, InputIt last, + const bson_decode_options& options = bson_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_bson_reader<binary_iterator_source<InputIt>> reader(binary_iterator_source<InputIt>(first, last), adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class InputIt> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_bson(InputIt first, InputIt last, + const bson_decode_options& options = bson_decode_options()) + { + basic_bson_cursor<binary_iterator_source<InputIt>> cursor(binary_iterator_source<InputIt>(first, last), options); + 
json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + // With leading allocator parameter + + template<class T, class Source, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_bson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& v, + const bson_decode_options& options = bson_decode_options()) + { + json_decoder<T,TempAllocator> decoder(temp_alloc); + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_bson_reader<jsoncons::bytes_source,TempAllocator> reader(v, adaptor, options, temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class Source, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_bson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& v, + const bson_decode_options& options = bson_decode_options()) + { + basic_bson_cursor<bytes_source,TempAllocator> cursor(v, options, temp_alloc); + json_decoder<basic_json<char,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_bson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::istream& is, + const bson_decode_options& 
options = bson_decode_options()) + { + json_decoder<T,TempAllocator> decoder(temp_alloc); + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_bson_reader<jsoncons::binary_stream_source,TempAllocator> reader(is, adaptor, options, temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_bson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::istream& is, + const bson_decode_options& options = bson_decode_options()) + { + basic_bson_cursor<binary_stream_source,TempAllocator> cursor(is, options, temp_alloc); + json_decoder<basic_json<char,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + +} // bson +} // jsoncons + +#endif diff --git a/include/jsoncons_ext/bson/encode_bson.hpp b/include/jsoncons_ext/bson/encode_bson.hpp new file mode 100644 index 0000000..55f8cf5 --- /dev/null +++ b/include/jsoncons_ext/bson/encode_bson.hpp @@ -0,0 +1,144 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_BSON_ENCODE_BSON_HPP +#define JSONCONS_BSON_ENCODE_BSON_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/bson/bson_encoder.hpp> +#include <jsoncons_ext/bson/bson_reader.hpp> + +namespace jsoncons { +namespace bson { + + template<class T, class Container> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_bson(const T& j, + Container& v, + const bson_encode_options& options = bson_encode_options()) + { + using char_type = typename T::char_type; + basic_bson_encoder<jsoncons::bytes_sink<Container>> encoder(v, options); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T, class Container> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_bson(const T& val, + Container& v, + const bson_encode_options& options = bson_encode_options()) + { + basic_bson_encoder<jsoncons::bytes_sink<Container>> encoder(v, options); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template<class T> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_bson(const T& j, + std::ostream& os, + const bson_encode_options& options = bson_encode_options()) + { + using char_type = typename T::char_type; + bson_stream_encoder encoder(os, options); + auto adaptor = 
make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_bson(const T& val, + std::ostream& os, + const bson_encode_options& options = bson_encode_options()) + { + bson_stream_encoder encoder(os, options); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + // with temp_allocator_arg + + template<class T, class Container, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_bson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, + Container& v, + const bson_encode_options& options = bson_encode_options()) + { + using char_type = typename T::char_type; + basic_bson_encoder<jsoncons::bytes_sink<Container>,TempAllocator> encoder(v, options, temp_alloc); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T, class Container, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_bson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, + Container& v, + const bson_encode_options& options = bson_encode_options()) + { + basic_bson_encoder<jsoncons::bytes_sink<Container>,TempAllocator> encoder(v, options, temp_alloc); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template<class T,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_bson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, + std::ostream& os, + const bson_encode_options& 
options = bson_encode_options()) + { + using char_type = typename T::char_type; + basic_bson_encoder<jsoncons::binary_stream_sink,TempAllocator> encoder(os, options, temp_alloc); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_bson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, + std::ostream& os, + const bson_encode_options& options = bson_encode_options()) + { + basic_bson_encoder<jsoncons::binary_stream_sink,TempAllocator> encoder(os, options, temp_alloc); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + +} // bson +} // jsoncons + +#endif diff --git a/include/jsoncons_ext/cbor/cbor.hpp b/include/jsoncons_ext/cbor/cbor.hpp new file mode 100644 index 0000000..3f7329f --- /dev/null +++ b/include/jsoncons_ext/cbor/cbor.hpp @@ -0,0 +1,26 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + + +#ifndef JSONCONS_CBOR_CBOR_HPP +#define JSONCONS_CBOR_CBOR_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/json_filter.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/cbor/cbor_reader.hpp> +#include <jsoncons_ext/cbor/cbor_cursor.hpp> +#include <jsoncons_ext/cbor/cbor_encoder.hpp> +#include <jsoncons_ext/cbor/encode_cbor.hpp> +#include <jsoncons_ext/cbor/decode_cbor.hpp> + +#endif + diff --git a/include/jsoncons_ext/cbor/cbor_cursor.hpp b/include/jsoncons_ext/cbor/cbor_cursor.hpp new file mode 100644 index 0000000..af0d1a8 --- /dev/null +++ b/include/jsoncons_ext/cbor/cbor_cursor.hpp @@ -0,0 +1,351 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_CBOR_CURSOR_HPP +#define JSONCONS_CBOR_CBOR_CURSOR_HPP + +#include <memory> // std::allocator +#include <string> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <ios> +#include <istream> // std::basic_istream +#include <jsoncons/byte_string.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/staj_cursor.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons_ext/cbor/cbor_parser.hpp> + +namespace jsoncons { +namespace cbor { + +template<class Source=jsoncons::binary_stream_source,class Allocator=std::allocator<char>> +class basic_cbor_cursor : public basic_staj_cursor<char>, private virtual ser_context +{ +public: + using source_type = Source; + using char_type = char; + using allocator_type = Allocator; +private: + basic_cbor_parser<Source,Allocator> parser_; + basic_staj_visitor<char_type> cursor_visitor_; + basic_json_visitor2_to_visitor_adaptor<char_type,Allocator> cursor_handler_adaptor_; + bool eof_; + + // Noncopyable and nonmoveable + basic_cbor_cursor(const basic_cbor_cursor&) = delete; + basic_cbor_cursor& operator=(const basic_cbor_cursor&) = delete; + +public: + using string_view_type = string_view; + + template <class Sourceable> + basic_cbor_cursor(Sourceable&& source, + const cbor_decode_options& options = cbor_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + cursor_handler_adaptor_(cursor_visitor_, alloc), + eof_(false) + { + if (!done()) + { + next(); + } + } + + // Constructors that set parse error codes + + template <class Sourceable> + basic_cbor_cursor(Sourceable&& source, + std::error_code& ec) + : 
basic_cbor_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + cbor_decode_options(), + ec) + { + } + + template <class Sourceable> + basic_cbor_cursor(Sourceable&& source, + const cbor_decode_options& options, + std::error_code& ec) + : basic_cbor_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + ec) + { + } + + template <class Sourceable> + basic_cbor_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const cbor_decode_options& options, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + cursor_handler_adaptor_(cursor_visitor_, alloc), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + void reset() + { + parser_.reset(); + cursor_visitor_.reset(); + cursor_handler_adaptor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + cursor_handler_adaptor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + void reset(std::error_code& ec) + { + parser_.reset(); + cursor_visitor_.reset(); + cursor_handler_adaptor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + void reset(Sourceable&& source, std::error_code& ec) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + cursor_handler_adaptor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + bool done() const override + { + return parser_.done(); + } + + bool is_typed_array() const + { + return cursor_visitor_.is_typed_array(); + } + + const staj_event& current() const override + { + return cursor_visitor_.event(); + } + + void read_to(basic_json_visitor<char_type>& visitor) override + { + std::error_code ec; + read_to(visitor, ec); + if (ec) + { + 
JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read_to(basic_json_visitor<char_type>& visitor, + std::error_code& ec) override + { + if (cursor_visitor_.dump(visitor, *this, ec)) + { + read_next(visitor, ec); + } + } + + void next() override + { + std::error_code ec; + next(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void next(std::error_code& ec) override + { + read_next(ec); + } + + const ser_context& context() const override + { + return *this; + } + + bool eof() const + { + return eof_; + } + + std::size_t line() const override + { + return parser_.line(); + } + + std::size_t column() const override + { + return parser_.column(); + } + + friend + staj_filter_view operator|(basic_cbor_cursor& cursor, + std::function<bool(const staj_event&, const ser_context&)> pred) + { + return staj_filter_view(cursor, pred); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_cbor_cursor(Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + const cbor_decode_options& options = cbor_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(filter), + cursor_handler_adaptor_(cursor_visitor_, alloc), + eof_(false) + { + if (!done()) + { + next(); + } + } + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_cbor_cursor(Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + std::error_code& ec) + : basic_cbor_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), filter, ec) + { + } + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_cbor_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + 
std::function<bool(const staj_event&, const ser_context&)> filter, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source), alloc), + cursor_visitor_(filter), + cursor_handler_adaptor_(cursor_visitor_, alloc), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<char_type>&)") + void read(basic_json_visitor<char_type>& visitor) + { + read_to(visitor); + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<char_type>&, std::error_code&)") + void read(basic_json_visitor<char_type>& visitor, + std::error_code& ec) + { + read_to(visitor, ec); + } +#endif +private: + static bool accept_all(const staj_event&, const ser_context&) + { + return true; + } + + void read_next(std::error_code& ec) + { + if (cursor_visitor_.in_available()) + { + cursor_visitor_.send_available(ec); + } + else + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(cursor_handler_adaptor_, ec); + if (ec) return; + } + } + } + + void read_next(basic_json_visitor<char_type>& visitor, std::error_code& ec) + { + { + struct resource_wrapper + { + basic_json_visitor2_to_visitor_adaptor<char_type,Allocator>& adaptor; + basic_json_visitor<char_type>& original; + + resource_wrapper(basic_json_visitor2_to_visitor_adaptor<char_type,Allocator>& adaptor, + basic_json_visitor<char_type>& visitor) + : adaptor(adaptor), original(adaptor.destination()) + { + adaptor.destination(visitor); + } + + ~resource_wrapper() + { + adaptor.destination(original); + } + } wrapper(cursor_handler_adaptor_, visitor); + + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(cursor_handler_adaptor_, ec); + if (ec) + { + return; + } + } + } + } +}; + +using cbor_stream_cursor = basic_cbor_cursor<jsoncons::binary_stream_source>; +using cbor_bytes_cursor = basic_cbor_cursor<jsoncons::bytes_source>; + +} // namespace cbor +} // namespace jsoncons + +#endif + diff --git 
a/include/jsoncons_ext/cbor/cbor_cursor2.hpp b/include/jsoncons_ext/cbor/cbor_cursor2.hpp new file mode 100644 index 0000000..eee7445 --- /dev/null +++ b/include/jsoncons_ext/cbor/cbor_cursor2.hpp @@ -0,0 +1,265 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_CBOR_CURSOR2_HPP +#define JSONCONS_CBOR_CBOR_CURSOR2_HPP + +#include <memory> // std::allocator +#include <string> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <ios> +#include <istream> // std::basic_istream +#include <jsoncons/byte_string.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/json_visitor2.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/staj2_cursor.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons_ext/cbor/cbor_parser.hpp> + +namespace jsoncons { +namespace cbor { + + template<class Source=jsoncons::binary_stream_source,class Allocator=std::allocator<char>> + class basic_cbor_cursor2 : public basic_staj2_cursor<char>, private virtual ser_context + { + public: + using source_type = Source; + using char_type = char; + using allocator_type = Allocator; + private: + basic_cbor_parser<Source,Allocator> parser_; + basic_staj2_visitor<char_type> cursor_visitor_; + bool eof_; + + // Noncopyable and nonmoveable + basic_cbor_cursor2(const basic_cbor_cursor2&) = delete; + basic_cbor_cursor2& operator=(const basic_cbor_cursor2&) = delete; + + public: + using string_view_type = string_view; + + template <class Sourceable> + basic_cbor_cursor2(Sourceable&& source, + const cbor_decode_options& options = cbor_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + eof_(false) + { + if (!done()) + { + next(); + } + 
} + + // Constructors that set parse error codes + + template <class Sourceable> + basic_cbor_cursor2(Sourceable&& source, + std::error_code& ec) + : basic_cbor_cursor2(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + cbor_decode_options(), + ec) + { + } + + template <class Sourceable> + basic_cbor_cursor2(Sourceable&& source, + const cbor_decode_options& options, + std::error_code& ec) + : basic_cbor_cursor2(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + ec) + { + } + + template <class Sourceable> + basic_cbor_cursor2(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const cbor_decode_options& options, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + void reset() + { + parser_.reset(); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + void reset(std::error_code& ec) + { + parser_.reset(); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + void reset(Sourceable&& source, std::error_code& ec) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + bool done() const override + { + return parser_.done(); + } + + bool is_typed_array() const + { + return cursor_visitor_.is_typed_array(); + } + + const staj2_event& current() const override + { + return cursor_visitor_.event(); + } + + void read_to(basic_json_visitor2<char_type>& visitor) override + { + std::error_code ec; + read_to(visitor, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void 
read_to(basic_json_visitor2<char_type>& visitor, + std::error_code& ec) override + { + if (cursor_visitor_.dump(visitor, *this, ec)) + { + read_next(visitor, ec); + } + } + + void next() override + { + std::error_code ec; + next(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void next(std::error_code& ec) override + { + read_next(ec); + } + + const ser_context& context() const override + { + return *this; + } + + bool eof() const + { + return eof_; + } + + std::size_t line() const override + { + return parser_.line(); + } + + std::size_t column() const override + { + return parser_.column(); + } + + friend + staj2_filter_view operator|(basic_cbor_cursor2& cursor, + std::function<bool(const staj2_event&, const ser_context&)> pred) + { + return staj2_filter_view(cursor, pred); + } + + private: + static bool accept_all(const staj2_event&, const ser_context&) + { + return true; + } + + void read_next(std::error_code& ec) + { + if (cursor_visitor_.in_available()) + { + cursor_visitor_.send_available(ec); + } + else + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(cursor_visitor_, ec); + if (ec) return; + } + } + } + + void read_next(basic_json_visitor2<char_type>& visitor, std::error_code& ec) + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(visitor, ec); + if (ec) + { + return; + } + } + } + }; + + using cbor_stream_cursor2 = basic_cbor_cursor2<jsoncons::binary_stream_source>; + using cbor_bytes_cursor2 = basic_cbor_cursor2<jsoncons::bytes_source>; + +} // namespace cbor +} // namespace jsoncons + +#endif + diff --git a/include/jsoncons_ext/cbor/cbor_detail.hpp b/include/jsoncons_ext/cbor/cbor_detail.hpp new file mode 100644 index 0000000..9acfc3c --- /dev/null +++ b/include/jsoncons_ext/cbor/cbor_detail.hpp @@ -0,0 +1,93 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_CBOR_DETAIL_HPP +#define JSONCONS_CBOR_CBOR_DETAIL_HPP + +#include <string> +#include <vector> +#include <memory> +#include <iterator> // std::forward_iterator_tag +#include <limits> // std::numeric_limits +#include <utility> // std::move +#include <jsoncons/json.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> + +// 0x00..0x17 (0..23) +#define JSONCONS_CBOR_0x00_0x17 \ + 0x00:case 0x01:case 0x02:case 0x03:case 0x04:case 0x05:case 0x06:case 0x07:case 0x08:case 0x09:case 0x0a:case 0x0b:case 0x0c:case 0x0d:case 0x0e:case 0x0f:case 0x10:case 0x11:case 0x12:case 0x13:case 0x14:case 0x15:case 0x16:case 0x17 + +#define JSONCONS_CBOR_ARRAY_TAGS \ + 0x40:case 0x41:case 0x42:case 0x43:case 0x44:case 0x45:case 0x46:case 0x47:case 0x48:case 0x49:case 0x4a:case 0x4b:case 0x4c:case 0x4d:case 0x4e:case 0x4f:case 0x50:case 0x51:case 0x52:case 0x53:case 0x54:case 0x55:case 0x56:case 0x57 + +namespace jsoncons { namespace cbor { namespace detail { + +//const uint8_t cbor_array_tags_010_mask = 0b11100000; +//const uint8_t cbor_array_tags_f_mask = 0b00010000; +//const uint8_t cbor_array_tags_s_mask = 0b00001000; +//const uint8_t cbor_array_tags_e_mask = 0b00000100; +//const uint8_t cbor_array_tags_ll_mask = 0b00000011; + +const uint8_t cbor_array_tags_010_mask = 0xE0; +const uint8_t cbor_array_tags_f_mask = 0x10; +const uint8_t cbor_array_tags_s_mask = 0x08; +const uint8_t cbor_array_tags_e_mask = 0x04; +const uint8_t cbor_array_tags_ll_mask = 0x03; + +const uint8_t cbor_array_tags_010_shift = 5; +const uint8_t cbor_array_tags_f_shift = 4; +const uint8_t cbor_array_tags_s_shift = 3; +const uint8_t cbor_array_tags_e_shift = 2; +const uint8_t cbor_array_tags_ll_shift = 0; + +enum class cbor_major_type : uint8_t +{ + unsigned_integer = 0x00, + negative_integer 
= 0x01, + byte_string = 0x02, + text_string = 0x03, + array = 0x04, + map = 0x05, + semantic_tag = 0x06, + simple = 0x7 +}; + +namespace additional_info +{ + const uint8_t indefinite_length = 0x1f; +} + +inline +size_t min_length_for_stringref(uint64_t index) +{ + std::size_t n; + if (index <= 23) + { + n = 3; + } + else if (index <= 255) + { + n = 4; + } + else if (index <= 65535) + { + n = 5; + } + else if (index <= 4294967295) + { + n = 7; + } + else + { + n = 11; + } + return n; +} + +}}} + +#endif diff --git a/include/jsoncons_ext/cbor/cbor_encoder.hpp b/include/jsoncons_ext/cbor/cbor_encoder.hpp new file mode 100644 index 0000000..f4699ee --- /dev/null +++ b/include/jsoncons_ext/cbor/cbor_encoder.hpp @@ -0,0 +1,1766 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_CBOR_ENCODER_HPP +#define JSONCONS_CBOR_CBOR_ENCODER_HPP + +#include <string> +#include <vector> +#include <limits> // std::numeric_limits +#include <memory> +#include <utility> // std::move +#include <jsoncons/json_exception.hpp> // jsoncons::ser_error +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/sink.hpp> +#include <jsoncons/detail/parse_number.hpp> +#include <jsoncons_ext/cbor/cbor_error.hpp> +#include <jsoncons_ext/cbor/cbor_options.hpp> + +namespace jsoncons { namespace cbor { + +enum class cbor_container_type {object, indefinite_length_object, array, indefinite_length_array}; + +template<class Sink=jsoncons::binary_stream_sink,class Allocator=std::allocator<char>> +class basic_cbor_encoder final : public basic_json_visitor<char> +{ + using super_type = basic_json_visitor<char>; + + enum class decimal_parse_state { start, integer, exp1, exp2, fraction1 }; + enum class hexfloat_parse_state { start, expect_0, 
expect_x, integer, exp1, exp2, fraction1 }; + + static constexpr int64_t nanos_in_second = 1000000000; + static constexpr int64_t millis_in_second = 1000; + +public: + using allocator_type = Allocator; + using sink_type = Sink; + using typename super_type::char_type; + using typename super_type::string_view_type; + +private: + using char_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<char_type>; + using byte_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<uint8_t>; + + using string_type = std::basic_string<char_type,std::char_traits<char_type>,char_allocator_type>; + using byte_string_type = basic_byte_string<byte_allocator_type>; + + struct stack_item + { + cbor_container_type type_; + std::size_t length_; + std::size_t count_; + + stack_item(cbor_container_type type, std::size_t length = 0) noexcept + : type_(type), length_(length), count_(0) + { + } + + std::size_t length() const + { + return length_; + } + + std::size_t count() const + { + return count_; + } + + bool is_object() const + { + return type_ == cbor_container_type::object || type_ == cbor_container_type::indefinite_length_object; + } + + bool is_indefinite_length() const + { + return type_ == cbor_container_type::indefinite_length_array || type_ == cbor_container_type::indefinite_length_object; + } + + }; + + typedef typename std::allocator_traits<allocator_type>:: template rebind_alloc<std::pair<const string_type,size_t>> string_size_allocator_type; + typedef typename std::allocator_traits<allocator_type>:: template rebind_alloc<std::pair<const byte_string_type,size_t>> byte_string_size_allocator_type; + typedef typename std::allocator_traits<allocator_type>:: template rebind_alloc<stack_item> stack_item_allocator_type; + + Sink sink_; + const cbor_encode_options options_; + allocator_type alloc_; + + std::vector<stack_item,stack_item_allocator_type> stack_; + 
std::map<string_type,size_t,std::less<string_type>,string_size_allocator_type> stringref_map_; + std::map<byte_string_type,size_t,std::less<byte_string_type>,byte_string_size_allocator_type> bytestringref_map_; + std::size_t next_stringref_ = 0; + int nesting_depth_; + + // Noncopyable and nonmoveable + basic_cbor_encoder(const basic_cbor_encoder&) = delete; + basic_cbor_encoder& operator=(const basic_cbor_encoder&) = delete; +public: + explicit basic_cbor_encoder(Sink&& sink, + const Allocator& alloc = Allocator()) + : basic_cbor_encoder(std::forward<Sink>(sink), cbor_encode_options(), alloc) + { + } + basic_cbor_encoder(Sink&& sink, + const cbor_encode_options& options, + const Allocator& alloc = Allocator()) + : sink_(std::forward<Sink>(sink)), + options_(options), + alloc_(alloc), + stack_(alloc), +#if !defined(JSONCONS_NO_MAP_CONS_TAKES_ALLOCATOR) + stringref_map_(alloc), + bytestringref_map_(alloc), +#endif + nesting_depth_(0) + { + if (options.pack_strings()) + { + write_tag(256); + } + } + + ~basic_cbor_encoder() noexcept + { + JSONCONS_TRY + { + sink_.flush(); + } + JSONCONS_CATCH(...) 
+ { + } + } + + void reset() + { + stack_.clear(); + stringref_map_.clear(); + bytestringref_map_.clear(); + next_stringref_ = 0; + nesting_depth_ = 0; + } + + void reset(Sink&& sink) + { + sink_ = std::move(sink); + reset(); + } + +private: + // Implementing methods + + void visit_flush() override + { + sink_.flush(); + } + + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = cbor_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(cbor_container_type::indefinite_length_object); + + sink_.push_back(0xbf); + return true; + } + + bool visit_begin_object(std::size_t length, semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = cbor_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(cbor_container_type::object, length); + + if (length <= 0x17) + { + binary::native_to_big(static_cast<uint8_t>(0xa0 + length), + std::back_inserter(sink_)); + } + else if (length <= 0xff) + { + binary::native_to_big(static_cast<uint8_t>(0xb8), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint8_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffff) + { + binary::native_to_big(static_cast<uint8_t>(0xb9), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint16_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0xba), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint32_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffffffffffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0xbb), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint64_t>(length), + std::back_inserter(sink_)); + } + + return true; + } + + bool 
visit_end_object(const ser_context&, std::error_code& ec) override + { + JSONCONS_ASSERT(!stack_.empty()); + --nesting_depth_; + + if (stack_.back().is_indefinite_length()) + { + sink_.push_back(0xff); + } + else + { + if (stack_.back().count() < stack_.back().length()) + { + ec = cbor_errc::too_few_items; + return false; + } + if (stack_.back().count() > stack_.back().length()) + { + ec = cbor_errc::too_many_items; + return false; + } + } + + stack_.pop_back(); + end_value(); + + return true; + } + + bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = cbor_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(cbor_container_type::indefinite_length_array); + sink_.push_back(0x9f); + return true; + } + + bool visit_begin_array(std::size_t length, semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = cbor_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(cbor_container_type::array, length); + if (length <= 0x17) + { + binary::native_to_big(static_cast<uint8_t>(0x80 + length), + std::back_inserter(sink_)); + } + else if (length <= 0xff) + { + binary::native_to_big(static_cast<uint8_t>(0x98), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint8_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffff) + { + binary::native_to_big(static_cast<uint8_t>(0x99), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint16_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0x9a), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint32_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffffffffffffffff) + { + 
binary::native_to_big(static_cast<uint8_t>(0x9b), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint64_t>(length), + std::back_inserter(sink_)); + } + return true; + } + + bool visit_end_array(const ser_context&, std::error_code& ec) override + { + JSONCONS_ASSERT(!stack_.empty()); + --nesting_depth_; + + if (stack_.back().is_indefinite_length()) + { + sink_.push_back(0xff); + } + else + { + if (stack_.back().count() < stack_.back().length()) + { + ec = cbor_errc::too_few_items; + return false; + } + if (stack_.back().count() > stack_.back().length()) + { + ec = cbor_errc::too_many_items; + return false; + } + } + + stack_.pop_back(); + end_value(); + + return true; + } + + bool visit_key(const string_view_type& name, const ser_context&, std::error_code&) override + { + write_string(name); + return true; + } + + bool visit_null(semantic_tag tag, const ser_context&, std::error_code&) override + { + if (tag == semantic_tag::undefined) + { + sink_.push_back(0xf7); + } + else + { + sink_.push_back(0xf6); + } + + end_value(); + return true; + } + + void write_string(const string_view& sv) + { + auto sink = unicode_traits::validate(sv.data(), sv.size()); + if (sink.ec != unicode_traits::conv_errc()) + { + JSONCONS_THROW(ser_error(cbor_errc::invalid_utf8_text_string)); + } + + if (options_.pack_strings() && sv.size() >= jsoncons::cbor::detail::min_length_for_stringref(next_stringref_)) + { + string_type s(sv.data(), sv.size(), alloc_); + auto it = stringref_map_.find(s); + if (it == stringref_map_.end()) + { + stringref_map_.emplace(std::make_pair(std::move(s), next_stringref_++)); + write_utf8_string(sv); + } + else + { + write_tag(25); + write_uint64_value(it->second); + } + } + else + { + write_utf8_string(sv); + } + } + + void write_utf8_string(const string_view& sv) + { + const size_t length = sv.size(); + + if (length <= 0x17) + { + // fixstr stores a byte array whose length is upto 31 bytes + binary::native_to_big(static_cast<uint8_t>(0x60 + 
length), + std::back_inserter(sink_)); + } + else if (length <= 0xff) + { + binary::native_to_big(static_cast<uint8_t>(0x78), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint8_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffff) + { + binary::native_to_big(static_cast<uint8_t>(0x79), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint16_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0x7a), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint32_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffffffffffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0x7b), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint64_t>(length), + std::back_inserter(sink_)); + } + + for (auto c : sv) + { + sink_.push_back(c); + } + } + + void write_bignum(bigint& n) + { + bool is_neg = n < 0; + if (is_neg) + { + n = - n -1; + } + + int signum; + std::vector<uint8_t> data; + n.write_bytes_be(signum, data); + std::size_t length = data.size(); + + if (is_neg) + { + write_tag(3); + } + else + { + write_tag(2); + } + + if (length <= 0x17) + { + // fixstr stores a byte array whose length is upto 31 bytes + binary::native_to_big(static_cast<uint8_t>(0x40 + length), + std::back_inserter(sink_)); + } + else if (length <= 0xff) + { + binary::native_to_big(static_cast<uint8_t>(0x58), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint8_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffff) + { + binary::native_to_big(static_cast<uint8_t>(0x59), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint16_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 0xffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0x5a), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint32_t>(length), + 
std::back_inserter(sink_)); + } + else if (length <= 0xffffffffffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0x5b), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint64_t>(length), + std::back_inserter(sink_)); + } + + for (auto c : data) + { + sink_.push_back(c); + } + } + + bool write_decimal_value(const string_view_type& sv, const ser_context& context, std::error_code& ec) + { + bool more = true; + + decimal_parse_state state = decimal_parse_state::start; + std::basic_string<char> s; + std::basic_string<char> exponent; + int64_t scale = 0; + for (auto c : sv) + { + switch (state) + { + case decimal_parse_state::start: + { + switch (c) + { + case '-': + s.push_back(c); + state = decimal_parse_state::integer; + break; + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + s.push_back(c); + state = decimal_parse_state::integer; + break; + default: + { + ec = cbor_errc::invalid_decimal_fraction; + return false; + } + } + break; + } + case decimal_parse_state::integer: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + s.push_back(c); + break; + case 'e': case 'E': + state = decimal_parse_state::exp1; + break; + case '.': + state = decimal_parse_state::fraction1; + break; + default: + { + ec = cbor_errc::invalid_decimal_fraction; + return false; + } + } + break; + } + case decimal_parse_state::exp1: + { + switch (c) + { + case '+': + state = decimal_parse_state::exp2; + break; + case '-': + exponent.push_back(c); + state = decimal_parse_state::exp2; + break; + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + exponent.push_back(c); + state = decimal_parse_state::exp2; + break; + default: + { + ec = cbor_errc::invalid_decimal_fraction; + return false; + } + } + break; + } + case decimal_parse_state::exp2: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case 
'6':case '7':case '8': case '9': + exponent.push_back(c); + break; + default: + { + ec = cbor_errc::invalid_decimal_fraction; + return false; + } + } + break; + } + case decimal_parse_state::fraction1: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + s.push_back(c); + --scale; + break; + default: + { + ec = cbor_errc::invalid_decimal_fraction; + return false; + } + } + break; + } + } + } + + write_tag(4); + more = visit_begin_array((std::size_t)2, semantic_tag::none, context, ec); + if (!more) {return more;} + if (exponent.length() > 0) + { + int64_t val; + auto r = jsoncons::detail::to_integer(exponent.data(), exponent.length(), val); + if (!r) + { + ec = r.error_code(); + return false; + } + scale += val; + } + more = visit_int64(scale, semantic_tag::none, context, ec); + if (!more) {return more;} + + int64_t val{ 0 }; + auto r = jsoncons::detail::to_integer(s.data(),s.length(), val); + if (r) + { + more = visit_int64(val, semantic_tag::none, context, ec); + if (!more) {return more;} + } + else if (r.error_code() == jsoncons::detail::to_integer_errc::overflow) + { + bigint n = bigint::from_string(s.data(), s.length()); + write_bignum(n); + end_value(); + } + else + { + ec = r.error_code(); + return false; + } + more = visit_end_array(context, ec); + + return more; + } + + bool write_hexfloat_value(const string_view_type& sv, const ser_context& context, std::error_code& ec) + { + bool more = true; + + hexfloat_parse_state state = hexfloat_parse_state::start; + std::basic_string<char> s; + std::basic_string<char> exponent; + int64_t scale = 0; + + for (auto c : sv) + { + switch (state) + { + case hexfloat_parse_state::start: + { + switch (c) + { + case '-': + s.push_back(c); + state = hexfloat_parse_state::expect_0; + break; + case '0': + state = hexfloat_parse_state::expect_x; + break; + default: + { + ec = cbor_errc::invalid_bigfloat; + return false; + } + } + break; + } + case 
hexfloat_parse_state::expect_0: + { + switch (c) + { + case '0': + state = hexfloat_parse_state::expect_x; + break; + default: + { + ec = cbor_errc::invalid_bigfloat; + return false; + } + } + break; + } + case hexfloat_parse_state::expect_x: + { + switch (c) + { + case 'x': + case 'X': + state = hexfloat_parse_state::integer; + break; + default: + { + ec = cbor_errc::invalid_bigfloat; + return false; + } + } + break; + } + case hexfloat_parse_state::integer: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + s.push_back(c); + break; + case 'p': case 'P': + state = hexfloat_parse_state::exp1; + break; + case '.': + state = hexfloat_parse_state::fraction1; + break; + default: + { + ec = cbor_errc::invalid_bigfloat; + return false; + } + } + break; + } + case hexfloat_parse_state::exp1: + { + switch (c) + { + case '+': + state = hexfloat_parse_state::exp2; + break; + case '-': + exponent.push_back(c); + state = hexfloat_parse_state::exp2; + break; + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + exponent.push_back(c); + state = hexfloat_parse_state::exp2; + break; + default: + { + ec = cbor_errc::invalid_bigfloat; + return false; + } + } + break; + } + case hexfloat_parse_state::exp2: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + exponent.push_back(c); + break; + default: + { + ec = cbor_errc::invalid_bigfloat; + return false; + } + } + break; + } + case hexfloat_parse_state::fraction1: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case 
'6':case '7':case '8': case '9':case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + s.push_back(c); + scale -= 4; + break; + default: + { + ec = cbor_errc::invalid_bigfloat; + return false; + } + } + break; + } + } + } + + write_tag(5); + more = visit_begin_array((std::size_t)2, semantic_tag::none, context, ec); + if (!more) return more; + + if (exponent.length() > 0) + { + int64_t val{ 0 }; + auto r = jsoncons::detail::base16_to_integer(exponent.data(), exponent.length(), val); + if (!r) + { + ec = r.error_code(); + return false; + } + scale += val; + } + more = visit_int64(scale, semantic_tag::none, context, ec); + if (!more) return more; + + int64_t val{ 0 }; + auto r = jsoncons::detail::base16_to_integer(s.data(),s.length(), val); + if (r) + { + more = visit_int64(val, semantic_tag::none, context, ec); + if (!more) return more; + } + else if (r.error_code() == jsoncons::detail::to_integer_errc::overflow) + { + bigint n = bigint::from_string_radix(s.data(), s.length(), 16); + write_bignum(n); + end_value(); + } + else + { + JSONCONS_THROW(json_runtime_error<std::invalid_argument>(r.error_code().message())); + } + return visit_end_array(context, ec); + } + + bool visit_string(const string_view_type& sv, semantic_tag tag, const ser_context& context, std::error_code& ec) override + { + switch (tag) + { + case semantic_tag::bigint: + { + bigint n = bigint::from_string(sv.data(), sv.length()); + write_bignum(n); + end_value(); + break; + } + case semantic_tag::bigdec: + { + return write_decimal_value(sv, context, ec); + } + case semantic_tag::bigfloat: + { + return write_hexfloat_value(sv, context, ec); + } + case semantic_tag::datetime: + { + write_tag(0); + + write_string(sv); + end_value(); + break; + } + case semantic_tag::uri: + { + write_tag(32); + write_string(sv); + end_value(); + break; + } + case semantic_tag::base64url: + { + write_tag(33); + write_string(sv); + end_value(); + break; + } + case 
semantic_tag::base64: + { + write_tag(34); + write_string(sv); + end_value(); + break; + } + default: + { + write_string(sv); + end_value(); + break; + } + } + return true; + } + + bool visit_byte_string(const byte_string_view& b, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + byte_string_chars_format encoding_hint; + switch (tag) + { + case semantic_tag::base16: + encoding_hint = byte_string_chars_format::base16; + break; + case semantic_tag::base64: + encoding_hint = byte_string_chars_format::base64; + break; + case semantic_tag::base64url: + encoding_hint = byte_string_chars_format::base64url; + break; + default: + encoding_hint = byte_string_chars_format::none; + break; + } + switch (encoding_hint) + { + case byte_string_chars_format::base64url: + write_tag(21); + break; + case byte_string_chars_format::base64: + write_tag(22); + break; + case byte_string_chars_format::base16: + write_tag(23); + break; + default: + break; + } + if (options_.pack_strings() && b.size() >= jsoncons::cbor::detail::min_length_for_stringref(next_stringref_)) + { + byte_string_type bs(b.data(), b.size(), alloc_); + auto it = bytestringref_map_.find(bs); + if (it == bytestringref_map_.end()) + { + bytestringref_map_.emplace(std::make_pair(bs, next_stringref_++)); + write_byte_string_value(bs); + } + else + { + write_tag(25); + write_uint64_value(it->second); + } + } + else + { + write_byte_string_value(b); + } + + end_value(); + return true; + } + + bool visit_byte_string(const byte_string_view& b, + uint64_t ext_tag, + const ser_context&, + std::error_code&) override + { + if (options_.pack_strings() && b.size() >= jsoncons::cbor::detail::min_length_for_stringref(next_stringref_)) + { + byte_string_type bs(b.data(), b.size(), alloc_); + auto it = bytestringref_map_.find(bs); + if (it == bytestringref_map_.end()) + { + bytestringref_map_.emplace(std::make_pair(bs, next_stringref_++)); + write_tag(ext_tag); + write_byte_string_value(bs); + } + else + { + 
write_tag(25); + write_uint64_value(it->second); + } + } + else + { + write_tag(ext_tag); + write_byte_string_value(b); + } + + end_value(); + return true; + } + + void write_byte_string_value(const byte_string_view& b) + { + if (b.size() <= 0x17) + { + // fixstr stores a byte array whose length is upto 31 bytes + binary::native_to_big(static_cast<uint8_t>(0x40 + b.size()), + std::back_inserter(sink_)); + } + else if (b.size() <= 0xff) + { + binary::native_to_big(static_cast<uint8_t>(0x58), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint8_t>(b.size()), + std::back_inserter(sink_)); + } + else if (b.size() <= 0xffff) + { + binary::native_to_big(static_cast<uint8_t>(0x59), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint16_t>(b.size()), + std::back_inserter(sink_)); + } + else if (b.size() <= 0xffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0x5a), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint32_t>(b.size()), + std::back_inserter(sink_)); + } + else // if (b.size() <= 0xffffffffffffffff) + { + binary::native_to_big(static_cast<uint8_t>(0x5b), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint64_t>(b.size()), + std::back_inserter(sink_)); + } + + for (auto c : b) + { + sink_.push_back(c); + } + } + + bool visit_double(double val, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + switch (tag) + { + case semantic_tag::epoch_second: + write_tag(1); + break; + case semantic_tag::epoch_milli: + write_tag(1); + if (val != 0) + { + val /= millis_in_second; + } + break; + case semantic_tag::epoch_nano: + write_tag(1); + if (val != 0) + { + val /= nanos_in_second; + } + break; + default: + break; + } + + float valf = (float)val; + if ((double)valf == val) + { + binary::native_to_big(static_cast<uint8_t>(0xfa), + std::back_inserter(sink_)); + binary::native_to_big(valf, std::back_inserter(sink_)); + } + else + { + 
binary::native_to_big(static_cast<uint8_t>(0xfb), + std::back_inserter(sink_)); + binary::native_to_big(val, std::back_inserter(sink_)); + } + + // write double + + end_value(); + return true; + } + + bool visit_int64(int64_t value, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + switch (tag) + { + case semantic_tag::epoch_milli: + case semantic_tag::epoch_nano: + return visit_double(static_cast<double>(value), tag, context, ec); + case semantic_tag::epoch_second: + write_tag(1); + break; + default: + break; + } + if (value >= 0) + { + if (value <= 0x17) + { + binary::native_to_big(static_cast<uint8_t>(value), + std::back_inserter(sink_)); + } + else if (value <= (std::numeric_limits<uint8_t>::max)()) + { + binary::native_to_big(static_cast<uint8_t>(0x18), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint8_t>(value), + std::back_inserter(sink_)); + } + else if (value <= (std::numeric_limits<uint16_t>::max)()) + { + binary::native_to_big(static_cast<uint8_t>(0x19), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint16_t>(value), + std::back_inserter(sink_)); + } + else if (value <= (std::numeric_limits<uint32_t>::max)()) + { + binary::native_to_big(static_cast<uint8_t>(0x1a), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint32_t>(value), + std::back_inserter(sink_)); + } + else if (value <= (std::numeric_limits<int64_t>::max)()) + { + binary::native_to_big(static_cast<uint8_t>(0x1b), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<int64_t>(value), + std::back_inserter(sink_)); + } + } else + { + const auto posnum = -1 - value; + if (value >= -24) + { + binary::native_to_big(static_cast<uint8_t>(0x20 + posnum), + std::back_inserter(sink_)); + } + else if (posnum <= (std::numeric_limits<uint8_t>::max)()) + { + binary::native_to_big(static_cast<uint8_t>(0x38), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint8_t>(posnum), + 
std::back_inserter(sink_)); + } + else if (posnum <= (std::numeric_limits<uint16_t>::max)()) + { + binary::native_to_big(static_cast<uint8_t>(0x39), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint16_t>(posnum), + std::back_inserter(sink_)); + } + else if (posnum <= (std::numeric_limits<uint32_t>::max)()) + { + binary::native_to_big(static_cast<uint8_t>(0x3a), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint32_t>(posnum), + std::back_inserter(sink_)); + } + else if (posnum <= (std::numeric_limits<int64_t>::max)()) + { + binary::native_to_big(static_cast<uint8_t>(0x3b), + std::back_inserter(sink_)); + binary::native_to_big(static_cast<int64_t>(posnum), + std::back_inserter(sink_)); + } + } + end_value(); + return true; + } + + bool visit_uint64(uint64_t value, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + switch (tag) + { + case semantic_tag::epoch_milli: + case semantic_tag::epoch_nano: + return visit_double(static_cast<double>(value), tag, context, ec); + case semantic_tag::epoch_second: + write_tag(1); + break; + default: + break; + } + + write_uint64_value(value); + end_value(); + return true; + } + + void write_tag(uint64_t value) + { + if (value <= 0x17) + { + sink_.push_back(0xc0 | static_cast<uint8_t>(value)); + } + else if (value <=(std::numeric_limits<uint8_t>::max)()) + { + sink_.push_back(0xd8); + sink_.push_back(static_cast<uint8_t>(value)); + } + else if (value <=(std::numeric_limits<uint16_t>::max)()) + { + sink_.push_back(0xd9); + binary::native_to_big(static_cast<uint16_t>(value), + std::back_inserter(sink_)); + } + else if (value <=(std::numeric_limits<uint32_t>::max)()) + { + sink_.push_back(0xda); + binary::native_to_big(static_cast<uint32_t>(value), + std::back_inserter(sink_)); + } + else + { + sink_.push_back(0xdb); + binary::native_to_big(static_cast<uint64_t>(value), + std::back_inserter(sink_)); + } + } + + void write_uint64_value(uint64_t value) + { + if 
(value <= 0x17) + { + sink_.push_back(static_cast<uint8_t>(value)); + } + else if (value <=(std::numeric_limits<uint8_t>::max)()) + { + sink_.push_back(static_cast<uint8_t>(0x18)); + sink_.push_back(static_cast<uint8_t>(value)); + } + else if (value <=(std::numeric_limits<uint16_t>::max)()) + { + sink_.push_back(static_cast<uint8_t>(0x19)); + binary::native_to_big(static_cast<uint16_t>(value), + std::back_inserter(sink_)); + } + else if (value <=(std::numeric_limits<uint32_t>::max)()) + { + sink_.push_back(static_cast<uint8_t>(0x1a)); + binary::native_to_big(static_cast<uint32_t>(value), + std::back_inserter(sink_)); + } + else if (value <=(std::numeric_limits<uint64_t>::max)()) + { + sink_.push_back(static_cast<uint8_t>(0x1b)); + binary::native_to_big(static_cast<uint64_t>(value), + std::back_inserter(sink_)); + } + } + + bool visit_bool(bool value, semantic_tag, const ser_context&, std::error_code&) override + { + if (value) + { + sink_.push_back(0xf5); + } + else + { + sink_.push_back(0xf4); + } + + end_value(); + return true; + } + + bool visit_typed_array(const jsoncons::span<const uint8_t>& v, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + switch (tag) + { + case semantic_tag::clamped: + write_tag(0x44); + break; + default: + write_tag(0x40); + break; + } + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(v.size(), semantic_tag::none, context, ec); + for (auto p = v.begin(); more && p != v.end(); ++p) + { + more = this->uint64_value(*p, tag, context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const uint16_t>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == 
jsoncons::endian::big>(), + uint16_t(), + tag); + std::vector<uint8_t> v(data.size()*sizeof(uint16_t)); + std::memcpy(v.data(),data.data(),data.size()*sizeof(uint16_t)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none, context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = this->uint64_value(*p, tag, context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const uint32_t>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>(), + uint32_t(), + tag); + std::vector<uint8_t> v(data.size()*sizeof(uint32_t)); + std::memcpy(v.data(), data.data(), data.size()*sizeof(uint32_t)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none, context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = this->uint64_value(*p, semantic_tag::none, context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const uint64_t>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>(), + uint64_t(), + tag); + std::vector<uint8_t> v(data.size()*sizeof(uint64_t)); + std::memcpy(v.data(), data.data(), data.size()*sizeof(uint64_t)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none, context, ec); + for (auto p = data.begin(); more && p != 
data.end(); ++p) + { + more = this->uint64_value(*p,semantic_tag::none,context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const int8_t>& data, + semantic_tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_tag(0x48); + std::vector<uint8_t> v(data.size()*sizeof(int8_t)); + std::memcpy(v.data(), data.data(), data.size()*sizeof(int8_t)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none,context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = this->int64_value(*p,semantic_tag::none,context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const int16_t>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>(), + int16_t(), + tag); + std::vector<uint8_t> v(data.size()*sizeof(int16_t)); + std::memcpy(v.data(), data.data(), data.size()*sizeof(int16_t)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none,context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = this->int64_value(*p,semantic_tag::none,context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const int32_t>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>(), + int32_t(), + 
tag); + std::vector<uint8_t> v(data.size()*sizeof(int32_t)); + std::memcpy(v.data(), data.data(), data.size()*sizeof(int32_t)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none,context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = this->int64_value(*p,semantic_tag::none,context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const int64_t>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>(), + int64_t(), + tag); + std::vector<uint8_t> v(data.size()*sizeof(int64_t)); + std::memcpy(v.data(), data.data(), data.size()*sizeof(int64_t)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none,context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = this->int64_value(*p,semantic_tag::none,context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(half_arg_t, const jsoncons::span<const uint16_t>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>(), + half_arg, + tag); + std::vector<uint8_t> v(data.size()*sizeof(uint16_t)); + std::memcpy(v.data(),data.data(),data.size()*sizeof(uint16_t)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none, context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = 
this->half_value(*p, tag, context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const float>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>(), + float(), + tag); + std::vector<uint8_t> v(data.size()*sizeof(float)); + std::memcpy(v.data(), data.data(), data.size()*sizeof(float)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none,context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = this->double_value(*p,semantic_tag::none,context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } + + bool visit_typed_array(const jsoncons::span<const double>& data, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + if (options_.use_typed_arrays()) + { + write_typed_array_tag(std::integral_constant<bool, jsoncons::endian::native == jsoncons::endian::big>(), + double(), + tag); + std::vector<uint8_t> v(data.size()*sizeof(double)); + std::memcpy(v.data(), data.data(), data.size()*sizeof(double)); + write_byte_string_value(byte_string_view(v)); + return true; + } + else + { + bool more = this->begin_array(data.size(), semantic_tag::none,context, ec); + for (auto p = data.begin(); more && p != data.end(); ++p) + { + more = this->double_value(*p,semantic_tag::none,context, ec); + } + if (more) + { + more = this->end_array(context, ec); + } + return more; + } + } +/* + bool visit_typed_array(const jsoncons::span<const float128_type>&, + semantic_tag, + const ser_context&, + std::error_code&) override + { + return true; + } +*/ + bool visit_begin_multi_dim(const jsoncons::span<const size_t>& shape, + semantic_tag 
tag, + const ser_context& context, + std::error_code& ec) override + { + switch (tag) + { + case semantic_tag::multi_dim_column_major: + write_tag(1040); + break; + default: + write_tag(40); + break; + } + bool more = visit_begin_array(2, semantic_tag::none, context, ec); + if (more) + more = visit_begin_array(shape.size(), semantic_tag::none, context, ec); + for (auto it = shape.begin(); more && it != shape.end(); ++it) + { + more = visit_uint64(*it, semantic_tag::none, context, ec); + } + if (more) + { + more = visit_end_array(context, ec); + } + return more; + } + + bool visit_end_multi_dim(const ser_context& context, + std::error_code& ec) override + { + bool more = visit_end_array(context, ec); + return more; + } + + void write_typed_array_tag(std::true_type, + uint16_t, + semantic_tag) + { + write_tag(0x41); // big endian + } + void write_typed_array_tag(std::false_type, + uint16_t, + semantic_tag) + { + write_tag(0x45); + } + + void write_typed_array_tag(std::true_type, + uint32_t, + semantic_tag) + { + write_tag(0x42); // big endian + } + void write_typed_array_tag(std::false_type, + uint32_t, + semantic_tag) + { + write_tag(0x46); // little endian + } + + void write_typed_array_tag(std::true_type, + uint64_t, + semantic_tag) + { + write_tag(0x43); // big endian + } + void write_typed_array_tag(std::false_type, + uint64_t, + semantic_tag) + { + write_tag(0x47); // little endian + } + + void write_typed_array_tag(std::true_type, + int16_t, + semantic_tag) + { + write_tag(0x49); // big endian + } + void write_typed_array_tag(std::false_type, + int16_t, + semantic_tag) + { + write_tag(0x4d); // little endian + } + + void write_typed_array_tag(std::true_type, + int32_t, + semantic_tag) + { + write_tag(0x4a); // big endian + } + void write_typed_array_tag(std::false_type, + int32_t, + semantic_tag) + { + write_tag(0x4e); // little endian + } + + void write_typed_array_tag(std::true_type, + int64_t, + semantic_tag) + { + write_tag(0x4b); // big endian + } + void 
write_typed_array_tag(std::false_type, + int64_t, + semantic_tag) + { + write_tag(0x4f); // little endian + } + + void write_typed_array_tag(std::true_type, + half_arg_t, + semantic_tag) + { + write_tag(0x50); + } + void write_typed_array_tag(std::false_type, + half_arg_t, + semantic_tag) + { + write_tag(0x54); + } + + void write_typed_array_tag(std::true_type, + float, + semantic_tag) + { + write_tag(0x51); // big endian + } + void write_typed_array_tag(std::false_type, + float, + semantic_tag) + { + write_tag(0x55); // little endian + } + + void write_typed_array_tag(std::true_type, + double, + semantic_tag) + { + write_tag(0x52); // big endian + } + void write_typed_array_tag(std::false_type, + double, + semantic_tag) + { + write_tag(0x56); // little endian + } + + void end_value() + { + if (!stack_.empty()) + { + ++stack_.back().count_; + } + } +}; + +using cbor_stream_encoder = basic_cbor_encoder<jsoncons::binary_stream_sink>; +using cbor_bytes_encoder = basic_cbor_encoder<jsoncons::bytes_sink<std::vector<uint8_t>>>; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use cbor_bytes_encoder") typedef cbor_bytes_encoder cbor_bytes_serializer; + +template<class Sink=jsoncons::binary_stream_sink> +using basic_cbor_serializer = basic_cbor_encoder<Sink>; + +JSONCONS_DEPRECATED_MSG("Instead, use cbor_stream_encoder") typedef cbor_stream_encoder cbor_encoder; +JSONCONS_DEPRECATED_MSG("Instead, use cbor_stream_encoder") typedef cbor_stream_encoder cbor_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use cbor_bytes_encoder") typedef cbor_bytes_encoder cbor_buffer_serializer; +#endif + +}} +#endif diff --git a/include/jsoncons_ext/cbor/cbor_error.hpp b/include/jsoncons_ext/cbor/cbor_error.hpp new file mode 100644 index 0000000..a7a6626 --- /dev/null +++ b/include/jsoncons_ext/cbor/cbor_error.hpp @@ -0,0 +1,105 @@ +/// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See https://github.com/danielaparker/jsoncons for latest version
+
+#ifndef JSONCONS_CBOR_CBOR_ERROR_HPP
+#define JSONCONS_CBOR_CBOR_ERROR_HPP
+
+#include <system_error>
+#include <jsoncons/config/jsoncons_config.hpp>
+#include <jsoncons/json_exception.hpp> // jsoncons::ser_error
+
+namespace jsoncons { namespace cbor {
+
+enum class cbor_errc // Error values of the "jsoncons/cbor" category; std::is_error_code_enum is specialized for this enum at the end of the header.
+{
+    success = 0, // The only enumerator without its own case in cbor_error_category_impl::message().
+    unexpected_eof,
+    source_error,
+    invalid_decimal_fraction,
+    invalid_bigfloat,
+    invalid_utf8_text_string,
+    too_many_items,
+    too_few_items,
+    number_too_large,
+    stringref_too_large,
+    max_nesting_depth_exceeded,
+    unknown_type,
+    illegal_chunked_string
+};
+
+class cbor_error_category_impl // std::error_category implementation that names the CBOR category and supplies the text for each cbor_errc value.
+   : public std::error_category
+{
+public:
+    const char* name() const noexcept override // Returns the category name, "jsoncons/cbor".
+    {
+        return "jsoncons/cbor";
+    }
+    std::string message(int ev) const override // Returns the description for ev, which is interpreted as a cbor_errc value.
+    {
+        switch (static_cast<cbor_errc>(ev))
+        {
+            case cbor_errc::unexpected_eof:
+                return "Unexpected end of file";
+            case cbor_errc::source_error:
+                return "Source error";
+            case cbor_errc::invalid_decimal_fraction:
+                return "Invalid decimal fraction";
+            case cbor_errc::invalid_bigfloat:
+                return "Invalid bigfloat";
+            case cbor_errc::invalid_utf8_text_string:
+                return "Illegal UTF-8 encoding in text string";
+            case cbor_errc::too_many_items:
+                return "Too many items were added to a CBOR map or array of known length";
+            case cbor_errc::too_few_items:
+                return "Too few items were added to a CBOR map or array of known length";
+            case cbor_errc::number_too_large:
+                return "Number exceeds implementation limits";
+            case cbor_errc::stringref_too_large:
+                return "stringref exceeds stringref map size";
+            case cbor_errc::max_nesting_depth_exceeded:
+                return "Data item nesting exceeds limit in options";
+            case cbor_errc::unknown_type:
+                return "An unknown type was found in the stream";
+            case cbor_errc::illegal_chunked_string:
+                return "An illegal type was found while parsing an indefinite length string";
+            default: // Reached for cbor_errc::success (0) as well as for any value that is not an enumerator.
+                return "Unknown CBOR parser error";
+        }
+    }
+};
+
+inline
+const std::error_category& cbor_error_category() // Returns a reference to the one shared category object.
+{
+    static cbor_error_category_impl instance; // Function-local static, so every call yields the same object.
+    return instance;
+}
+
+inline
+std::error_code make_error_code(cbor_errc e) // Pairs e with cbor_error_category(); this is the function std::error_code looks up when it is constructed or assigned from a cbor_errc.
+{
+    return std::error_code(static_cast<int>(e),cbor_error_category());
+}
+
+
+#if !defined(JSONCONS_NO_DEPRECATED)
+
+JSONCONS_DEPRECATED_MSG("Instead, use ser_error") typedef ser_error cbor_error; // Deprecated alias kept for older callers; JSONCONS_DEPRECATED_MSG is presumably supplied by jsoncons_config.hpp - confirm.
+JSONCONS_DEPRECATED_MSG("Instead, use ser_error") typedef ser_error cbor_decode_error; // Deprecated alias of ser_error.
+JSONCONS_DEPRECATED_MSG("Instead, use ser_error") typedef ser_error cbor_reader_errc; // NOTE(review): despite the _errc name this aliases ser_error, not cbor_errc - confirm that is intended.
+#endif
+
+}} // namespace jsoncons::cbor
+
+namespace std {
+    template<>
+    struct is_error_code_enum<jsoncons::cbor::cbor_errc> : public true_type // Opts cbor_errc in to implicit conversion to std::error_code (e.g. `ec = cbor_errc::unexpected_eof;`).
+    {
+    };
+}
+
+#endif
diff --git a/include/jsoncons_ext/cbor/cbor_options.hpp b/include/jsoncons_ext/cbor/cbor_options.hpp
new file mode 100644
index 0000000..1de4a4e
--- /dev/null
+++ b/include/jsoncons_ext/cbor/cbor_options.hpp
@@ -0,0 +1,113 @@
+// Copyright 2019 Daniel Parker
+// Distributed under the Boost license, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_CBOR_OPTIONS_HPP +#define JSONCONS_CBOR_CBOR_OPTIONS_HPP + +#include <string> +#include <limits> // std::numeric_limits +#include <cwchar> +#include <jsoncons/json_exception.hpp> +#include <jsoncons_ext/cbor/cbor_detail.hpp> + +namespace jsoncons { namespace cbor { + +class cbor_options; + +class cbor_options_common +{ + friend class cbor_options; + + int max_nesting_depth_; +protected: + virtual ~cbor_options_common() = default; + + cbor_options_common() + : max_nesting_depth_(1024) + { + } + + cbor_options_common(const cbor_options_common&) = default; + cbor_options_common& operator=(const cbor_options_common&) = default; + cbor_options_common(cbor_options_common&&) = default; + cbor_options_common& operator=(cbor_options_common&&) = default; +public: + int max_nesting_depth() const + { + return max_nesting_depth_; + } +}; + +class cbor_decode_options : public virtual cbor_options_common +{ + friend class cbor_options; +public: + cbor_decode_options() + { + } +}; + +class cbor_encode_options : public virtual cbor_options_common +{ + friend class cbor_options; + + bool use_stringref_; + bool use_typed_arrays_; +public: + cbor_encode_options() + : use_stringref_(false), + use_typed_arrays_(false) + { + } + + bool pack_strings() const + { + return use_stringref_; + } + + bool use_typed_arrays() const + { + return use_typed_arrays_; + } +}; + +class cbor_options final : public cbor_decode_options, public cbor_encode_options +{ +public: + using cbor_options_common::max_nesting_depth; + using cbor_encode_options::pack_strings; + using cbor_encode_options::use_typed_arrays; + + cbor_options& max_nesting_depth(int value) + { + this->max_nesting_depth_ = value; + return *this; + } + + cbor_options& pack_strings(bool value) + { + this->use_stringref_ = value; + return *this; + } + + 
cbor_options& use_typed_arrays(bool value) + { + this->use_typed_arrays_ = value; + return *this; + } + +#if !defined(JSONCONS_NO_DEPRECATED) + JSONCONS_DEPRECATED_MSG("Instead, use use_typed_arrays(bool)") + cbor_options& enable_typed_arrays(bool value) + { + this->use_typed_arrays_ = value; + return *this; + } +#endif +}; + +}} +#endif diff --git a/include/jsoncons_ext/cbor/cbor_parser.hpp b/include/jsoncons_ext/cbor/cbor_parser.hpp new file mode 100644 index 0000000..f3d03bb --- /dev/null +++ b/include/jsoncons_ext/cbor/cbor_parser.hpp @@ -0,0 +1,1942 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_CBOR_PARSER_HPP +#define JSONCONS_CBOR_CBOR_PARSER_HPP + +#include <string> +#include <vector> +#include <memory> +#include <utility> // std::move +#include <bitset> // std::bitset +#include <jsoncons/json.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/cbor/cbor_error.hpp> +#include <jsoncons_ext/cbor/cbor_detail.hpp> +#include <jsoncons_ext/cbor/cbor_options.hpp> +#include <jsoncons/json_visitor2.hpp> + +namespace jsoncons { namespace cbor { + +enum class parse_mode {root,accept,array,indefinite_array,map_key,map_value,indefinite_map_key,indefinite_map_value,multi_dim}; + +struct mapped_string +{ + jsoncons::cbor::detail::cbor_major_type type; + std::string s; + std::vector<uint8_t> bytes; + + mapped_string(const std::string& s) + : type(jsoncons::cbor::detail::cbor_major_type::text_string), s(s) + { + } + + mapped_string(std::string&& s) + : type(jsoncons::cbor::detail::cbor_major_type::text_string), s(std::move(s)) + { + } + + mapped_string(const std::vector<uint8_t>& bytes) + : type(jsoncons::cbor::detail::cbor_major_type::byte_string), 
bytes(bytes) + { + } + + mapped_string(std::vector<uint8_t>&& bytes) + : type(jsoncons::cbor::detail::cbor_major_type::byte_string), bytes(std::move(bytes)) + { + } + + mapped_string(const mapped_string&) = default; + + mapped_string(mapped_string&&) = default; + + mapped_string& operator=(const mapped_string&) = default; + + mapped_string& operator=(mapped_string&&) = default; +}; + +struct parse_state +{ + parse_mode mode; + std::size_t length; + std::size_t index; + bool pop_stringref_map_stack; + + parse_state(parse_mode mode, std::size_t length, bool pop_stringref_map_stack = false) noexcept + : mode(mode), length(length), index(0), pop_stringref_map_stack(pop_stringref_map_stack) + { + } + + parse_state(const parse_state&) = default; + parse_state(parse_state&&) = default; +}; + +template <class Source,class Allocator=std::allocator<char>> +class basic_cbor_parser : public ser_context +{ + using char_type = char; + using char_traits_type = std::char_traits<char>; + using allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<char_type>; + using byte_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<uint8_t>; + using tag_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<uint64_t>; + using parse_state_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<parse_state>; + using stringref_map = std::vector<mapped_string>; + using stringref_map_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<stringref_map>; + + using string_type = std::basic_string<char_type,char_traits_type,char_allocator_type>; + + enum {stringref_tag, // 25 + stringref_namespace_tag, // 256 + item_tag, + num_of_tags}; + + std::bitset<num_of_tags> other_tags_; + + allocator_type alloc_; + Source source_; + cbor_decode_options options_; + + bool more_; + bool done_; + 
string_type text_buffer_; + std::vector<uint8_t,byte_allocator_type> bytes_buffer_; + uint64_t item_tag_; + std::vector<parse_state,parse_state_allocator_type> state_stack_; + std::vector<uint8_t,byte_allocator_type> typed_array_; + std::vector<std::size_t> shape_; + std::size_t index_; // TODO: Never used! + std::vector<stringref_map,stringref_map_allocator_type> stringref_map_stack_; + int nesting_depth_; + + struct read_byte_string_from_buffer + { + byte_string_view bytes; + + read_byte_string_from_buffer(const byte_string_view& b) + : bytes(b) + { + } + template <class Container> + void operator()(Container& c, std::error_code&) + { + c.clear(); + c.reserve(bytes.size()); + for (auto b : bytes) + { + c.push_back(b); + } + } + }; + + struct read_byte_string_from_source + { + basic_cbor_parser<Source,Allocator>* source; + + read_byte_string_from_source(basic_cbor_parser<Source,Allocator>* source) + : source(source) + { + } + template <class Container> + void operator()(Container& c, std::error_code& ec) + { + source->read_byte_string(c,ec); + } + }; + +public: + template <class Sourceable> + basic_cbor_parser(Sourceable&& source, + const cbor_decode_options& options = cbor_decode_options(), + const Allocator alloc = Allocator()) + : alloc_(alloc), + source_(std::forward<Sourceable>(source)), + options_(options), + more_(true), + done_(false), + text_buffer_(alloc), + bytes_buffer_(alloc), + item_tag_(0), + state_stack_(alloc), + typed_array_(alloc), + index_(0), + stringref_map_stack_(alloc), + nesting_depth_(0) + { + state_stack_.emplace_back(parse_mode::root,0); + } + + void restart() + { + more_ = true; + } + + void reset() + { + more_ = true; + done_ = false; + text_buffer_.clear(); + bytes_buffer_.clear(); + item_tag_ = 0; + state_stack_.clear(); + state_stack_.emplace_back(parse_mode::root,0); + typed_array_.clear(); + stringref_map_stack_.clear(); + nesting_depth_ = 0; + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + source_ = 
std::forward<Sourceable>(source); + reset(); + } + + bool done() const + { + return done_; + } + + bool stopped() const + { + return !more_; + } + + std::size_t line() const override + { + return 0; + } + + std::size_t column() const override + { + return source_.position(); + } + + void parse(json_visitor2& visitor, std::error_code& ec) + { + while (!done_ && more_) + { + switch (state_stack_.back().mode) + { + case parse_mode::multi_dim: + { + if (state_stack_.back().index == 0) + { + ++state_stack_.back().index; + read_item(visitor, ec); + } + else + { + produce_end_multi_dim(visitor, ec); + } + break; + } + case parse_mode::array: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_item(visitor, ec); + } + else + { + end_array(visitor, ec); + } + break; + } + case parse_mode::indefinite_array: + { + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == 0xff) + { + source_.ignore(1); + end_array(visitor, ec); + } + else + { + read_item(visitor, ec); + } + break; + } + case parse_mode::map_key: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + state_stack_.back().mode = parse_mode::map_value; + read_item(visitor, ec); + } + else + { + end_object(visitor, ec); + } + break; + } + case parse_mode::map_value: + { + state_stack_.back().mode = parse_mode::map_key; + read_item(visitor, ec); + break; + } + case parse_mode::indefinite_map_key: + { + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == 0xff) + { + source_.ignore(1); + end_object(visitor, ec); + } + else + { + state_stack_.back().mode = parse_mode::indefinite_map_value; + read_item(visitor, ec); + } + break; + } + case parse_mode::indefinite_map_value: + { + state_stack_.back().mode = parse_mode::indefinite_map_key; + read_item(visitor, ec); + break; + } + case 
parse_mode::root: + { + state_stack_.back().mode = parse_mode::accept; + read_item(visitor, ec); + break; + } + case parse_mode::accept: + { + JSONCONS_ASSERT(state_stack_.size() == 1); + state_stack_.clear(); + more_ = false; + done_ = true; + visitor.flush(); + break; + } + } + } + } +private: + void read_item(json_visitor2& visitor, std::error_code& ec) + { + read_tags(ec); + if (!more_) + { + return; + } + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + jsoncons::cbor::detail::cbor_major_type major_type = get_major_type(c.value); + uint8_t info = get_additional_information_value(c.value); + + switch (major_type) + { + case jsoncons::cbor::detail::cbor_major_type::unsigned_integer: + { + uint64_t val = get_uint64_value(ec); + if (ec) + { + return; + } + if (!stringref_map_stack_.empty() && other_tags_[stringref_tag]) + { + other_tags_[stringref_tag] = false; + if (val >= stringref_map_stack_.back().size()) + { + ec = cbor_errc::stringref_too_large; + more_ = false; + return; + } + stringref_map::size_type index = (stringref_map::size_type)val; + if (index != val) + { + ec = cbor_errc::number_too_large; + more_ = false; + return; + } + auto& str = stringref_map_stack_.back().at(index); + switch (str.type) + { + case jsoncons::cbor::detail::cbor_major_type::text_string: + { + handle_string(visitor, jsoncons::basic_string_view<char>(str.s.data(),str.s.length()),ec); + if (ec) + { + return; + } + break; + } + case jsoncons::cbor::detail::cbor_major_type::byte_string: + { + read_byte_string_from_buffer read(byte_string_view(str.bytes)); + write_byte_string(read, visitor, ec); + if (ec) + { + return; + } + break; + } + default: + JSONCONS_UNREACHABLE(); + break; + } + } + else + { + semantic_tag tag = semantic_tag::none; + if (other_tags_[item_tag]) + { + if (item_tag_ == 1) + { + tag = semantic_tag::epoch_second; + } + other_tags_[item_tag] = false; + } + more_ = visitor.uint64_value(val, tag, *this, ec); + 
} + break; + } + case jsoncons::cbor::detail::cbor_major_type::negative_integer: + { + int64_t val = get_int64_value(ec); + if (ec) + { + return; + } + semantic_tag tag = semantic_tag::none; + if (other_tags_[item_tag]) + { + if (item_tag_ == 1) + { + tag = semantic_tag::epoch_second; + } + other_tags_[item_tag] = false; + } + more_ = visitor.int64_value(val, tag, *this, ec); + break; + } + case jsoncons::cbor::detail::cbor_major_type::byte_string: + { + read_byte_string_from_source read(this); + write_byte_string(read, visitor, ec); + if (ec) + { + return; + } + break; + } + case jsoncons::cbor::detail::cbor_major_type::text_string: + { + text_buffer_.clear(); + read_text_string(text_buffer_, ec); + if (ec) + { + return; + } + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = cbor_errc::invalid_utf8_text_string; + more_ = false; + return; + } + handle_string(visitor, jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()),ec); + if (ec) + { + return; + } + break; + } + case jsoncons::cbor::detail::cbor_major_type::semantic_tag: + { + JSONCONS_UNREACHABLE(); + break; + } + case jsoncons::cbor::detail::cbor_major_type::simple: + { + switch (info) + { + case 0x14: + more_ = visitor.bool_value(false, semantic_tag::none, *this, ec); + source_.ignore(1); + break; + case 0x15: + more_ = visitor.bool_value(true, semantic_tag::none, *this, ec); + source_.ignore(1); + break; + case 0x16: + more_ = visitor.null_value(semantic_tag::none, *this, ec); + source_.ignore(1); + break; + case 0x17: + more_ = visitor.null_value(semantic_tag::undefined, *this, ec); + source_.ignore(1); + break; + case 0x19: // Half-Precision Float (two-byte IEEE 754) + { + uint64_t val = get_uint64_value(ec); + if (ec) + { + return; + } + more_ = visitor.half_value(static_cast<uint16_t>(val), semantic_tag::none, *this, ec); + break; + } + case 0x1a: // Single-Precision Float (four-byte IEEE 
754) + case 0x1b: // Double-Precision Float (eight-byte IEEE 754) + { + double val = get_double(ec); + if (ec) + { + return; + } + semantic_tag tag = semantic_tag::none; + if (other_tags_[item_tag]) + { + if (item_tag_ == 1) + { + tag = semantic_tag::epoch_second; + } + other_tags_[item_tag] = false; + } + more_ = visitor.double_value(val, tag, *this, ec); + break; + } + default: + { + ec = cbor_errc::unknown_type; + more_ = false; + return; + } + } + break; + } + case jsoncons::cbor::detail::cbor_major_type::array: + { + if (other_tags_[item_tag]) + { + switch (item_tag_) + { + case 0x04: + text_buffer_.clear(); + read_decimal_fraction(text_buffer_, ec); + if (ec) + { + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::bigdec, *this, ec); + break; + case 0x05: + text_buffer_.clear(); + read_bigfloat(text_buffer_, ec); + if (ec) + { + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::bigfloat, *this, ec); + break; + case 40: // row major storage + produce_begin_multi_dim(visitor, semantic_tag::multi_dim_row_major, ec); + break; + case 1040: // column major storage + produce_begin_multi_dim(visitor, semantic_tag::multi_dim_column_major, ec); + break; + default: + begin_array(visitor, info, ec); + break; + } + other_tags_[item_tag] = false; + } + else + { + begin_array(visitor, info, ec); + } + break; + } + case jsoncons::cbor::detail::cbor_major_type::map: + { + begin_object(visitor, info, ec); + break; + } + default: + break; + } + other_tags_[item_tag] = false; + } + + void begin_array(json_visitor2& visitor, uint8_t info, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = cbor_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + semantic_tag tag = semantic_tag::none; + bool pop_stringref_map_stack = false; + if (other_tags_[stringref_namespace_tag]) + { + stringref_map_stack_.emplace_back(alloc_); + other_tags_[stringref_namespace_tag] = false; + 
pop_stringref_map_stack = true; + } + switch (info) + { + case jsoncons::cbor::detail::additional_info::indefinite_length: + { + state_stack_.emplace_back(parse_mode::indefinite_array,0,pop_stringref_map_stack); + more_ = visitor.begin_array(tag, *this, ec); + source_.ignore(1); + break; + } + default: // definite length + { + std::size_t len = get_size(ec); + if (!more_) + { + return; + } + state_stack_.emplace_back(parse_mode::array,len,pop_stringref_map_stack); + more_ = visitor.begin_array(len, tag, *this, ec); + break; + } + } + } + + void end_array(json_visitor2& visitor, std::error_code& ec) + { + --nesting_depth_; + + more_ = visitor.end_array(*this, ec); + if (state_stack_.back().pop_stringref_map_stack) + { + stringref_map_stack_.pop_back(); + } + state_stack_.pop_back(); + } + + void begin_object(json_visitor2& visitor, uint8_t info, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = cbor_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + bool pop_stringref_map_stack = false; + if (other_tags_[stringref_namespace_tag]) + { + stringref_map_stack_.emplace_back(alloc_); + other_tags_[stringref_namespace_tag] = false; + pop_stringref_map_stack = true; + } + switch (info) + { + case jsoncons::cbor::detail::additional_info::indefinite_length: + { + state_stack_.emplace_back(parse_mode::indefinite_map_key,0,pop_stringref_map_stack); + more_ = visitor.begin_object(semantic_tag::none, *this, ec); + source_.ignore(1); + break; + } + default: // definite_length + { + std::size_t len = get_size(ec); + if (!more_) + { + return; + } + state_stack_.emplace_back(parse_mode::map_key,len,pop_stringref_map_stack); + more_ = visitor.begin_object(len, semantic_tag::none, *this, ec); + break; + } + } + } + + void end_object(json_visitor2& visitor, std::error_code& ec) + { + --nesting_depth_; + more_ = visitor.end_object(*this, ec); + if (state_stack_.back().pop_stringref_map_stack) + { + 
stringref_map_stack_.pop_back(); + } + state_stack_.pop_back(); + } + + void read_text_string(string_type& s, std::error_code& ec) + { + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + jsoncons::cbor::detail::cbor_major_type major_type = get_major_type(c.value); + uint8_t info = get_additional_information_value(c.value); + + JSONCONS_ASSERT(major_type == jsoncons::cbor::detail::cbor_major_type::text_string); + auto func = [&s](Source& source, std::size_t length, std::error_code& ec) -> bool + { + if (source_reader<Source>::read(source, s, length) != length) + { + ec = cbor_errc::unexpected_eof; + return false; + } + return true; + }; + iterate_string_chunks(func, major_type, ec); + if (!stringref_map_stack_.empty() && + info != jsoncons::cbor::detail::additional_info::indefinite_length && + s.length() >= jsoncons::cbor::detail::min_length_for_stringref(stringref_map_stack_.back().size())) + { + stringref_map_stack_.back().emplace_back(s); + } + } + + std::size_t get_size(std::error_code& ec) + { + uint64_t u = get_uint64_value(ec); + if (!more_) + { + return 0; + } + std::size_t len = static_cast<std::size_t>(u); + if (len != u) + { + ec = cbor_errc::number_too_large; + more_ = false; + } + return len; + } + + bool read_byte_string(std::vector<uint8_t,byte_allocator_type>& v, std::error_code& ec) + { + bool more = true; + v.clear(); + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more = false; + return more; + } + jsoncons::cbor::detail::cbor_major_type major_type = get_major_type(c.value); + uint8_t info = get_additional_information_value(c.value); + + JSONCONS_ASSERT(major_type == jsoncons::cbor::detail::cbor_major_type::byte_string); + + switch(info) + { + case jsoncons::cbor::detail::additional_info::indefinite_length: + { + auto func = [&v,&more](Source& source, std::size_t length, std::error_code& ec) -> bool + { + if (source_reader<Source>::read(source, v, length) != 
length) + { + ec = cbor_errc::unexpected_eof; + more = false; + return more; + } + return true; + }; + iterate_string_chunks(func, major_type, ec); + break; + } + default: + { + std::size_t length = get_size(ec); + if (ec) + { + more = false; + return more; + } + if (source_reader<Source>::read(source_, v, length) != length) + { + ec = cbor_errc::unexpected_eof; + more = false; + return more; + } + if (!stringref_map_stack_.empty() && + v.size() >= jsoncons::cbor::detail::min_length_for_stringref(stringref_map_stack_.back().size())) + { + stringref_map_stack_.back().emplace_back(v); + } + break; + } + } + return more; + } + + template <class Function> + void iterate_string_chunks(Function& func, jsoncons::cbor::detail::cbor_major_type type, std::error_code& ec) + { + int nesting_level = 0; + + bool done = false; + while (!done) + { + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + if (nesting_level > 0 && c.value == 0xff) + { + --nesting_level; + if (nesting_level == 0) + { + done = true; + } + source_.ignore(1); + continue; + } + + jsoncons::cbor::detail::cbor_major_type major_type = get_major_type(c.value); + if (major_type != type) + { + ec = cbor_errc::illegal_chunked_string; + more_ = false; + return; + } + uint8_t info = get_additional_information_value(c.value); + + switch (info) + { + case jsoncons::cbor::detail::additional_info::indefinite_length: + { + ++nesting_level; + source_.ignore(1); + break; + } + default: // definite length + { + std::size_t length = get_size(ec); + if (!more_) + { + return; + } + more_ = func(source_, length, ec); + if (!more_) + { + return; + } + if (nesting_level == 0) + { + done = true; + } + break; + } + } + } + } + + uint64_t get_uint64_value(std::error_code& ec) + { + uint64_t val = 0; + + uint8_t initial_b; + if (source_.read(&initial_b, 1) == 0) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return 0; + } + uint8_t info = 
get_additional_information_value(initial_b); + switch (info) + { + case JSONCONS_CBOR_0x00_0x17: // Integer 0x00..0x17 (0..23) + { + val = info; + break; + } + + case 0x18: // Unsigned integer (one-byte uint8_t follows) + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return val; + } + val = b; + break; + } + + case 0x19: // Unsigned integer (two-byte uint16_t follows) + { + uint8_t buf[sizeof(uint16_t)]; + source_.read(buf, sizeof(uint16_t)); + val = binary::big_to_native<uint16_t>(buf, sizeof(buf)); + break; + } + + case 0x1a: // Unsigned integer (four-byte uint32_t follows) + { + uint8_t buf[sizeof(uint32_t)]; + source_.read(buf, sizeof(uint32_t)); + val = binary::big_to_native<uint32_t>(buf, sizeof(buf)); + break; + } + + case 0x1b: // Unsigned integer (eight-byte uint64_t follows) + { + uint8_t buf[sizeof(uint64_t)]; + source_.read(buf, sizeof(uint64_t)); + val = binary::big_to_native<uint64_t>(buf, sizeof(buf)); + break; + } + default: + break; + } + return val; + } + + int64_t get_int64_value(std::error_code& ec) + { + int64_t val = 0; + + auto ch = source_.peek(); + if (ch.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return val; + } + + jsoncons::cbor::detail::cbor_major_type major_type = get_major_type(ch.value); + uint8_t info = get_additional_information_value(ch.value); + switch (major_type) + { + case jsoncons::cbor::detail::cbor_major_type::negative_integer: + source_.ignore(1); + switch (info) + { + case JSONCONS_CBOR_0x00_0x17: // 0x00..0x17 (0..23) + { + val = static_cast<int8_t>(- 1 - info); + break; + } + case 0x18: // Negative integer (one-byte uint8_t follows) + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return val; + } + val = static_cast<int64_t>(-1) - static_cast<int64_t>(b); + break; + } + + case 0x19: // Negative integer -1-n (two-byte uint16_t follows) + { + uint8_t buf[sizeof(uint16_t)]; + if 
(source_.read(buf, sizeof(uint16_t)) != sizeof(uint16_t)) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return val; + } + auto x = binary::big_to_native<uint16_t>(buf, sizeof(buf)); + val = static_cast<int64_t>(-1)- x; + break; + } + + case 0x1a: // Negative integer -1-n (four-byte uint32_t follows) + { + uint8_t buf[sizeof(uint32_t)]; + if (source_.read(buf, sizeof(uint32_t)) != sizeof(uint32_t)) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return val; + } + auto x = binary::big_to_native<uint32_t>(buf, sizeof(buf)); + val = static_cast<int64_t>(-1)- x; + break; + } + + case 0x1b: // Negative integer -1-n (eight-byte uint64_t follows) + { + uint8_t buf[sizeof(uint64_t)]; + if (source_.read(buf, sizeof(uint64_t)) != sizeof(uint64_t)) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return val; + } + auto x = binary::big_to_native<uint64_t>(buf, sizeof(buf)); + val = static_cast<int64_t>(-1)- static_cast<int64_t>(x); + break; + } + } + break; + + case jsoncons::cbor::detail::cbor_major_type::unsigned_integer: + { + uint64_t x = get_uint64_value(ec); + if (ec) + { + return 0; + } + if (x <= static_cast<uint64_t>((std::numeric_limits<int64_t>::max)())) + { + val = x; + } + else + { + // error; + } + + break; + } + break; + default: + break; + } + + return val; + } + + double get_double(std::error_code& ec) + { + double val = 0; + + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return 0; + } + uint8_t info = get_additional_information_value(b); + switch (info) + { + case 0x1a: // Single-Precision Float (four-byte IEEE 754) + { + uint8_t buf[sizeof(float)]; + if (source_.read(buf, sizeof(float)) !=sizeof(float)) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return 0; + } + val = binary::big_to_native<float>(buf, sizeof(buf)); + break; + } + + case 0x1b: // Double-Precision Float (eight-byte IEEE 754) + { + uint8_t buf[sizeof(double)]; + if (source_.read(buf, sizeof(double)) 
!= sizeof(double)) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return 0; + } + val = binary::big_to_native<double>(buf, sizeof(buf)); + break; + } + default: + break; + } + + return val; + } + + void read_decimal_fraction(string_type& result, std::error_code& ec) + { + std::size_t size = get_size(ec); + if (!more_) + { + return; + } + if (size != 2) + { + ec = cbor_errc::invalid_decimal_fraction; + more_ = false; + return; + } + + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + int64_t exponent = 0; + switch (get_major_type(c.value)) + { + case jsoncons::cbor::detail::cbor_major_type::unsigned_integer: + { + exponent = get_uint64_value(ec); + if (ec) + { + return; + } + break; + } + case jsoncons::cbor::detail::cbor_major_type::negative_integer: + { + exponent = get_int64_value(ec); + if (ec) + { + return; + } + break; + } + default: + { + ec = cbor_errc::invalid_decimal_fraction; + more_ = false; + return; + } + } + + string_type s; + + c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + + switch (get_major_type(c.value)) + { + case jsoncons::cbor::detail::cbor_major_type::unsigned_integer: + { + uint64_t val = get_uint64_value(ec); + if (ec) + { + return; + } + jsoncons::detail::from_integer(val, s); + break; + } + case jsoncons::cbor::detail::cbor_major_type::negative_integer: + { + int64_t val = get_int64_value(ec); + if (ec) + { + return; + } + jsoncons::detail::from_integer(val, s); + break; + } + case jsoncons::cbor::detail::cbor_major_type::semantic_tag: + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + uint8_t tag = get_additional_information_value(b); + c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + + if (get_major_type(c.value) == jsoncons::cbor::detail::cbor_major_type::byte_string) + { + 
bytes_buffer_.clear(); + read_byte_string(bytes_buffer_, ec); + if (ec) + { + more_ = false; + return; + } + if (tag == 2) + { + bigint n = bigint::from_bytes_be(1, bytes_buffer_.data(), bytes_buffer_.size()); + n.write_string(s); + } + else if (tag == 3) + { + bigint n = bigint::from_bytes_be(1, bytes_buffer_.data(), bytes_buffer_.size()); + n = -1 - n; + n.write_string(s); + } + } + break; + } + default: + { + ec = cbor_errc::invalid_decimal_fraction; + more_ = false; + return; + } + } + + if (s.size() >= static_cast<std::size_t>((std::numeric_limits<int32_t>::max)()) || + exponent >= (std::numeric_limits<int32_t>::max)() || + exponent <= (std::numeric_limits<int32_t>::min)()) + { + ec = cbor_errc::invalid_decimal_fraction; + more_ = false; + return; + } + else if (s.size() > 0) + { + if (s[0] == '-') + { + result.push_back('-'); + jsoncons::detail::prettify_string(s.c_str()+1, s.size()-1, (int)exponent, -4, 17, result); + } + else + { + jsoncons::detail::prettify_string(s.c_str(), s.size(), (int)exponent, -4, 17, result); + } + } + else + { + ec = cbor_errc::invalid_decimal_fraction; + more_ = false; + return; + } + } + + void read_bigfloat(string_type& s, std::error_code& ec) + { + std::size_t size = get_size(ec); + if (!more_) + { + return; + } + if (size != 2) + { + ec = cbor_errc::invalid_bigfloat; + more_ = false; + return; + } + + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + int64_t exponent = 0; + switch (get_major_type(c.value)) + { + case jsoncons::cbor::detail::cbor_major_type::unsigned_integer: + { + exponent = get_uint64_value(ec); + if (ec) + { + return; + } + break; + } + case jsoncons::cbor::detail::cbor_major_type::negative_integer: + { + exponent = get_int64_value(ec); + if (ec) + { + return; + } + break; + } + default: + { + ec = cbor_errc::invalid_bigfloat; + more_ = false; + return; + } + } + + c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = 
false; + return; + } + switch (get_major_type(c.value)) + { + case jsoncons::cbor::detail::cbor_major_type::unsigned_integer: + { + uint64_t val = get_uint64_value(ec); + if (ec) + { + return; + } + s.push_back('0'); + s.push_back('x'); + jsoncons::detail::integer_to_string_hex(val, s); + break; + } + case jsoncons::cbor::detail::cbor_major_type::negative_integer: + { + int64_t val = get_int64_value(ec); + if (ec) + { + return; + } + s.push_back('-'); + s.push_back('0'); + s.push_back('x'); + jsoncons::detail::integer_to_string_hex(static_cast<uint64_t>(-val), s); + break; + } + case jsoncons::cbor::detail::cbor_major_type::semantic_tag: + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + uint8_t tag = get_additional_information_value(b); + + c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + + if (get_major_type(c.value) == jsoncons::cbor::detail::cbor_major_type::byte_string) + { + bytes_buffer_.clear(); + more_ = read_byte_string(bytes_buffer_, ec); + if (!more_) + { + return; + } + if (tag == 2) + { + s.push_back('0'); + s.push_back('x'); + bigint n = bigint::from_bytes_be(1, bytes_buffer_.data(), bytes_buffer_.size()); + n.write_string_hex(s); + } + else if (tag == 3) + { + s.push_back('-'); + s.push_back('0'); + bigint n = bigint::from_bytes_be(1, bytes_buffer_.data(), bytes_buffer_.size()); + n = -1 - n; + n.write_string_hex(s); + s[2] = 'x'; // overwrite minus + } + } + break; + } + default: + { + ec = cbor_errc::invalid_bigfloat; + more_ = false; + return; + } + } + + s.push_back('p'); + if (exponent >=0) + { + jsoncons::detail::integer_to_string_hex(static_cast<uint64_t>(exponent), s); + } + else + { + s.push_back('-'); + jsoncons::detail::integer_to_string_hex(static_cast<uint64_t>(-exponent), s); + } + } + + static jsoncons::cbor::detail::cbor_major_type get_major_type(uint8_t type) + { + static constexpr uint8_t major_type_shift = 
0x05; + uint8_t value = type >> major_type_shift; + return static_cast<jsoncons::cbor::detail::cbor_major_type>(value); + } + + static uint8_t get_additional_information_value(uint8_t type) + { + static constexpr uint8_t additional_information_mask = (1U << 5) - 1; + uint8_t value = type & additional_information_mask; + return value; + } + + void read_tags(std::error_code& ec) + { + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + jsoncons::cbor::detail::cbor_major_type major_type = get_major_type(c.value); + + while (major_type == jsoncons::cbor::detail::cbor_major_type::semantic_tag) + { + uint64_t val = get_uint64_value(ec); + if (!more_) + { + return; + } + switch(val) + { + case 25: // stringref + other_tags_[stringref_tag] = true; + break; + case 256: // stringref-namespace + other_tags_[stringref_namespace_tag] = true; + break; + default: + other_tags_[item_tag] = true; + item_tag_ = val; + break; + } + c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + major_type = get_major_type(c.value); + } + } + + void handle_string(json_visitor2& visitor, const jsoncons::basic_string_view<char>& v, std::error_code& ec) + { + semantic_tag tag = semantic_tag::none; + if (other_tags_[item_tag]) + { + switch (item_tag_) + { + case 0: + tag = semantic_tag::datetime; + break; + case 32: + tag = semantic_tag::uri; + break; + case 33: + tag = semantic_tag::base64url; + break; + case 34: + tag = semantic_tag::base64; + break; + default: + break; + } + other_tags_[item_tag] = false; + } + more_ = visitor.string_value(v, tag, *this, ec); + } + + static jsoncons::endian get_typed_array_endianness(const uint8_t tag) + { + return ((tag & detail::cbor_array_tags_e_mask) >> detail::cbor_array_tags_e_shift) == 0 ? 
jsoncons::endian::big : jsoncons::endian::little; + } + + static std::size_t get_typed_array_bytes_per_element(const uint8_t tag) + { + const uint8_t f = (tag & detail::cbor_array_tags_f_mask) >> detail::cbor_array_tags_f_shift; + const uint8_t ll = (tag & detail::cbor_array_tags_ll_mask) >> detail::cbor_array_tags_ll_shift; + + return std::size_t(1) << (f + ll); + } + + template <typename Read> + void write_byte_string(Read read, json_visitor2& visitor, std::error_code& ec) + { + if (other_tags_[item_tag]) + { + switch (item_tag_) + { + case 0x2: + { + bytes_buffer_.clear(); + read(bytes_buffer_,ec); + if (ec) + { + more_ = false; + return; + } + bigint n = bigint::from_bytes_be(1, bytes_buffer_.data(), bytes_buffer_.size()); + text_buffer_.clear(); + n.write_string(text_buffer_); + more_ = visitor.string_value(text_buffer_, semantic_tag::bigint, *this, ec); + break; + } + case 0x3: + { + bytes_buffer_.clear(); + read(bytes_buffer_,ec); + if (ec) + { + more_ = false; + return; + } + bigint n = bigint::from_bytes_be(1, bytes_buffer_.data(), bytes_buffer_.size()); + n = -1 - n; + text_buffer_.clear(); + n.write_string(text_buffer_); + more_ = visitor.string_value(text_buffer_, semantic_tag::bigint, *this, ec); + break; + } + case 0x15: + { + read(bytes_buffer_,ec); + if (ec) + { + more_ = false; + return; + } + more_ = visitor.byte_string_value(bytes_buffer_, semantic_tag::base64url, *this, ec); + break; + } + case 0x16: + { + read(bytes_buffer_,ec); + if (ec) + { + more_ = false; + return; + } + more_ = visitor.byte_string_value(bytes_buffer_, semantic_tag::base64, *this, ec); + break; + } + case 0x17: + { + read(bytes_buffer_,ec); + if (ec) + { + more_ = false; + return; + } + more_ = visitor.byte_string_value(bytes_buffer_, semantic_tag::base16, *this, ec); + break; + } + case 0x40: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + uint8_t* data = reinterpret_cast<uint8_t*>(typed_array_.data()); + std::size_t size 
= typed_array_.size(); + more_ = visitor.typed_array(jsoncons::span<const uint8_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x44: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + uint8_t* data = reinterpret_cast<uint8_t*>(typed_array_.data()); + std::size_t size = typed_array_.size(); + more_ = visitor.typed_array(jsoncons::span<const uint8_t>(data,size), semantic_tag::clamped, *this, ec); + break; + } + case 0x41: + case 0x45: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = (uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + uint16_t* data = reinterpret_cast<uint16_t*>(typed_array_.data()); + std::size_t size = typed_array_.size()/bytes_per_elem; + + if (e != jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<uint16_t>(data[i]); + } + } + more_ = visitor.typed_array(jsoncons::span<const uint16_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x42: + case 0x46: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = (uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + uint32_t* data = reinterpret_cast<uint32_t*>(typed_array_.data()); + std::size_t size = typed_array_.size()/bytes_per_elem; + if (e != jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<uint32_t>(data[i]); + } + } + more_ = visitor.typed_array(jsoncons::span<const uint32_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x43: + case 0x47: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = 
(uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + uint64_t* data = reinterpret_cast<uint64_t*>(typed_array_.data()); + std::size_t size = typed_array_.size()/bytes_per_elem; + if (e != jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<uint64_t>(data[i]); + } + } + more_ = visitor.typed_array(jsoncons::span<const uint64_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x48: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + int8_t* data = reinterpret_cast<int8_t*>(typed_array_.data()); + std::size_t size = typed_array_.size(); + more_ = visitor.typed_array(jsoncons::span<const int8_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x49: + case 0x4d: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = (uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + int16_t* data = reinterpret_cast<int16_t*>(typed_array_.data()); + std::size_t size = typed_array_.size()/bytes_per_elem; + if (e != jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<int16_t>(data[i]); + } + } + more_ = visitor.typed_array(jsoncons::span<const int16_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x4a: + case 0x4e: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = (uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + int32_t* data = reinterpret_cast<int32_t*>(typed_array_.data()); + std::size_t size = typed_array_.size()/bytes_per_elem; + if (e != 
jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<int32_t>(data[i]); + } + } + more_ = visitor.typed_array(jsoncons::span<const int32_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x4b: + case 0x4f: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = (uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + int64_t* data = reinterpret_cast<int64_t*>(typed_array_.data()); + std::size_t size = typed_array_.size()/bytes_per_elem; + if (e != jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<int64_t>(data[i]); + } + } + more_ = visitor.typed_array(jsoncons::span<const int64_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x50: + case 0x54: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = (uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + uint16_t* data = reinterpret_cast<uint16_t*>(typed_array_.data()); + std::size_t size = typed_array_.size()/bytes_per_elem; + if (e != jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<uint16_t>(data[i]); + } + } + more_ = visitor.typed_array(half_arg, jsoncons::span<const uint16_t>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x51: + case 0x55: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = (uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + float* data = reinterpret_cast<float*>(typed_array_.data()); + std::size_t 
size = typed_array_.size()/bytes_per_elem; + if (e != jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<float>(data[i]); + } + } + more_ = visitor.typed_array(jsoncons::span<const float>(data,size), semantic_tag::none, *this, ec); + break; + } + case 0x52: + case 0x56: + { + typed_array_.clear(); + read(typed_array_,ec); + if (ec) + { + more_ = false; + return; + } + const uint8_t tag = (uint8_t)item_tag_; + jsoncons::endian e = get_typed_array_endianness(tag); + const size_t bytes_per_elem = get_typed_array_bytes_per_element(tag); + + double* data = reinterpret_cast<double*>(typed_array_.data()); + std::size_t size = typed_array_.size()/bytes_per_elem; + + if (e != jsoncons::endian::native) + { + for (std::size_t i = 0; i < size; ++i) + { + data[i] = binary::byte_swap<double>(data[i]); + } + } + more_ = visitor.typed_array(jsoncons::span<const double>(data,size), semantic_tag::none, *this, ec); + break; + } + default: + { + read(bytes_buffer_,ec); + if (ec) + { + more_ = false; + return; + } + more_ = visitor.byte_string_value(bytes_buffer_, item_tag_, *this, ec); + break; + } + } + other_tags_[item_tag] = false; + } + else + { + read(bytes_buffer_,ec); + if (ec) + { + return; + } + more_ = visitor.byte_string_value(bytes_buffer_, semantic_tag::none, *this, ec); + } + } + + void produce_begin_multi_dim(json_visitor2& visitor, + semantic_tag tag, + std::error_code& ec) + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + jsoncons::cbor::detail::cbor_major_type major_type = get_major_type(b); + JSONCONS_ASSERT(major_type == jsoncons::cbor::detail::cbor_major_type::array); + uint8_t info = get_additional_information_value(b); + + read_shape(info, ec); + if (ec) + { + return; + } + + state_stack_.emplace_back(parse_mode::multi_dim, 0); + more_ = visitor.begin_multi_dim(shape_, tag, *this, ec); + } + + void produce_end_multi_dim(json_visitor2& 
visitor, std::error_code& ec) + { + more_ = visitor.end_multi_dim(*this, ec); + state_stack_.pop_back(); + } + + void read_shape(uint8_t info, std::error_code& ec) + { + shape_.clear(); + switch (info) + { + case jsoncons::cbor::detail::additional_info::indefinite_length: + { + while (true) + { + auto c = source_.peek(); + if (c.eof) + { + ec = cbor_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == 0xff) + { + source_.ignore(1); + } + else + { + std::size_t dim = get_size(ec); + if (!more_) + { + return; + } + shape_.push_back(dim); + } + } + break; + } + default: + { + std::size_t size = get_size(ec); + if (!more_) + { + return; + } + for (std::size_t i = 0; more_ && i < size; ++i) + { + std::size_t dim = get_size(ec); + if (!more_) + { + return; + } + shape_.push_back(dim); + } + break; + } + } + } +}; + +}} + +#endif diff --git a/include/jsoncons_ext/cbor/cbor_reader.hpp b/include/jsoncons_ext/cbor/cbor_reader.hpp new file mode 100644 index 0000000..a46a52e --- /dev/null +++ b/include/jsoncons_ext/cbor/cbor_reader.hpp @@ -0,0 +1,116 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_CBOR_READER_HPP +#define JSONCONS_CBOR_CBOR_READER_HPP + +#include <string> +#include <vector> +#include <memory> +#include <utility> // std::move +#include <jsoncons/json.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/cbor/cbor_encoder.hpp> +#include <jsoncons_ext/cbor/cbor_error.hpp> +#include <jsoncons_ext/cbor/cbor_detail.hpp> +#include <jsoncons_ext/cbor/cbor_parser.hpp> + +namespace jsoncons { namespace cbor { + +template <class Source,class Allocator=std::allocator<char>> +class basic_cbor_reader +{ + using char_type = char; + + basic_cbor_parser<Source,Allocator> parser_; + basic_json_visitor2_to_visitor_adaptor<char_type,Allocator> adaptor_; + json_visitor2& visitor_; +public: + template <class Sourceable> + basic_cbor_reader(Sourceable&& source, + json_visitor& visitor, + const Allocator alloc) + : basic_cbor_reader(std::forward<Sourceable>(source), + visitor, + cbor_decode_options(), + alloc) + { + } + + template <class Sourceable> + basic_cbor_reader(Sourceable&& source, + json_visitor& visitor, + const cbor_decode_options& options = cbor_decode_options(), + const Allocator alloc=Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + adaptor_(visitor, alloc), visitor_(adaptor_) + { + } + template <class Sourceable> + basic_cbor_reader(Sourceable&& source, + json_visitor2& visitor, + const Allocator alloc) + : basic_cbor_reader(std::forward<Sourceable>(source), + visitor, + cbor_decode_options(), + alloc) + { + } + + template <class Sourceable> + basic_cbor_reader(Sourceable&& source, + json_visitor2& visitor, + const cbor_decode_options& options = cbor_decode_options(), + const Allocator alloc=Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + 
visitor_(visitor) + { + } + + void read() + { + std::error_code ec; + read(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,line(),column())); + } + } + + void read(std::error_code& ec) + { + parser_.reset(); + parser_.parse(visitor_, ec); + if (ec) + { + return; + } + } + + std::size_t line() const + { + return parser_.line(); + } + + std::size_t column() const + { + return parser_.column(); + } +}; + +using cbor_stream_reader = basic_cbor_reader<jsoncons::binary_stream_source>; + +using cbor_bytes_reader = basic_cbor_reader<jsoncons::bytes_source>; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use cbor_stream_reader") typedef cbor_stream_reader cbor_reader; +JSONCONS_DEPRECATED_MSG("Instead, use cbor_bytes_reader") typedef cbor_bytes_reader cbor_buffer_reader; +#endif + +}} + +#endif diff --git a/include/jsoncons_ext/cbor/decode_cbor.hpp b/include/jsoncons_ext/cbor/decode_cbor.hpp new file mode 100644 index 0000000..ab5c913 --- /dev/null +++ b/include/jsoncons_ext/cbor/decode_cbor.hpp @@ -0,0 +1,203 @@ +// Copyright 2017 Daniel Parkerstd +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_DECODE_CBOR_HPP +#define JSONCONS_CBOR_DECODE_CBOR_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/json_filter.hpp> +#include <jsoncons/decode_traits.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/cbor/cbor_reader.hpp> +#include <jsoncons_ext/cbor/cbor_cursor.hpp> + +namespace jsoncons { +namespace cbor { + + template<class T, class Source> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_cbor(const Source& v, + const cbor_decode_options& options = cbor_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_cbor_reader<jsoncons::bytes_source> reader(v, adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class Source> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_cbor(const Source& v, + const cbor_decode_options& options = cbor_decode_options()) + { + basic_cbor_cursor<bytes_source> cursor(v, options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_cbor(std::istream& is, + const 
cbor_decode_options& options = cbor_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + cbor_stream_reader reader(is, adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_cbor(std::istream& is, + const cbor_decode_options& options = cbor_decode_options()) + { + basic_cbor_cursor<binary_stream_source> cursor(is, options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T, class InputIt> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_cbor(InputIt first, InputIt last, + const cbor_decode_options& options = cbor_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_cbor_reader<binary_iterator_source<InputIt>> reader(binary_iterator_source<InputIt>(first, last), adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class InputIt> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_cbor(InputIt first, InputIt last, + const cbor_decode_options& options = cbor_decode_options()) + { + basic_cbor_cursor<binary_iterator_source<InputIt>> cursor(binary_iterator_source<InputIt>(first, last), options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = 
decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + // With leading allocator parameter + + template<class T, class Source, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_cbor(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& v, + const cbor_decode_options& options = cbor_decode_options()) + { + json_decoder<T,TempAllocator> decoder(temp_alloc); + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_cbor_reader<jsoncons::bytes_source,TempAllocator> reader(v, adaptor, options, temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class Source, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_cbor(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& v, + const cbor_decode_options& options = cbor_decode_options()) + { + basic_cbor_cursor<bytes_source,TempAllocator> cursor(v, options, temp_alloc); + json_decoder<basic_json<char,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_cbor(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::istream& is, + const cbor_decode_options& options = cbor_decode_options()) + { + json_decoder<T,TempAllocator> decoder(temp_alloc); + 
auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_cbor_reader<jsoncons::binary_stream_source,TempAllocator> reader(is, adaptor, options, temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_cbor(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::istream& is, + const cbor_decode_options& options = cbor_decode_options()) + { + basic_cbor_cursor<binary_stream_source,TempAllocator> cursor(is, options, temp_alloc); + json_decoder<basic_json<char,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + +} // namespace cbor +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/cbor/encode_cbor.hpp b/include/jsoncons_ext/cbor/encode_cbor.hpp new file mode 100644 index 0000000..8576f1c --- /dev/null +++ b/include/jsoncons_ext/cbor/encode_cbor.hpp @@ -0,0 +1,151 @@ +// Copyright 2017 Daniel Parkerstd +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CBOR_ENCODE_CBOR_HPP +#define JSONCONS_CBOR_ENCODE_CBOR_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/json_filter.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/encode_traits.hpp> +#include <jsoncons_ext/cbor/cbor_encoder.hpp> + +namespace jsoncons { +namespace cbor { + + // to bytes + + template<class T, class Container> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_cbor(const T& j, + Container& v, + const cbor_encode_options& options = cbor_encode_options()) + { + using char_type = typename T::char_type; + basic_cbor_encoder<jsoncons::bytes_sink<Container>> encoder(v, options); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T, class Container> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_cbor(const T& val, Container& v, + const cbor_encode_options& options = cbor_encode_options()) + { + basic_cbor_encoder<jsoncons::bytes_sink<Container>> encoder(v, options); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + // stream + + template<class T> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_cbor(const T& j, + std::ostream& os, + const cbor_encode_options& options = cbor_encode_options()) + { + using char_type = typename T::char_type; + cbor_stream_encoder encoder(os, options); + auto adaptor = 
make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_cbor(const T& val, + std::ostream& os, + const cbor_encode_options& options = cbor_encode_options()) + { + cbor_stream_encoder encoder(os, options); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + // temp_allocator_arg + + // to bytes + + template<class T, class Container, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_cbor(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, + Container& v, + const cbor_encode_options& options = cbor_encode_options()) + { + using char_type = typename T::char_type; + basic_cbor_encoder<bytes_sink<Container>,TempAllocator> encoder(v, options, temp_alloc); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T, class Container, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_cbor(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, + Container& v, + const cbor_encode_options& options = cbor_encode_options()) + { + basic_cbor_encoder<jsoncons::bytes_sink<Container>,TempAllocator> encoder(v, options, temp_alloc); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + // stream + + template<class T,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_cbor(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, + std::ostream& os, + const 
cbor_encode_options& options = cbor_encode_options()) + { + using char_type = typename T::char_type; + basic_cbor_encoder<binary_stream_sink,TempAllocator> encoder(os, options, temp_alloc); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_cbor(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, + std::ostream& os, + const cbor_encode_options& options = cbor_encode_options()) + { + std::error_code ec; + encode_cbor(temp_allocator_arg, temp_alloc, val, os, options, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + +} // namespace cbor +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/csv/csv.hpp b/include/jsoncons_ext/csv/csv.hpp new file mode 100644 index 0000000..9f8a9c5 --- /dev/null +++ b/include/jsoncons_ext/csv/csv.hpp @@ -0,0 +1,17 @@ +/// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_HPP +#define JSONCONS_CSV_CSV_HPP + +#include <jsoncons_ext/csv/csv_options.hpp> +#include <jsoncons_ext/csv/csv_reader.hpp> +#include <jsoncons_ext/csv/csv_encoder.hpp> +#include <jsoncons_ext/csv/csv_cursor.hpp> +#include <jsoncons_ext/csv/decode_csv.hpp> +#include <jsoncons_ext/csv/encode_csv.hpp> + +#endif diff --git a/include/jsoncons_ext/csv/csv_cursor.hpp b/include/jsoncons_ext/csv/csv_cursor.hpp new file mode 100644 index 0000000..67d55a6 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_cursor.hpp @@ -0,0 +1,358 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_CURSOR_HPP +#define JSONCONS_CSV_CSV_CURSOR_HPP + +#include <memory> // std::allocator +#include <string> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <ios> +#include <istream> // std::basic_istream +#include <jsoncons/byte_string.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons_ext/csv/csv_parser.hpp> +#include <jsoncons/staj_cursor.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/source_adaptor.hpp> + +namespace jsoncons { namespace csv { + +template<class CharT,class Source=jsoncons::stream_source<CharT>,class Allocator=std::allocator<char>> +class basic_csv_cursor : public basic_staj_cursor<CharT>, private virtual ser_context +{ +public: + using source_type = Source; + using char_type = CharT; + using allocator_type = Allocator; +private: + static constexpr size_t default_max_buffer_size = 16384; + + typedef typename std::allocator_traits<allocator_type>:: template rebind_alloc<CharT> char_allocator_type; + + text_source_adaptor<Source> source_; + basic_csv_parser<CharT,Allocator> parser_; + basic_staj_visitor<CharT> cursor_visitor_; + + // Noncopyable and nonmoveable + basic_csv_cursor(const basic_csv_cursor&) = delete; + basic_csv_cursor& operator=(const basic_csv_cursor&) = delete; + +public: + using string_view_type = jsoncons::basic_string_view<CharT>; + + // Constructors that throw parse exceptions + + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>(), + std::function<bool(csv_errc,const ser_context&)> err_handler = default_csv_parsing(), + const Allocator& alloc = Allocator(), + typename 
std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(std::forward<Sourceable>(source)), + parser_(options,err_handler,alloc), + cursor_visitor_(accept_all) + { + if (!done()) + { + next(); + } + } + + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>(), + std::function<bool(csv_errc,const ser_context&)> err_handler = default_csv_parsing(), + const Allocator& alloc = Allocator(), + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(), + parser_(options,err_handler,alloc), + cursor_visitor_(accept_all) + { + jsoncons::basic_string_view<CharT> sv(std::forward<Sourceable>(source)); + initialize_with_string_view(sv); + } + + + // Constructors that set parse error codes + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + std::error_code& ec) + : basic_csv_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + basic_csv_decode_options<CharT>(), + default_csv_parsing(), + ec) + { + } + + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + const basic_csv_decode_options<CharT>& options, + std::error_code& ec) + : basic_csv_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + default_csv_parsing(), + ec) + { + } + + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + std::error_code& ec) + : basic_csv_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + err_handler, + ec) + { + } + + template <class Sourceable> + basic_csv_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const basic_csv_decode_options<CharT>& options, + 
std::function<bool(csv_errc,const ser_context&)> err_handler, + std::error_code& ec, + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(std::forward<Sourceable>(source)), + parser_(options,err_handler,alloc), + cursor_visitor_(accept_all) + { + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + basic_csv_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + std::error_code& ec, + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(), + parser_(options,err_handler,alloc), + cursor_visitor_(accept_all) + { + jsoncons::basic_string_view<CharT> sv(std::forward<Sourceable>(source)); + initialize_with_string_view(sv, ec); + } + + template <class Sourceable> + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type + reset(Sourceable&& source) + { + source_ = std::forward<Sourceable>(source); + parser_.reinitialize(); + cursor_visitor_.reset(); + if (!done()) + { + next(); + } + } + + template <class Sourceable> + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type + reset(Sourceable&& source) + { + source_ = {}; + parser_.reinitialize(); + cursor_visitor_.reset(); + initialize_with_string_view(std::forward<Sourceable>(source)); + } + + template <class Sourceable> + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type + reset(Sourceable&& source, std::error_code& ec) + { + source_ = std::forward<Sourceable>(source); + parser_.reinitialize(); + cursor_visitor_.reset(); + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + typename 
std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type + reset(Sourceable&& source, std::error_code& ec) + { + source_ = {}; + parser_.reinitialize(); + initialize_with_string_view(std::forward<Sourceable>(source), ec); + } + + bool done() const override + { + return parser_.done(); + } + + const basic_staj_event<CharT>& current() const override + { + return cursor_visitor_.event(); + } + + void read_to(basic_json_visitor<CharT>& visitor) override + { + std::error_code ec; + read_to(visitor, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read_to(basic_json_visitor<CharT>& visitor, + std::error_code& ec) override + { + if (staj_to_saj_event(cursor_visitor_.event(), visitor, *this, ec)) + { + read_next(visitor, ec); + } + } + + void next() override + { + std::error_code ec; + next(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void next(std::error_code& ec) override + { + read_next(ec); + } + + static bool accept_all(const basic_staj_event<CharT>&, const ser_context&) + { + return true; + } + + const ser_context& context() const override + { + return *this; + } + + bool eof() const + { + return parser_.source_exhausted() && source_.eof(); + } + + std::size_t line() const override + { + return parser_.line(); + } + + std::size_t column() const override + { + return parser_.column(); + } + + friend + basic_staj_filter_view<CharT> operator|(basic_csv_cursor& cursor, + std::function<bool(const basic_staj_event<CharT>&, const ser_context&)> pred) + { + return basic_staj_filter_view<CharT>(cursor, pred); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<CharT>&)") + void read(basic_json_visitor<CharT>& visitor) + { + read_to(visitor); + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<CharT>&, std::error_code&)") + void 
read(basic_json_visitor<CharT>& visitor, + std::error_code& ec) + { + read_to(visitor, ec); + } +#endif +private: + + void initialize_with_string_view(string_view_type sv) + { + auto r = unicode_traits::detect_json_encoding(sv.data(), sv.size()); + if (!(r.encoding == unicode_traits::encoding_kind::utf8 || r.encoding == unicode_traits::encoding_kind::undetected)) + { + JSONCONS_THROW(ser_error(json_errc::illegal_unicode_character,parser_.line(),parser_.column())); + } + std::size_t offset = (r.ptr - sv.data()); + parser_.update(sv.data()+offset,sv.size()-offset); + if (!done()) + { + next(); + } + } + + void initialize_with_string_view(string_view_type sv, std::error_code& ec) + { + auto r = unicode_traits::detect_encoding_from_bom(sv.data(), sv.size()); + if (!(r.encoding == unicode_traits::encoding_kind::utf8 || r.encoding == unicode_traits::encoding_kind::undetected)) + { + ec = json_errc::illegal_unicode_character; + return; + } + std::size_t offset = (r.ptr - sv.data()); + parser_.update(sv.data()+offset,sv.size()-offset); + if (!done()) + { + next(ec); + } + } + + void read_next(std::error_code& ec) + { + read_next(cursor_visitor_, ec); + } + + void read_next(basic_json_visitor<CharT>& visitor, std::error_code& ec) + { + parser_.restart(); + while (!parser_.stopped()) + { + if (parser_.source_exhausted()) + { + auto s = source_.read_buffer(ec); + if (ec) return; + if (s.size() > 0) + { + parser_.update(s.data(),s.size()); + } + } + parser_.parse_some(visitor, ec); + if (ec) return; + } + } +}; + +using csv_stream_cursor = basic_csv_cursor<char,jsoncons::stream_source<char>>; +using csv_string_cursor = basic_csv_cursor<char,jsoncons::string_source<char>>; +using wcsv_stream_cursor = basic_csv_cursor<wchar_t,jsoncons::stream_source<wchar_t>>; +using wcsv_string_cursor = basic_csv_cursor<wchar_t,jsoncons::string_source<wchar_t>>; + +using csv_cursor = basic_csv_cursor<char>; +using wcsv_cursor = basic_csv_cursor<wchar_t>; + +}} + +#endif + diff --git 
a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp new file mode 100644 index 0000000..49c1a3d --- /dev/null +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -0,0 +1,954 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_ENCODER_HPP +#define JSONCONS_CSV_CSV_ENCODER_HPP + +#include <array> // std::array +#include <string> +#include <vector> +#include <ostream> +#include <utility> // std::move +#include <unordered_map> // std::unordered_map +#include <memory> // std::allocator +#include <limits> // std::numeric_limits +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/detail/write_number.hpp> +#include <jsoncons_ext/csv/csv_options.hpp> +#include <jsoncons/sink.hpp> + +namespace jsoncons { namespace csv { + +template<class CharT,class Sink=jsoncons::stream_sink<CharT>,class Allocator=std::allocator<char>> +class basic_csv_encoder final : public basic_json_visitor<CharT> +{ +public: + using char_type = CharT; + using typename basic_json_visitor<CharT>::string_view_type; + using sink_type = Sink; + + using allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<CharT>; + using string_type = std::basic_string<CharT, std::char_traits<CharT>, char_allocator_type>; + using string_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<string_type>; + using string_string_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<std::pair<const string_type,string_type>>; + +private: + static jsoncons::basic_string_view<CharT> null_constant() + { + static jsoncons::basic_string_view<CharT> k = 
JSONCONS_STRING_VIEW_CONSTANT(CharT,"null"); + return k; + } + static jsoncons::basic_string_view<CharT> true_constant() + { + static jsoncons::basic_string_view<CharT> k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"true"); + return k; + } + static jsoncons::basic_string_view<CharT> false_constant() + { + static jsoncons::basic_string_view<CharT> k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"false"); + return k; + } + + enum class stack_item_kind + { + row_mapping, + column_mapping, + object, + row, + column, + object_multi_valued_field, + row_multi_valued_field, + column_multi_valued_field + }; + + struct stack_item + { + stack_item_kind item_kind_; + std::size_t count_; + + stack_item(stack_item_kind item_kind) noexcept + : item_kind_(item_kind), count_(0) + { + } + + bool is_object() const + { + return item_kind_ == stack_item_kind::object; + } + + stack_item_kind item_kind() const + { + return item_kind_; + } + }; + + Sink sink_; + const basic_csv_encode_options<CharT> options_; + allocator_type alloc_; + + std::vector<stack_item> stack_; + jsoncons::detail::write_double fp_; + std::vector<string_type,string_allocator_type> strings_buffer_; + + std::unordered_map<string_type,string_type, std::hash<string_type>,std::equal_to<string_type>,string_string_allocator_type> buffered_line_; + string_type name_; + std::size_t column_index_; + std::vector<std::size_t> row_counts_; + + // Noncopyable and nonmoveable + basic_csv_encoder(const basic_csv_encoder&) = delete; + basic_csv_encoder& operator=(const basic_csv_encoder&) = delete; +public: + basic_csv_encoder(Sink&& sink, + const Allocator& alloc = Allocator()) + : basic_csv_encoder(std::forward<Sink>(sink), basic_csv_encode_options<CharT>(), alloc) + { + } + + basic_csv_encoder(Sink&& sink, + const basic_csv_encode_options<CharT>& options, + const Allocator& alloc = Allocator()) + : sink_(std::forward<Sink>(sink)), + options_(options), + alloc_(alloc), + stack_(), + fp_(options.float_format(), options.precision()), + 
column_index_(0) + { + jsoncons::csv::detail::parse_column_names(options.column_names(), strings_buffer_); + } + + ~basic_csv_encoder() noexcept + { + JSONCONS_TRY + { + sink_.flush(); + } + JSONCONS_CATCH(...) + { + } + } + + void reset() + { + stack_.clear(); + strings_buffer_.clear(); + buffered_line_.clear(); + name_.clear(); + column_index_ = 0; + row_counts_.clear(); + } + + void reset(Sink&& sink) + { + sink_ = std::move(sink); + reset(); + } + +private: + + template<class AnyWriter> + void escape_string(const CharT* s, + std::size_t length, + CharT quote_char, CharT quote_escape_char, + AnyWriter& sink) + { + const CharT* begin = s; + const CharT* end = s + length; + for (const CharT* it = begin; it != end; ++it) + { + CharT c = *it; + if (c == quote_char) + { + sink.push_back(quote_escape_char); + sink.push_back(quote_char); + } + else + { + sink.push_back(c); + } + } + } + + void visit_flush() override + { + sink_.flush(); + } + + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + stack_.emplace_back(stack_item_kind::column_mapping); + return true; + } + switch (stack_.back().item_kind_) + { + case stack_item_kind::row_mapping: + stack_.emplace_back(stack_item_kind::object); + return true; + default: // error + ec = csv_errc::source_error; + return false; + } + } + + bool visit_end_object(const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + if (stack_[0].count_ == 0) + { + for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(strings_buffer_[i].data(), + strings_buffer_[i].length()); + } + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } + for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + { + if (i > 0) + { + 
sink_.push_back(options_.field_delimiter()); + } + auto it = buffered_line_.find(strings_buffer_[i]); + if (it != buffered_line_.end()) + { + sink_.append(it->second.data(),it->second.length()); + it->second.clear(); + } + } + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + break; + case stack_item_kind::column_mapping: + { + for (const auto& item : strings_buffer_) + { + sink_.append(item.data(), item.size()); + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + } + break; + } + default: + break; + } + stack_.pop_back(); + if (!stack_.empty()) + { + end_value(); + } + return true; + } + + bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + stack_.emplace_back(stack_item_kind::row_mapping); + return true; + } + switch (stack_.back().item_kind_) + { + case stack_item_kind::row_mapping: + stack_.emplace_back(stack_item_kind::row); + if (stack_[0].count_ == 0) + { + for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(strings_buffer_[i].data(),strings_buffer_[i].length()); + } + if (strings_buffer_.size() > 0) + { + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } + } + return true; + case stack_item_kind::object: + stack_.emplace_back(stack_item_kind::object_multi_valued_field); + return true; + case stack_item_kind::column_mapping: + stack_.emplace_back(stack_item_kind::column); + row_counts_.push_back(1); + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + return true; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + begin_value(bo); + 
stack_.emplace_back(stack_item_kind::column_multi_valued_field); + return true; + } + case stack_item_kind::row: + begin_value(sink_); + stack_.emplace_back(stack_item_kind::row_multi_valued_field); + return true; + default: // error + ec = csv_errc::source_error; + return false; + } + } + + bool visit_end_array(const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::row: + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + break; + case stack_item_kind::column: + ++column_index_; + break; + default: + break; + } + stack_.pop_back(); + + if (!stack_.empty()) + { + end_value(); + } + return true; + } + + bool visit_key(const string_view_type& name, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + { + name_ = string_type(name); + buffered_line_[string_type(name)] = std::basic_string<CharT>(); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + strings_buffer_.emplace_back(name); + } + break; + } + case stack_item_kind::column_mapping: + { + if (strings_buffer_.empty()) + { + strings_buffer_.emplace_back(name); + } + else + { + strings_buffer_[0].push_back(options_.field_delimiter()); + strings_buffer_[0].append(string_type(name)); + } + break; + } + default: + break; + } + return true; + } + + bool visit_null(semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_null_value(bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) 
+ { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_null_value(sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_null_value(bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_null_value(bo); + break; + } + default: + break; + } + return true; + } + + bool visit_string(const string_view_type& sv, semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_string_value(sv,bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_string_value(sv,sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_string_value(sv,bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_string_value(sv,bo); + break; + } + default: + break; + } + return true; + } + + bool 
visit_byte_string(const byte_string_view& b, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + byte_string_chars_format encoding_hint; + switch (tag) + { + case semantic_tag::base16: + encoding_hint = byte_string_chars_format::base16; + break; + case semantic_tag::base64: + encoding_hint = byte_string_chars_format::base64; + break; + case semantic_tag::base64url: + encoding_hint = byte_string_chars_format::base64url; + break; + default: + encoding_hint = byte_string_chars_format::none; + break; + } + byte_string_chars_format format = jsoncons::detail::resolve_byte_string_chars_format(encoding_hint,byte_string_chars_format::none,byte_string_chars_format::base64url); + + std::basic_string<CharT> s; + switch (format) + { + case byte_string_chars_format::base16: + { + encode_base16(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + case byte_string_chars_format::base64: + { + encode_base64(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + case byte_string_chars_format::base64url: + { + encode_base64url(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + default: + { + JSONCONS_UNREACHABLE(); + } + } + + return true; + } + + bool visit_double(double val, + semantic_tag, + const ser_context& context, + std::error_code& ec) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_double_value(val, context, bo, ec); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + 
case stack_item_kind::row_multi_valued_field: + write_double_value(val, context, sink_, ec); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_double_value(val, context, bo, ec); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_double_value(val, context, bo, ec); + break; + } + default: + break; + } + return true; + } + + bool visit_int64(int64_t val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_int64_value(val,bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_int64_value(val,sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_int64_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_int64_value(val, bo); + break; + } + default: + break; + } + return true; + } + + bool visit_uint64(uint64_t val, + semantic_tag, + const ser_context&, + std::error_code&) override + 
{ + JSONCONS_ASSERT(!stack_.empty()); + /* dispatch on the kind of the innermost stack item */ switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_uint64_value(val, bo); + bo.flush(); + /* separate from a previously buffered value with the subfield delimiter, if one is configured */ if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_uint64_value(val,sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_uint64_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_uint64_value(val, bo); + break; + } + default: + break; + } + return true; + } + + /* Visitor callback for a boolean. Object kinds append the formatted value to the buffered_line_ entry stored under name_; row kinds write to sink_; column kinds write into strings_buffer_[row_counts_.back()]. Always returns true. */ bool visit_bool(bool val, semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_bool_value(val,bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_bool_value(val,sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <=
row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_bool_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_bool_value(val, bo); + break; + } + default: + break; + } + return true; + } + + /* Writes the characters [s, s+length) to sink, escaped via escape_string and wrapped in the quote character when required. Quoting is applied for quote styles all and nonnumeric, and for minimal when the text contains the field delimiter, the quote character, or a CR/LF (an unquoted line break would split the record). Always returns true. */ template <class AnyWriter> + bool do_string_value(const CharT* s, std::size_t length, AnyWriter& sink) + { + bool quote = false; + if (options_.quote_style() == quote_style_kind::all || options_.quote_style() == quote_style_kind::nonnumeric || + (options_.quote_style() == quote_style_kind::minimal && + (std::char_traits<CharT>::find(s, length, options_.field_delimiter()) != nullptr || std::char_traits<CharT>::find(s, length, options_.quote_char()) != nullptr || std::char_traits<CharT>::find(s, length, CharT('\n')) != nullptr || std::char_traits<CharT>::find(s, length, CharT('\r')) != nullptr))) + { + quote = true; + sink.push_back(options_.quote_char()); + } + escape_string(s, length, options_.quote_char(), options_.quote_escape_char(), sink); + if (quote) + { + sink.push_back(options_.quote_char()); + } + + return true; + } + + /* Writes one string value to sink, bracketed by begin_value/end_value. */ template <class AnyWriter> + void write_string_value(const string_view_type& value, AnyWriter& sink) + { + begin_value(sink); + do_string_value(value.data(),value.length(),sink); + end_value(); + } + + /* Writes one floating-point value to sink, bracketed by begin_value/end_value. Finite values are formatted with fp_. NaN, +infinity and -infinity are replaced by the configured *_to_num text (written verbatim), else by the configured *_to_str text (written through do_string_value so it is quoted/escaped like any string), else by null_constant(). The string replacements go directly to sink: begin_value(sink) has already run here, so re-entering the visitor would bypass the sink chosen by the caller. The ser_context and error_code parameters are unused and kept only so existing call sites compile unchanged. */ template <class AnyWriter> + void write_double_value(double val, const ser_context&, AnyWriter& sink, std::error_code&) + { + begin_value(sink); + + if (!std::isfinite(val)) + { + if ((std::isnan)(val)) + { + if (options_.enable_nan_to_num()) + { + sink.append(options_.nan_to_num().data(), options_.nan_to_num().length()); + } + else if (options_.enable_nan_to_str()) + { + do_string_value(options_.nan_to_str().data(), options_.nan_to_str().length(), sink); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + else if (val == std::numeric_limits<double>::infinity()) + { + if (options_.enable_inf_to_num()) + { +
sink.append(options_.inf_to_num().data(), options_.inf_to_num().length()); + } + else if (options_.enable_inf_to_str()) + { + do_string_value(options_.inf_to_str().data(), options_.inf_to_str().length(), sink); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + else + { + /* not NaN and not +infinity: negative infinity */ if (options_.enable_neginf_to_num()) + { + sink.append(options_.neginf_to_num().data(), options_.neginf_to_num().length()); + } + else if (options_.enable_neginf_to_str()) + { + do_string_value(options_.neginf_to_str().data(), options_.neginf_to_str().length(), sink); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + } + else + { + fp_(val, sink); + } + + end_value(); + + } + + /* Writes one signed integer to sink in decimal form, bracketed by begin_value/end_value. */ template <class AnyWriter> + void write_int64_value(int64_t val, AnyWriter& sink) + { + begin_value(sink); + + jsoncons::detail::from_integer(val,sink); + + end_value(); + } + + /* Writes one unsigned integer to sink, bracketed by begin_value/end_value. */ template <class AnyWriter> + void write_uint64_value(uint64_t val, AnyWriter& sink) + { + begin_value(sink); + + jsoncons::detail::from_integer(val,sink); + + end_value(); + } + + /* Writes true_constant() or false_constant() to sink, bracketed by begin_value/end_value. */ template <class AnyWriter> + void write_bool_value(bool val, AnyWriter& sink) + { + begin_value(sink); + + if (val) + { + sink.append(true_constant().data(), true_constant().size()); + } + else + { + sink.append(false_constant().data(), false_constant().size()); + } + + end_value(); + } + + /* Writes null_constant() to sink, bracketed by begin_value/end_value. Always returns true. */ template <class AnyWriter> + bool write_null_value(AnyWriter& sink) + { + begin_value(sink); + sink.append(null_constant().data(), null_constant().size()); + end_value(); + return true; + } + + /* Emits whatever separator must precede the next value, depending on the kind of the innermost stack item. */ template <class AnyWriter> + void begin_value(AnyWriter& sink) + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::row: + if (stack_.back().count_ > 0) + { + sink.push_back(options_.field_delimiter()); + } + break; + case stack_item_kind::column: + { + if (row_counts_.size() >= 3) + { + for (std::size_t i = row_counts_.size()-2; i-- > 0;) + { + if (row_counts_[i] <= row_counts_.back()) + {
sink.push_back(options_.field_delimiter()); + } + else + { + break; + } + } + } + if (column_index_ > 0) + { + sink.push_back(options_.field_delimiter()); + } + break; + } + case stack_item_kind::row_multi_valued_field: + case stack_item_kind::column_multi_valued_field: + /* values after the first inside a multi-valued field are separated by the subfield delimiter, when one is configured */ if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) + { + sink.push_back(options_.subfield_delimiter()); + } + break; + default: + break; + } + } + + /* Records that one value has been written: column items advance row_counts_.back(), every other kind increments the count_ of the innermost stack item. */ void end_value() + { + JSONCONS_ASSERT(!stack_.empty()); + switch(stack_.back().item_kind_) + { + case stack_item_kind::row: + { + ++stack_.back().count_; + break; + } + case stack_item_kind::column: + { + ++row_counts_.back(); + break; + } + default: + ++stack_.back().count_; + break; + } + } +}; + +/* Convenience aliases: stream-backed and string-backed encoders for char and wchar_t. */ using csv_stream_encoder = basic_csv_encoder<char>; +using csv_string_encoder = basic_csv_encoder<char,jsoncons::string_sink<std::string>>; +using csv_wstream_encoder = basic_csv_encoder<wchar_t>; +using wcsv_string_encoder = basic_csv_encoder<wchar_t,jsoncons::string_sink<std::wstring>>; + +#if !defined(JSONCONS_NO_DEPRECATED) +template<class CharT, class Sink = jsoncons::stream_sink<CharT>, class Allocator = std::allocator<CharT>> +using basic_csv_serializer = basic_csv_encoder<CharT,Sink,Allocator>; + +JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use csv_string_encoder") typedef csv_string_encoder csv_string_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use csv_string_encoder") typedef csv_string_encoder csv_string_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_encoder; +/* the wide-character deprecated name must alias the wchar_t encoder, not the char one */ JSONCONS_DEPRECATED_MSG("Instead, use csv_wstream_encoder") typedef csv_wstream_encoder wcsv_encoder; +#endif + +}} + +#endif diff --git a/include/jsoncons_ext/csv/csv_error.hpp
b/include/jsoncons_ext/csv/csv_error.hpp new file mode 100644 index 0000000..30255dd --- /dev/null +++ b/include/jsoncons_ext/csv/csv_error.hpp @@ -0,0 +1,85 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_ERROR_HPP +#define JSONCONS_CSV_CSV_ERROR_HPP + +#include <system_error> +#include <jsoncons/json_exception.hpp> + +namespace jsoncons { namespace csv { + + /* Error conditions in the "jsoncons/csv" category; success is 0 and the remaining enumerators count up from unexpected_eof = 1. */ enum class csv_errc : int + { + success = 0, + unexpected_eof = 1, + source_error, + expected_quote, + syntax_error, + invalid_parse_state, + invalid_escaped_char, + unexpected_char_between_fields + }; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use csv_errc") typedef csv_errc csv_parser_errc; +#endif + +/* std::error_category implementation that names the category and maps csv_errc values to messages. */ class csv_error_category_impl + : public std::error_category +{ +public: + const char* name() const noexcept override + { + return "jsoncons/csv"; + } + /* Returns the message for ev; any value without its own case (including csv_errc::success) yields "Unknown CSV parser error". */ std::string message(int ev) const override + { + switch (static_cast<csv_errc>(ev)) + { + case csv_errc::unexpected_eof: + return "Unexpected end of file"; + case csv_errc::source_error: + return "Source error"; + case csv_errc::expected_quote: + return "Expected quote character"; + case csv_errc::syntax_error: + return "CSV syntax error"; + case csv_errc::invalid_parse_state: + return "Invalid CSV parser state"; + case csv_errc::invalid_escaped_char: + return "Invalid character following quote escape character"; + case csv_errc::unexpected_char_between_fields: + return "Unexpected character between fields"; + default: + return "Unknown CSV parser error"; + } + } +}; + +/* Returns the single category instance (function-local static). */ inline +const std::error_category& csv_error_category() +{ + static csv_error_category_impl instance; + return instance; +} + +/* Builds a std::error_code from a csv_errc value in the csv category. */ inline +std::error_code make_error_code(csv_errc result) +{ + return
std::error_code(static_cast<int>(result),csv_error_category()); +} + +}} + +namespace std { + /* marks csv_errc as an error code enum so it converts implicitly to std::error_code */ template<> + struct is_error_code_enum<jsoncons::csv::csv_errc> : public true_type + { + }; +} + +#endif diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp new file mode 100644 index 0000000..8bd2e22 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -0,0 +1,973 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_OPTIONS_HPP +#define JSONCONS_CSV_CSV_OPTIONS_HPP + +#include <string> +#include <vector> +#include <utility> // std::pair +#include <unordered_map> // std::unordered_map +#include <map> +#include <limits> // std::numeric_limits +#include <cwchar> +#include <jsoncons/json_options.hpp> + +namespace jsoncons { namespace csv { + +/* Declared type of a CSV column; repeat_t marks a "repeat the preceding types" entry produced by '*' in a column-types string. */ enum class csv_column_type : uint8_t +{ + string_t,integer_t,float_t,boolean_t,repeat_t +}; + +/* Quoting policy applied to string fields when encoding. */ enum class quote_style_kind : uint8_t +{ + minimal,all,nonnumeric,none +}; + +/* Enumerators start at 1, so a value-initialized csv_mapping_kind (0) matches none of them. */ enum class csv_mapping_kind : uint8_t +{ + n_rows = 1, + n_objects, + m_columns +}; + +#if !defined(JSONCONS_NO_DEPRECATED) +using mapping_kind = csv_mapping_kind; +JSONCONS_DEPRECATED_MSG("Instead, use quote_style_kind") typedef quote_style_kind quote_styles; +JSONCONS_DEPRECATED_MSG("Instead, use quote_style_kind") typedef quote_style_kind quote_style_type; +JSONCONS_DEPRECATED_MSG("Instead, use csv_mapping_kind") typedef csv_mapping_kind mapping_type; +#endif + +/* States of the small scanners used to split column-name and column-type strings. */ enum class column_state {sequence,label}; + +/* One parsed column-type entry: the type, its bracket nesting level, and a repeat count (defaults to 0). */ struct csv_type_info +{ + csv_type_info() = default; + csv_type_info(const csv_type_info&) = default; + csv_type_info(csv_type_info&&) = default; + + csv_type_info(csv_column_type ctype, std::size_t lev, std::size_t repcount = 0) noexcept + { + col_type = ctype; + level = lev; +
rep_count = repcount; + } + + csv_column_type col_type; + std::size_t level; + std::size_t rep_count; +}; + +namespace detail { + +/* Splits the comma-separated list in names and appends each name to cont. Whitespace (space, tab, CR, LF) before a name is skipped; every other character up to the next ',' is kept, including trailing whitespace. A name still being collected at end of input is appended as well. */ template <class CharT,class Container> +void parse_column_names(const std::basic_string<CharT>& names, + Container& cont) +{ + column_state state = column_state::sequence; + typename Container::value_type buffer(cont.get_allocator()); + + auto p = names.begin(); + while (p != names.end()) + { + switch (state) + { + case column_state::sequence: + { + switch (*p) + { + case ' ': case '\t':case '\r': case '\n': + ++p; + break; + default: + /* first non-whitespace character: switch to label state without consuming it */ buffer.clear(); + state = column_state::label; + break; + } + break; + } + case column_state::label: + { + switch (*p) + { + case ',': + cont.push_back(buffer); + buffer.clear(); + ++p; + state = column_state::sequence; + break; + default: + buffer.push_back(*p); + ++p; + break; + } + break; + } + } + } + if (state == column_state::label) + { + cont.push_back(buffer); + buffer.clear(); + } +} + +/* Parses a column-types string into csv_type_info entries appended to column_types. Recognised type names are "string", "integer", "float" and "boolean"; '[' and ']' raise and lower the nesting depth recorded with each entry; '*' appends a repeat_t entry. An unrecognised type name, an unmatched ']' or a leading '*' trips JSONCONS_ASSERT. */ template <class CharT,class Container> +void parse_column_types(const std::basic_string<CharT>& types, + Container& column_types) +{ + const std::map<jsoncons::basic_string_view<CharT>,csv_column_type> type_dictionary = + { + + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"string"),csv_column_type::string_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"integer"),csv_column_type::integer_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"float"),csv_column_type::float_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"boolean"),csv_column_type::boolean_t} + }; + + column_state state = column_state::sequence; + int depth = 0; + std::basic_string<CharT> buffer; + + auto p = types.begin(); + while (p != types.end()) + { + switch (state) + { + case column_state::sequence: + { + switch (*p) + { + case ' ': case '\t':case '\r': case '\n': + ++p; + break; + case '[': + ++depth; + ++p; + break; + case ']': + JSONCONS_ASSERT(depth > 0); + --depth; + ++p; + break; + case '*': + { + JSONCONS_ASSERT(column_types.size() != 0); + std::size_t offset = 0;
std::size_t level = column_types.size() > 0 ? column_types.back().level: 0; + if (level > 0) + { + /* count the trailing run of entries that share the last entry's level */ for (auto it = column_types.rbegin(); + it != column_types.rend() && level == it->level; + ++it) + { + ++offset; + } + } + else + { + offset = 1; + } + column_types.emplace_back(csv_column_type::repeat_t,depth,offset); + ++p; + break; + } + default: + buffer.clear(); + state = column_state::label; + break; + } + break; + } + case column_state::label: + { + switch (*p) + { + case '*': + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + /* the '*' is not consumed here; it is examined again in sequence state */ state = column_state::sequence; + break; + } + case ',': + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + ++p; + state = column_state::sequence; + break; + } + case ']': + { + JSONCONS_ASSERT(depth > 0); + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + /* the entry is recorded at the depth in effect before the bracket closes */ column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + --depth; + ++p; + state = column_state::sequence; + break; + } + default: + { + buffer.push_back(*p); + ++p; + break; + } + } + break; + } + } + } + /* flush a type name that runs to the end of the input */ if (state == column_state::label) + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + } +} + +} // detail + +template <class CharT> +class basic_csv_options; + +template <class CharT> +class basic_csv_options_common +{ + friend class basic_csv_options<CharT>; +public: + using char_type = CharT; + using string_type = std::basic_string<CharT>; +private: + char_type field_delimiter_; + char_type quote_char_; + char_type quote_escape_char_; + char_type subfield_delimiter_; +
bool enable_nan_to_num_:1; + bool enable_inf_to_num_:1; + bool enable_neginf_to_num_:1; + bool enable_nan_to_str_:1; + bool enable_inf_to_str_:1; + bool enable_neginf_to_str_:1; + bool enable_str_to_nan_:1; + bool enable_str_to_inf_:1; + bool enable_str_to_neginf_:1; + + string_type nan_to_num_; + string_type inf_to_num_; + string_type neginf_to_num_; + string_type nan_to_str_; + string_type inf_to_str_; + string_type neginf_to_str_; + string_type column_names_; + +protected: + /* Defaults: ',' field delimiter, '"' as both quote and quote-escape character, no subfield delimiter (char_type()), every NaN/infinity replacement flag off. */ basic_csv_options_common() + : field_delimiter_(','), + quote_char_('\"'), + quote_escape_char_('\"'), + subfield_delimiter_(char_type()), + enable_nan_to_num_(false), + enable_inf_to_num_(false), + enable_neginf_to_num_(false), + enable_nan_to_str_(false), + enable_inf_to_str_(false), + enable_neginf_to_str_(false), + enable_str_to_nan_(false), + enable_str_to_inf_(false), + enable_str_to_neginf_(false) + { + } + + basic_csv_options_common(const basic_csv_options_common&) = default; + basic_csv_options_common& operator=(const basic_csv_options_common&) = default; + + virtual ~basic_csv_options_common() noexcept = default; +public: + + char_type field_delimiter() const + { + return field_delimiter_; + } + + /* returns char_type() when no subfield delimiter has been set */ const char_type subfield_delimiter() const + { + return subfield_delimiter_; + } + + char_type quote_char() const + { + return quote_char_; + } + + char_type quote_escape_char() const + { + return quote_escape_char_; + } + + string_type column_names() const + { + return column_names_; + } + + bool enable_nan_to_num() const + { + return enable_nan_to_num_; + } + + bool enable_inf_to_num() const + { + return enable_inf_to_num_; + } + + /* also true when only the +infinity replacement is enabled */ bool enable_neginf_to_num() const + { + return enable_neginf_to_num_ || enable_inf_to_num_; + } + + bool enable_nan_to_str() const + { + return enable_nan_to_str_; + } + + bool enable_str_to_nan() const + { + return enable_str_to_nan_; + } + + bool enable_inf_to_str() const + { + return enable_inf_to_str_; + } + + bool enable_str_to_inf() const + { + return
enable_str_to_inf_; + } + + /* also true when only the +infinity replacement is enabled */ bool enable_neginf_to_str() const + { + return enable_neginf_to_str_ || enable_inf_to_str_; + } + + bool enable_str_to_neginf() const + { + return enable_str_to_neginf_ || enable_str_to_inf_; + } + + string_type nan_to_num() const + { + return nan_to_num_; + } + + string_type inf_to_num() const + { + return inf_to_num_; + } + + /* Returns the explicit -infinity replacement when one is enabled; otherwise, when a +infinity replacement is enabled, that text prefixed with '-'; otherwise the stored neginf_to_num_ value. */ string_type neginf_to_num() const + { + if (enable_neginf_to_num_) + { + return neginf_to_num_; + } + else if (enable_inf_to_num_) + { + string_type s; + s.push_back('-'); + s.append(inf_to_num_); + return s; + } + else + { + return neginf_to_num_; + } + } + + string_type nan_to_str() const + { + return nan_to_str_; + } + + string_type inf_to_str() const + { + return inf_to_str_; + } + + /* Same fallback as neginf_to_num(), applied to the string replacements. */ string_type neginf_to_str() const + { + if (enable_neginf_to_str_) + { + return neginf_to_str_; + } + else if (enable_inf_to_str_) + { + string_type s; + s.push_back('-'); + s.append(inf_to_str_); + return s; + } + else + { + return neginf_to_str_; // empty string + } + } +}; + +template <class CharT> +class basic_csv_decode_options : public virtual basic_csv_options_common<CharT> +{ + friend class basic_csv_options<CharT>; + using super_type = basic_csv_options_common<CharT>; +public: + using typename super_type::char_type; + using typename super_type::string_type; + +private: + bool assume_header_:1; + bool ignore_empty_values_:1; + bool ignore_empty_lines_:1; + bool trim_leading_:1; + bool trim_trailing_:1; + bool trim_leading_inside_quotes_:1; + bool trim_trailing_inside_quotes_:1; + bool unquoted_empty_value_is_null_:1; + bool infer_types_:1; + bool lossless_number_:1; + char_type comment_starter_; + csv_mapping_kind mapping_; + std::size_t header_lines_; + std::size_t max_lines_; + string_type column_types_; + string_type column_defaults_; +public: + /* Defaults: ignore_empty_lines and infer_types on, all other flags off, no comment starter ('\0'), mapping left value-initialized, zero header lines, max_lines at the size_t maximum. */ basic_csv_decode_options() + : assume_header_(false), + ignore_empty_values_(false), + ignore_empty_lines_(true), + trim_leading_(false), + trim_trailing_(false), +
trim_leading_inside_quotes_(false), + trim_trailing_inside_quotes_(false), + unquoted_empty_value_is_null_(false), + infer_types_(true), + lossless_number_(false), + comment_starter_('\0'), + mapping_(), + header_lines_(0), + max_lines_((std::numeric_limits<std::size_t>::max)()) + {} + + basic_csv_decode_options(const basic_csv_decode_options& other) = default; + + /* Move constructor: scalar members are copied, the two string members are moved from other. */ basic_csv_decode_options(basic_csv_decode_options&& other) + : super_type(std::forward<basic_csv_decode_options>(other)), + assume_header_(other.assume_header_), + ignore_empty_values_(other.ignore_empty_values_), + ignore_empty_lines_(other.ignore_empty_lines_), + trim_leading_(other.trim_leading_), + trim_trailing_(other.trim_trailing_), + trim_leading_inside_quotes_(other.trim_leading_inside_quotes_), + trim_trailing_inside_quotes_(other.trim_trailing_inside_quotes_), + unquoted_empty_value_is_null_(other.unquoted_empty_value_is_null_), + infer_types_(other.infer_types_), + lossless_number_(other.lossless_number_), + comment_starter_(other.comment_starter_), + mapping_(other.mapping_), + header_lines_(other.header_lines_), + max_lines_(other.max_lines_), + column_types_(std::move(other.column_types_)), + column_defaults_(std::move(other.column_defaults_)) + {} + + /* Number of header lines; evaluates to 1 when assume_header is set and the stored count is 0 or 1. */ std::size_t header_lines() const + { + return (assume_header_ && header_lines_ <= 1) ?
1 : header_lines_; + } + + bool assume_header() const + { + return assume_header_; + } + + bool ignore_empty_values() const + { + return ignore_empty_values_; + } + + bool ignore_empty_lines() const + { + return ignore_empty_lines_; + } + + bool trim_leading() const + { + return trim_leading_; + } + + bool trim_trailing() const + { + return trim_trailing_; + } + + bool trim_leading_inside_quotes() const + { + return trim_leading_inside_quotes_; + } + + bool trim_trailing_inside_quotes() const + { + return trim_trailing_inside_quotes_; + } + + /* true only when both leading and trailing trimming are enabled */ bool trim() const + { + return trim_leading_ && trim_trailing_; + } + + /* true only when both inside-quotes trimming flags are enabled */ bool trim_inside_quotes() const + { + return trim_leading_inside_quotes_ && trim_trailing_inside_quotes_; + } + + bool unquoted_empty_value_is_null() const + { + return unquoted_empty_value_is_null_; + } + + bool infer_types() const + { + return infer_types_; + } + + bool lossless_number() const + { + return lossless_number_; + } + + char_type comment_starter() const + { + return comment_starter_; + } + + /* Returns the stored mapping when it differs from a value-initialized csv_mapping_kind; otherwise n_objects when a header is assumed or column names are present, else n_rows. */ csv_mapping_kind mapping_kind() const + { + return mapping_ != csv_mapping_kind() ? mapping_ : (assume_header() || this->column_names().size() > 0 ?
csv_mapping_kind::n_objects : csv_mapping_kind::n_rows); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + /* older spelling; forwards to mapping_kind() */ csv_mapping_kind mapping() const + { + return mapping_kind(); + } +#endif + + std::size_t max_lines() const + { + return max_lines_; + } + + string_type column_types() const + { + return column_types_; + } + + string_type column_defaults() const + { + return column_defaults_; + } +}; + +/* Options read when encoding CSV; virtually inherits the settings shared with decoding. */ template <class CharT> +class basic_csv_encode_options : public virtual basic_csv_options_common<CharT> +{ + friend class basic_csv_options<CharT>; + using super_type = basic_csv_options_common<CharT>; +public: + using typename super_type::char_type; + using typename super_type::string_type; +private: + quote_style_kind quote_style_; + float_chars_format float_format_; + int8_t precision_; + string_type line_delimiter_; +public: + /* Defaults: minimal quoting, general float format, precision 0, line delimiter "\n". */ basic_csv_encode_options() + : quote_style_(quote_style_kind::minimal), + float_format_(float_chars_format::general), + precision_(0) + { + line_delimiter_.push_back('\n'); + } + + basic_csv_encode_options(const basic_csv_encode_options& other) = default; + + /* Move constructor: scalar members are copied, line_delimiter_ is moved from other. */ basic_csv_encode_options(basic_csv_encode_options&& other) + : super_type(std::forward<basic_csv_encode_options>(other)), + quote_style_(other.quote_style_), + float_format_(other.float_format_), + precision_(other.precision_), + line_delimiter_(std::move(other.line_delimiter_)) + { + } + + quote_style_kind quote_style() const + { + return quote_style_; + } + + float_chars_format float_format() const + { + return float_format_; + } + + int8_t precision() const + { + return precision_; + } + + string_type line_delimiter() const + { + return line_delimiter_; + } +}; + +/* Combined options type: derives from both the decode and the encode options and adds the fluent setters. */ template <class CharT> +class basic_csv_options final : public basic_csv_decode_options<CharT>, public basic_csv_encode_options<CharT> +{ + using char_type = CharT; + using string_type = std::basic_string<CharT>; + +public: + /* re-expose the base-class getters, which the same-named setters declared in this class would otherwise hide */ using basic_csv_decode_options<CharT>::enable_str_to_nan; + using
basic_csv_decode_options<CharT>::enable_str_to_inf; + using basic_csv_decode_options<CharT>::enable_str_to_neginf; + using basic_csv_decode_options<CharT>::nan_to_str; + using basic_csv_decode_options<CharT>::inf_to_str; + using basic_csv_decode_options<CharT>::neginf_to_str; + using basic_csv_decode_options<CharT>::nan_to_num; + using basic_csv_decode_options<CharT>::inf_to_num; + using basic_csv_decode_options<CharT>::neginf_to_num; + using basic_csv_decode_options<CharT>::field_delimiter; + using basic_csv_decode_options<CharT>::subfield_delimiter; + using basic_csv_decode_options<CharT>::quote_char; + using basic_csv_decode_options<CharT>::quote_escape_char; + using basic_csv_decode_options<CharT>::column_names; + /* getters declared by the decode options */ using basic_csv_decode_options<CharT>::header_lines; + using basic_csv_decode_options<CharT>::assume_header; + using basic_csv_decode_options<CharT>::ignore_empty_values; + using basic_csv_decode_options<CharT>::ignore_empty_lines; + using basic_csv_decode_options<CharT>::trim_leading; + using basic_csv_decode_options<CharT>::trim_trailing; + using basic_csv_decode_options<CharT>::trim_leading_inside_quotes; + using basic_csv_decode_options<CharT>::trim_trailing_inside_quotes; + using basic_csv_decode_options<CharT>::trim; + using basic_csv_decode_options<CharT>::trim_inside_quotes; + using basic_csv_decode_options<CharT>::unquoted_empty_value_is_null; + using basic_csv_decode_options<CharT>::infer_types; + using basic_csv_decode_options<CharT>::lossless_number; + using basic_csv_decode_options<CharT>::comment_starter; + using basic_csv_decode_options<CharT>::mapping; + using basic_csv_decode_options<CharT>::max_lines; + using basic_csv_decode_options<CharT>::column_types; + using basic_csv_decode_options<CharT>::column_defaults; + /* getters declared by the encode options */ using basic_csv_encode_options<CharT>::float_format; + using basic_csv_encode_options<CharT>::precision; + using basic_csv_encode_options<CharT>::line_delimiter; + using basic_csv_encode_options<CharT>::quote_style; +
static constexpr size_t default_indent = 4; + +// Constructors + + basic_csv_options() = default; + basic_csv_options(const basic_csv_options&) = default; + basic_csv_options(basic_csv_options&&) = default; + basic_csv_options& operator=(const basic_csv_options&) = default; + basic_csv_options& operator=(basic_csv_options&&) = default; + + basic_csv_options& float_format(float_chars_format value) + { + this->float_format_ = value; + return *this; + } + + basic_csv_options& precision(int8_t value) + { + this->precision_ = value; + return *this; + } + + basic_csv_options& header_lines(std::size_t value) + { + this->header_lines_ = value; + return *this; + } + + basic_csv_options& assume_header(bool value) + { + this->assume_header_ = value; + return *this; + } + + basic_csv_options& ignore_empty_values(bool value) + { + this->ignore_empty_values_ = value; + return *this; + } + + basic_csv_options& ignore_empty_lines(bool value) + { + this->ignore_empty_lines_ = value; + return *this; + } + + basic_csv_options& trim_leading(bool value) + { + this->trim_leading_ = value; + return *this; + } + + basic_csv_options& trim_trailing(bool value) + { + this->trim_trailing_ = value; + return *this; + } + + basic_csv_options& trim_leading_inside_quotes(bool value) + { + this->trim_leading_inside_quotes_ = value; + return *this; + } + + basic_csv_options& trim_trailing_inside_quotes(bool value) + { + this->trim_trailing_inside_quotes_ = value; + return *this; + } + + basic_csv_options& trim(bool value) + { + this->trim_leading_ = value; + this->trim_trailing_ = value; + return *this; + } + + basic_csv_options& trim_inside_quotes(bool value) + { + this->trim_leading_inside_quotes_ = value; + this->trim_trailing_inside_quotes_ = value; + return *this; + } + + basic_csv_options& unquoted_empty_value_is_null(bool value) + { + this->unquoted_empty_value_is_null_ = value; + return *this; + } + + basic_csv_options& column_names(const string_type& value) + { + this->column_names_ = 
value; + return *this; + } + + basic_csv_options& column_types(const string_type& value) + { + this->column_types_ = value; + return *this; + } + + basic_csv_options& column_defaults(const string_type& value) + { + this->column_defaults_ = value; + return *this; + } + + basic_csv_options& field_delimiter(char_type value) + { + this->field_delimiter_ = value; + return *this; + } + + basic_csv_options& subfield_delimiter(char_type value) + { + this->subfield_delimiter_ = value; + return *this; + } + + basic_csv_options& line_delimiter(const string_type& value) + { + this->line_delimiter_ = value; + return *this; + } + + basic_csv_options& quote_char(char_type value) + { + this->quote_char_ = value; + return *this; + } + + basic_csv_options& infer_types(bool value) + { + this->infer_types_ = value; + return *this; + } + + basic_csv_options& lossless_number(bool value) + { + this->lossless_number_ = value; + return *this; + } + + basic_csv_options& quote_escape_char(char_type value) + { + this->quote_escape_char_ = value; + return *this; + } + + basic_csv_options& comment_starter(char_type value) + { + this->comment_starter_ = value; + return *this; + } + + basic_csv_options& quote_style(quote_style_kind value) + { + this->quote_style_ = value; + return *this; + } + +//#if !defined(JSONCONS_NO_DEPRECATED) + basic_csv_options& mapping(csv_mapping_kind value) + { + this->mapping_ = value; + return *this; + } +//#endif + + basic_csv_options& mapping_kind(csv_mapping_kind value) + { + this->mapping_ = value; + return *this; + } + + basic_csv_options& max_lines(std::size_t value) + { + this->max_lines_ = value; + return *this; + } + + basic_csv_options& nan_to_num(const string_type& value) + { + this->enable_nan_to_num_ = true; + this->nan_to_str_.clear(); + this->nan_to_num_ = value; + return *this; + } + + basic_csv_options& inf_to_num(const string_type& value) + { + this->enable_inf_to_num_ = true; + this->inf_to_str_.clear(); + this->inf_to_num_ = value; + return *this; 
+ } + + basic_csv_options& neginf_to_num(const string_type& value) + { + this->enable_neginf_to_num_ = true; + this->neginf_to_str_.clear(); + this->neginf_to_num_ = value; + return *this; + } + + basic_csv_options& nan_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_nan_to_str_ = true; + this->enable_str_to_nan_ = enable_inverse; + this->nan_to_num_.clear(); + this->nan_to_str_ = value; + return *this; + } + + basic_csv_options& inf_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_inf_to_str_ = true; + this->enable_inf_to_str_ = enable_inverse; + this->inf_to_num_.clear(); + this->inf_to_str_ = value; + return *this; + } + + basic_csv_options& neginf_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_neginf_to_str_ = true; + this->enable_neginf_to_str_ = enable_inverse; + this->neginf_to_num_.clear(); + this->neginf_to_str_ = value; + return *this; + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + JSONCONS_DEPRECATED_MSG("Instead, use float_format(float_chars_format)") + basic_csv_options& floating_point_format(float_chars_format value) + { + this->float_format_ = value; + return *this; + } +#endif + +}; + +using csv_options = basic_csv_options<char>; +using wcsv_options = basic_csv_options<wchar_t>; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use csv_options") typedef csv_options csv_parameters; +JSONCONS_DEPRECATED_MSG("Instead, use wcsv_options") typedef wcsv_options wcsv_parameters; +JSONCONS_DEPRECATED_MSG("Instead, use csv_options") typedef csv_options csv_serializing_options; +JSONCONS_DEPRECATED_MSG("Instead, use wcsv_options") typedef wcsv_options wcsv_serializing_options; +#endif + + +}} +#endif diff --git a/include/jsoncons_ext/csv/csv_parser.hpp b/include/jsoncons_ext/csv/csv_parser.hpp new file mode 100644 index 0000000..37887e2 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_parser.hpp @@ -0,0 +1,2097 @@ +// Copyright 2015 Daniel 
Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_PARSER_HPP +#define JSONCONS_CSV_CSV_PARSER_HPP + +#include <memory> // std::allocator +#include <string> +#include <sstream> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <cctype> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/json_reader.hpp> +#include <jsoncons/json_filter.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons/detail/parse_number.hpp> +#include <jsoncons_ext/csv/csv_error.hpp> +#include <jsoncons_ext/csv/csv_options.hpp> + +namespace jsoncons { namespace csv { + +enum class csv_mode +{ + initial, + header, + data, + subfields +}; + +enum class csv_parse_state +{ + start, + cr, + column_labels, + expect_comment_or_record, + expect_record, + end_record, + no_more_records, + comment, + between_values, + quoted_string, + unquoted_string, + before_unquoted_string, + escaped_value, + minus, + zero, + integer, + fraction, + exp1, + exp2, + exp3, + accept, + before_unquoted_field, + before_unquoted_field_tail, + before_unquoted_field_tail1, + before_last_unquoted_field, + before_last_unquoted_field_tail, + before_unquoted_subfield, + before_unquoted_subfield_tail, + before_quoted_subfield, + before_quoted_subfield_tail, + before_quoted_field, + before_quoted_field_tail, + before_last_quoted_field, + before_last_quoted_field_tail, + done +}; + +enum class cached_state +{ + begin_object, + end_object, + begin_array, + end_array, + name, + item, + done +}; + +struct default_csv_parsing +{ + bool operator()(csv_errc, const ser_context&) noexcept + { + return false; + } +}; + +namespace detail { + + template <class CharT,class TempAllocator> + class parse_event + { + using temp_allocator_type = TempAllocator; + using 
string_view_type = typename basic_json_visitor<CharT>::string_view_type; + using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>; + using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>; + using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>; + using byte_string_type = basic_byte_string<byte_allocator_type>; + + staj_event_type event_type; + string_type string_value; + byte_string_type byte_string_value; + union + { + bool bool_value; + int64_t int64_value; + uint64_t uint64_value; + double double_value; + }; + semantic_tag tag; + public: + parse_event(staj_event_type event_type, semantic_tag tag, const TempAllocator& alloc) + : event_type(event_type), + string_value(alloc), + byte_string_value(alloc), + tag(tag) + { + } + + parse_event(const string_view_type& value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::string_value), + string_value(value.data(),value.length(),alloc), + byte_string_value(alloc), + tag(tag) + { + } + + parse_event(const byte_string_view& value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::byte_string_value), + string_value(alloc), + byte_string_value(value.data(),value.size(),alloc), + tag(tag) + { + } + + parse_event(bool value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::bool_value), + string_value(alloc), + byte_string_value(alloc), + bool_value(value), + tag(tag) + { + } + + parse_event(int64_t value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::int64_value), + string_value(alloc), + byte_string_value(alloc), + int64_value(value), + tag(tag) + { + } + + parse_event(uint64_t value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::uint64_value), + string_value(alloc), + byte_string_value(alloc), + uint64_value(value), + tag(tag) + 
{ + } + + parse_event(double value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::double_value), + string_value(alloc), + byte_string_value(alloc), + double_value(value), + tag(tag) + { + } + + parse_event(const parse_event&) = default; + parse_event(parse_event&&) = default; + parse_event& operator=(const parse_event&) = default; + parse_event& operator=(parse_event&&) = default; + + bool replay(basic_json_visitor<CharT>& visitor) const + { + switch (event_type) + { + case staj_event_type::begin_array: + return visitor.begin_array(tag, ser_context()); + case staj_event_type::end_array: + return visitor.end_array(ser_context()); + case staj_event_type::string_value: + return visitor.string_value(string_value, tag, ser_context()); + case staj_event_type::byte_string_value: + case staj_event_type::null_value: + return visitor.null_value(tag, ser_context()); + case staj_event_type::bool_value: + return visitor.bool_value(bool_value, tag, ser_context()); + case staj_event_type::int64_value: + return visitor.int64_value(int64_value, tag, ser_context()); + case staj_event_type::uint64_value: + return visitor.uint64_value(uint64_value, tag, ser_context()); + case staj_event_type::double_value: + return visitor.double_value(double_value, tag, ser_context()); + default: + return false; + } + } + }; + + template <class CharT, class TempAllocator> + class m_columns_filter : public basic_json_visitor<CharT> + { + public: + using string_view_type = typename basic_json_visitor<CharT>::string_view_type; + using char_type = CharT; + using temp_allocator_type = TempAllocator; + + using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>; + using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>; + + using string_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type>; + using parse_event_allocator_type = typename 
std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event<CharT,TempAllocator>>; + using parse_event_vector_type = std::vector<parse_event<CharT,TempAllocator>, parse_event_allocator_type>; + using parse_event_vector_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event_vector_type>; + private: + TempAllocator alloc_; + std::size_t name_index_; + int level_; + cached_state state_; + std::size_t column_index_; + std::size_t row_index_; + + std::vector<string_type, string_allocator_type> column_names_; + std::vector<parse_event_vector_type,parse_event_vector_allocator_type> cached_events_; + public: + + m_columns_filter(const TempAllocator& alloc) + : alloc_(alloc), + name_index_(0), + level_(0), + state_(cached_state::begin_object), + column_index_(0), + row_index_(0), + column_names_(alloc), + cached_events_(alloc) + { + } + + void reset() + { + name_index_ = 0; + level_ = 0; + state_ = cached_state::begin_object; + column_index_ = 0; + row_index_ = 0; + column_names_.clear(); + cached_events_.clear(); + } + + bool done() const + { + return state_ == cached_state::done; + } + + void initialize(const std::vector<string_type, string_allocator_type>& column_names) + { + for (const auto& name : column_names) + { + column_names_.push_back(name); + cached_events_.emplace_back(alloc_); + } + name_index_ = 0; + level_ = 0; + column_index_ = 0; + row_index_ = 0; + state_ = cached_state::begin_object; + } + + void skip_column() + { + ++name_index_; + } + + bool replay_parse_events(basic_json_visitor<CharT>& visitor) + { + bool more = true; + while (more) + { + switch (state_) + { + case cached_state::begin_object: + more = visitor.begin_object(semantic_tag::none, ser_context()); + column_index_ = 0; + state_ = cached_state::name; + break; + case cached_state::end_object: + more = visitor.end_object(ser_context()); + state_ = cached_state::done; + break; + case cached_state::name: + if (column_index_ < 
column_names_.size()) + { + more = visitor.key(column_names_[column_index_], ser_context()); + state_ = cached_state::begin_array; + } + else + { + state_ = cached_state::end_object; + } + break; + case cached_state::begin_array: + more = visitor.begin_array(semantic_tag::none, ser_context()); + row_index_ = 0; + state_ = cached_state::item; + break; + case cached_state::end_array: + more = visitor.end_array(ser_context()); + ++column_index_; + state_ = cached_state::name; + break; + case cached_state::item: + if (row_index_ < cached_events_[column_index_].size()) + { + more = cached_events_[column_index_][row_index_].replay(visitor); + ++row_index_; + } + else + { + state_ = cached_state::end_array; + } + break; + default: + more = false; + break; + } + } + return more; + } + + void visit_flush() override + { + } + + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + ec = csv_errc::invalid_parse_state; + return false; + } + + bool visit_end_object(const ser_context&, std::error_code& ec) override + { + ec = csv_errc::invalid_parse_state; + return false; + } + + bool visit_begin_array(semantic_tag tag, const ser_context&, std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(staj_event_type::begin_array, tag, alloc_); + + ++level_; + } + return true; + } + + bool visit_end_array(const ser_context&, std::error_code&) override + { + if (level_ > 0) + { + cached_events_[name_index_].emplace_back(staj_event_type::end_array, semantic_tag::none, alloc_); + ++name_index_; + --level_; + } + else + { + name_index_ = 0; + } + return true; + } + + bool visit_key(const string_view_type&, const ser_context&, std::error_code& ec) override + { + ec = csv_errc::invalid_parse_state; + return false; + } + + bool visit_null(semantic_tag tag, const ser_context&, std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + 
cached_events_[name_index_].emplace_back(staj_event_type::null_value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_string(const string_view_type& value, semantic_tag tag, const ser_context&, std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_byte_string(const byte_string_view& value, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_double(double value, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_int64(int64_t value, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_uint64(uint64_t value, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_bool(bool value, semantic_tag tag, const ser_context&, std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + }; + +} // namespace detail + +template<class CharT,class 
TempAllocator=std::allocator<char>> +class basic_csv_parser : public ser_context +{ +public: + using string_view_type = jsoncons::basic_string_view<CharT>; + using char_type = CharT; +private: + struct string_maps_to_double + { + string_view_type s; + + bool operator()(const std::pair<string_view_type,double>& val) const + { + return val.first == s; + } + }; + + using temp_allocator_type = TempAllocator; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type; + using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type> string_allocator_type; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_mode> csv_mode_allocator_type; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_type_info> csv_type_info_allocator_type; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<std::vector<string_type,string_allocator_type>> string_vector_allocator_type; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_parse_state> csv_parse_state_allocator_type; + + static constexpr int default_depth = 3; + + temp_allocator_type alloc_; + csv_parse_state state_; + basic_json_visitor<CharT>* visitor_; + std::function<bool(csv_errc,const ser_context&)> err_handler_; + std::size_t column_; + std::size_t line_; + int depth_; + const basic_csv_decode_options<CharT> options_; + std::size_t column_index_; + std::size_t level_; + std::size_t offset_; + jsoncons::detail::chars_to to_double_; + const CharT* begin_input_; + const CharT* input_end_; + const CharT* input_ptr_; + bool more_; + std::size_t header_line_; + + detail::m_columns_filter<CharT,TempAllocator> m_columns_filter_; + std::vector<csv_mode,csv_mode_allocator_type> stack_; + 
std::vector<string_type,string_allocator_type> column_names_; + std::vector<csv_type_info,csv_type_info_allocator_type> column_types_; + std::vector<string_type,string_allocator_type> column_defaults_; + std::vector<csv_parse_state,csv_parse_state_allocator_type> state_stack_; + string_type buffer_; + std::vector<std::pair<string_view_type,double>> string_double_map_; + +public: + basic_csv_parser(const TempAllocator& alloc = TempAllocator()) + : basic_csv_parser(basic_csv_decode_options<CharT>(), + default_csv_parsing(), + alloc) + { + } + + basic_csv_parser(const basic_csv_decode_options<CharT>& options, + const TempAllocator& alloc = TempAllocator()) + : basic_csv_parser(options, + default_csv_parsing(), + alloc) + { + } + + basic_csv_parser(std::function<bool(csv_errc,const ser_context&)> err_handler, + const TempAllocator& alloc = TempAllocator()) + : basic_csv_parser(basic_csv_decode_options<CharT>(), + err_handler, + alloc) + { + } + + basic_csv_parser(const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const TempAllocator& alloc = TempAllocator()) + : alloc_(alloc), + state_(csv_parse_state::start), + visitor_(nullptr), + err_handler_(err_handler), + column_(1), + line_(1), + depth_(default_depth), + options_(options), + column_index_(0), + level_(0), + offset_(0), + begin_input_(nullptr), + input_end_(nullptr), + input_ptr_(nullptr), + more_(true), + header_line_(1), + m_columns_filter_(alloc), + stack_(alloc), + column_names_(alloc), + column_types_(alloc), + column_defaults_(alloc), + state_stack_(alloc), + buffer_(alloc) + { + if (options_.enable_str_to_nan()) + { + string_double_map_.emplace_back(options_.nan_to_str(),std::nan("")); + } + if (options_.enable_str_to_inf()) + { + string_double_map_.emplace_back(options_.inf_to_str(),std::numeric_limits<double>::infinity()); + } + if (options_.enable_str_to_neginf()) + { + 
string_double_map_.emplace_back(options_.neginf_to_str(),-std::numeric_limits<double>::infinity()); + } + + initialize(); + } + + ~basic_csv_parser() noexcept + { + } + + bool done() const + { + return state_ == csv_parse_state::done; + } + + bool accept() const + { + return state_ == csv_parse_state::accept || state_ == csv_parse_state::done; + } + + bool stopped() const + { + return !more_; + } + + bool source_exhausted() const + { + return input_ptr_ == input_end_; + } + + const std::vector<string_type,string_allocator_type>& column_labels() const + { + return column_names_; + } + + void reinitialize() + { + state_ = csv_parse_state::start; + visitor_ = nullptr; + column_ = 1; + line_ = 1; + depth_ = default_depth; + column_index_ = 0; + level_ = 0; + offset_ = 0; + begin_input_ = nullptr; + input_end_ = nullptr; + input_ptr_ = nullptr; + more_ = true; + header_line_ = 1; + m_columns_filter_.reset(); + stack_.clear(); + column_names_.clear(); + column_types_.clear(); + column_defaults_.clear(); + state_stack_.clear(); + buffer_.clear(); + + initialize(); + } + + void restart() + { + more_ = true; + } + + void parse_some(basic_json_visitor<CharT>& visitor) + { + std::error_code ec; + parse_some(visitor,ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,line_,column_)); + } + } + + void parse_some(basic_json_visitor<CharT>& visitor, std::error_code& ec) + { + switch (options_.mapping_kind()) + { + case csv_mapping_kind::m_columns: + visitor_ = &m_columns_filter_; + break; + default: + visitor_ = std::addressof(visitor); + break; + } + + const CharT* local_input_end = input_end_; + + if (input_ptr_ == local_input_end && more_) + { + switch (state_) + { + case csv_parse_state::start: + ec = csv_errc::source_error; + more_ = false; + return; + case csv_parse_state::before_unquoted_field: + case csv_parse_state::before_last_unquoted_field: + end_unquoted_string_value(ec); + state_ = csv_parse_state::before_last_unquoted_field_tail; + break; + case 
csv_parse_state::before_last_unquoted_field_tail: + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + ++column_index_; + state_ = csv_parse_state::end_record; + break; + case csv_parse_state::before_unquoted_string: + buffer_.clear(); + JSONCONS_FALLTHROUGH; + case csv_parse_state::unquoted_string: + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + if (options_.ignore_empty_values() && buffer_.empty()) + { + state_ = csv_parse_state::end_record; + } + else + { + before_value(ec); + state_ = csv_parse_state::before_unquoted_field; + } + break; + case csv_parse_state::before_last_quoted_field: + end_quoted_string_value(ec); + ++column_index_; + state_ = csv_parse_state::end_record; + break; + case csv_parse_state::escaped_value: + if (options_.quote_escape_char() == options_.quote_char()) + { + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + before_value(ec); + ++column_; + state_ = csv_parse_state::before_last_quoted_field; + } + else + { + state_ = csv_parse_state::end_record; + } + } + else + { + ec = csv_errc::invalid_escaped_char; + more_ = false; + return; + } + break; + case csv_parse_state::end_record: + if (column_index_ > 0) + { + after_record(ec); + } + state_ = csv_parse_state::no_more_records; + break; + case csv_parse_state::no_more_records: + switch (stack_.back()) + { + case csv_mode::header: + stack_.pop_back(); + break; + case csv_mode::data: + stack_.pop_back(); + break; + default: + break; + } + more_ = visitor_->end_array(*this, ec); + if (options_.mapping_kind() == csv_mapping_kind::m_columns) + { + if (!m_columns_filter_.done()) + { + more_ = m_columns_filter_.replay_parse_events(visitor); + } + else + { + state_ = csv_parse_state::accept; + } + } + else + { + state_ = csv_parse_state::accept; + } + break; + case csv_parse_state::accept: + if (!(stack_.size() == 1 && 
stack_.back() == csv_mode::initial)) + { + err_handler_(csv_errc::unexpected_eof, *this); + ec = csv_errc::unexpected_eof; + more_ = false; + return; + } + stack_.pop_back(); + visitor_->flush(); + state_ = csv_parse_state::done; + more_ = false; + return; + default: + state_ = csv_parse_state::end_record; + break; + } + } + + for (; (input_ptr_ < local_input_end) && more_;) + { + CharT curr_char = *input_ptr_; + + switch (state_) + { + case csv_parse_state::cr: + ++line_; + column_ = 1; + switch (*input_ptr_) + { + case '\n': + ++input_ptr_; + state_ = pop_state(); + break; + default: + state_ = pop_state(); + break; + } + break; + case csv_parse_state::start: + if (options_.mapping_kind() != csv_mapping_kind::m_columns) + { + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + } + if (options_.assume_header() && options_.mapping_kind() == csv_mapping_kind::n_rows && options_.column_names().size() > 0) + { + column_index_ = 0; + state_ = csv_parse_state::column_labels; + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + state_ = csv_parse_state::expect_comment_or_record; + } + else + { + state_ = csv_parse_state::expect_comment_or_record; + } + break; + case csv_parse_state::column_labels: + if (column_index_ < column_names_.size()) + { + more_ = visitor_->string_value(column_names_[column_index_], semantic_tag::none, *this, ec); + ++column_index_; + } + else + { + more_ = visitor_->end_array(*this, ec); + state_ = csv_parse_state::expect_comment_or_record; + //stack_.back() = csv_mode::data; + column_index_ = 0; + } + break; + case csv_parse_state::comment: + switch (curr_char) + { + case '\n': + { + ++line_; + if (stack_.back() == csv_mode::header) + { + ++header_line_; + } + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + break; + } + case '\r': + ++line_; + if (stack_.back() == csv_mode::header) + { + ++header_line_; + } + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + push_state(state_); + 
state_ = csv_parse_state::cr; + break; + default: + ++column_; + break; + } + ++input_ptr_; + break; + + case csv_parse_state::expect_comment_or_record: + buffer_.clear(); + if (curr_char == options_.comment_starter()) + { + state_ = csv_parse_state::comment; + ++column_; + ++input_ptr_; + } + else + { + state_ = csv_parse_state::expect_record; + } + break; + case csv_parse_state::quoted_string: + { + if (curr_char == options_.quote_escape_char()) + { + state_ = csv_parse_state::escaped_value; + } + else if (curr_char == options_.quote_char()) + { + state_ = csv_parse_state::between_values; + } + else + { + buffer_.push_back(static_cast<CharT>(curr_char)); + } + } + ++column_; + ++input_ptr_; + break; + case csv_parse_state::escaped_value: + { + if (curr_char == options_.quote_char()) + { + buffer_.push_back(static_cast<CharT>(curr_char)); + state_ = csv_parse_state::quoted_string; + ++column_; + ++input_ptr_; + } + else if (options_.quote_escape_char() == options_.quote_char()) + { + state_ = csv_parse_state::between_values; + } + else + { + ec = csv_errc::invalid_escaped_char; + more_ = false; + return; + } + } + break; + case csv_parse_state::between_values: + switch (curr_char) + { + case '\r': + case '\n': + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + before_value(ec); + state_ = csv_parse_state::before_last_quoted_field; + } + else + { + state_ = csv_parse_state::end_record; + } + break; + } + default: + if (curr_char == options_.field_delimiter()) + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + before_value(ec); + state_ = csv_parse_state::before_quoted_field; + } + else if (options_.subfield_delimiter() != char_type() && curr_char == options_.subfield_delimiter()) + { + if (options_.trim_leading() || 
options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + before_value(ec); + state_ = csv_parse_state::before_quoted_subfield; + } + else if (curr_char == ' ' || curr_char == '\t') + { + ++column_; + ++input_ptr_; + } + else + { + ec = csv_errc::unexpected_char_between_fields; + more_ = false; + return; + } + break; + } + break; + case csv_parse_state::before_unquoted_string: + { + buffer_.clear(); + state_ = csv_parse_state::unquoted_string; + break; + } + case csv_parse_state::before_unquoted_field: + end_unquoted_string_value(ec); + state_ = csv_parse_state::before_unquoted_field_tail; + break; + case csv_parse_state::before_unquoted_field_tail: + { + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + ++column_index_; + state_ = csv_parse_state::before_unquoted_string; + ++column_; + ++input_ptr_; + break; + } + case csv_parse_state::before_unquoted_field_tail1: + { + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + state_ = csv_parse_state::end_record; + ++column_; + ++input_ptr_; + break; + } + + case csv_parse_state::before_last_unquoted_field: + end_unquoted_string_value(ec); + state_ = csv_parse_state::before_last_unquoted_field_tail; + break; + + case csv_parse_state::before_last_unquoted_field_tail: + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + ++column_index_; + state_ = csv_parse_state::end_record; + break; + + case csv_parse_state::before_unquoted_subfield: + if (stack_.back() == csv_mode::data) + { + stack_.push_back(csv_mode::subfields); + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + } + state_ = csv_parse_state::before_unquoted_subfield_tail; + break; + case csv_parse_state::before_unquoted_subfield_tail: + end_unquoted_string_value(ec); + state_ = 
csv_parse_state::before_unquoted_string; + ++column_; + ++input_ptr_; + break; + case csv_parse_state::before_quoted_field: + end_quoted_string_value(ec); + state_ = csv_parse_state::before_unquoted_field_tail; // return to unquoted + break; + case csv_parse_state::before_quoted_subfield: + if (stack_.back() == csv_mode::data) + { + stack_.push_back(csv_mode::subfields); + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + } + state_ = csv_parse_state::before_quoted_subfield_tail; + break; + case csv_parse_state::before_quoted_subfield_tail: + end_quoted_string_value(ec); + state_ = csv_parse_state::before_unquoted_string; + ++column_; + ++input_ptr_; + break; + case csv_parse_state::before_last_quoted_field: + end_quoted_string_value(ec); + state_ = csv_parse_state::before_last_quoted_field_tail; + break; + case csv_parse_state::before_last_quoted_field_tail: + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + ++column_index_; + state_ = csv_parse_state::end_record; + break; + case csv_parse_state::unquoted_string: + { + switch (curr_char) + { + case '\n': + case '\r': + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + before_value(ec); + state_ = csv_parse_state::before_last_unquoted_field; + } + else + { + state_ = csv_parse_state::end_record; + } + break; + } + default: + if (curr_char == options_.field_delimiter()) + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + before_value(ec); + state_ = csv_parse_state::before_unquoted_field; + } + else if (options_.subfield_delimiter() != char_type() && curr_char == options_.subfield_delimiter()) + { + if (options_.trim_leading() || options_.trim_trailing()) + { + 
trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + before_value(ec); + state_ = csv_parse_state::before_unquoted_subfield; + } + else if (curr_char == options_.quote_char()) + { + buffer_.clear(); + state_ = csv_parse_state::quoted_string; + ++column_; + ++input_ptr_; + } + else + { + buffer_.push_back(static_cast<CharT>(curr_char)); + ++column_; + ++input_ptr_; + } + break; + } + break; + } + case csv_parse_state::expect_record: + { + switch (curr_char) + { + case '\n': + { + if (!options_.ignore_empty_lines()) + { + before_record(ec); + state_ = csv_parse_state::end_record; + } + else + { + ++line_; + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + ++input_ptr_; + } + break; + } + case '\r': + if (!options_.ignore_empty_lines()) + { + before_record(ec); + state_ = csv_parse_state::end_record; + } + else + { + ++line_; + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + ++input_ptr_; + push_state(state_); + state_ = csv_parse_state::cr; + } + break; + case ' ': + case '\t': + if (!options_.trim_leading()) + { + buffer_.push_back(static_cast<CharT>(curr_char)); + before_record(ec); + state_ = csv_parse_state::unquoted_string; + } + ++column_; + ++input_ptr_; + break; + default: + before_record(ec); + if (curr_char == options_.quote_char()) + { + buffer_.clear(); + state_ = csv_parse_state::quoted_string; + ++column_; + ++input_ptr_; + } + else + { + state_ = csv_parse_state::unquoted_string; + } + break; + } + break; + } + case csv_parse_state::end_record: + { + switch (curr_char) + { + case '\n': + { + ++line_; + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + after_record(ec); + ++input_ptr_; + break; + } + case '\r': + ++line_; + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + after_record(ec); + push_state(state_); + state_ = csv_parse_state::cr; + ++input_ptr_; + break; + case ' ': + case '\t': + ++column_; + ++input_ptr_; + break; + default: + 
err_handler_(csv_errc::syntax_error, *this); + ec = csv_errc::syntax_error; + more_ = false; + return; + } + break; + } + default: + err_handler_(csv_errc::invalid_parse_state, *this); + ec = csv_errc::invalid_parse_state; + more_ = false; + return; + } + if (line_ > options_.max_lines()) + { + state_ = csv_parse_state::done; + more_ = false; + } + } + } + + void finish_parse() + { + std::error_code ec; + finish_parse(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,line_,column_)); + } + } + + void finish_parse(std::error_code& ec) + { + while (more_) + { + parse_some(ec); + } + } + + csv_parse_state state() const + { + return state_; + } + + void update(const string_view_type sv) + { + update(sv.data(),sv.length()); + } + + void update(const CharT* data, std::size_t length) + { + begin_input_ = data; + input_end_ = data + length; + input_ptr_ = begin_input_; + } + + std::size_t line() const override + { + return line_; + } + + std::size_t column() const override + { + return column_; + } + +private: + void initialize() + { + jsoncons::csv::detail::parse_column_names(options_.column_names(), column_names_); + jsoncons::csv::detail::parse_column_types(options_.column_types(), column_types_); + jsoncons::csv::detail::parse_column_names(options_.column_defaults(), column_defaults_); + + stack_.reserve(default_depth); + stack_.push_back(csv_mode::initial); + stack_.push_back((options_.header_lines() > 0) ? 
csv_mode::header + : csv_mode::data); + } + + // name + void before_value(std::error_code& ec) + { + switch (stack_.back()) + { + case csv_mode::header: + if (options_.trim_leading_inside_quotes() || options_.trim_trailing_inside_quotes()) + { + trim_string_buffer(options_.trim_leading_inside_quotes(),options_.trim_trailing_inside_quotes()); + } + if (line_ == header_line_) + { + column_names_.push_back(buffer_); + if (options_.assume_header() && options_.mapping_kind() == csv_mapping_kind::n_rows) + { + more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec); + } + } + break; + case csv_mode::data: + if (options_.mapping_kind() == csv_mapping_kind::n_objects) + { + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + if (column_index_ < column_names_.size() + offset_) + { + more_ = visitor_->key(column_names_[column_index_ - offset_], *this, ec); + } + } + } + break; + default: + break; + } + } + + // begin_array or begin_record + void before_record(std::error_code& ec) + { + offset_ = 0; + + switch (stack_.back()) + { + case csv_mode::header: + if (options_.assume_header() && line_ == header_line_) + { + if (options_.mapping_kind() == csv_mapping_kind::n_rows) + { + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + } + } + break; + case csv_mode::data: + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + break; + case csv_mapping_kind::n_objects: + more_ = visitor_->begin_object(semantic_tag::none, *this, ec); + break; + case csv_mapping_kind::m_columns: + break; + default: + break; + } + break; + default: + break; + } + } + + // end_array, begin_array, string_value (headers) + void after_record(std::error_code& ec) + { + if (column_types_.size() > 0) + { + if (level_ > 0) + { + more_ = visitor_->end_array(*this, ec); + level_ = 0; + } + } + switch (stack_.back()) + { + case csv_mode::header: + if (line_ >= options_.header_lines()) + { + 
stack_.back() = csv_mode::data; + } + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + if (options_.assume_header()) + { + more_ = visitor_->end_array(*this, ec); + } + break; + case csv_mapping_kind::m_columns: + m_columns_filter_.initialize(column_names_); + break; + default: + break; + } + break; + case csv_mode::data: + case csv_mode::subfields: + { + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + more_ = visitor_->end_array(*this, ec); + break; + case csv_mapping_kind::n_objects: + more_ = visitor_->end_object(*this, ec); + break; + case csv_mapping_kind::m_columns: + more_ = visitor_->end_array(*this, ec); + break; + } + break; + } + default: + break; + } + column_index_ = 0; + } + + void trim_string_buffer(bool trim_leading, bool trim_trailing) + { + std::size_t start = 0; + std::size_t length = buffer_.length(); + if (trim_leading) + { + bool done = false; + while (!done && start < buffer_.length()) + { + if ((buffer_[start] < 256) && std::isspace(buffer_[start])) + { + ++start; + } + else + { + done = true; + } + } + } + if (trim_trailing) + { + bool done = false; + while (!done && length > 0) + { + if ((buffer_[length-1] < 256) && std::isspace(buffer_[length-1])) + { + --length; + } + else + { + done = true; + } + } + } + if (start != 0 || length != buffer_.size()) + { + buffer_ = buffer_.substr(start,length-start); + } + } + + /* + end_array, begin_array, xxx_value (end_value) + */ + void end_unquoted_string_value(std::error_code& ec) + { + switch (stack_.back()) + { + case csv_mode::data: + case csv_mode::subfields: + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(options_.infer_types(), ec); + } + break; + case csv_mapping_kind::n_objects: + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + if 
(column_index_ < column_names_.size() + offset_) + { + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(options_.infer_types(), ec); + } + } + else if (level_ > 0) + { + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(options_.infer_types(), ec); + } + } + } + break; + case csv_mapping_kind::m_columns: + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + end_value(options_.infer_types(), ec); + } + else + { + m_columns_filter_.skip_column(); + } + break; + } + break; + default: + break; + } + } + + void end_quoted_string_value(std::error_code& ec) + { + switch (stack_.back()) + { + case csv_mode::data: + case csv_mode::subfields: + if (options_.trim_leading_inside_quotes() || options_.trim_trailing_inside_quotes()) + { + trim_string_buffer(options_.trim_leading_inside_quotes(),options_.trim_trailing_inside_quotes()); + } + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + end_value(false, ec); + break; + case csv_mapping_kind::n_objects: + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + if (column_index_ < column_names_.size() + offset_) + { + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(false, ec); + } + } + else if (level_ > 0) + { + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(false, ec); + } + } + } + break; + case csv_mapping_kind::m_columns: + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + end_value(false, ec); + } + else + { + m_columns_filter_.skip_column(); + } + break; + } + break; + default: + break; + } + } + + void 
end_value(bool infer_types, std::error_code& ec) + { + auto it = std::find_if(string_double_map_.begin(), string_double_map_.end(), string_maps_to_double{ buffer_ }); + if (it != string_double_map_.end()) + { + more_ = visitor_->double_value(it->second, semantic_tag::none, *this, ec); + } + else if (column_index_ < column_types_.size() + offset_) + { + if (column_types_[column_index_ - offset_].col_type == csv_column_type::repeat_t) + { + offset_ = offset_ + column_types_[column_index_ - offset_].rep_count; + if (column_index_ - offset_ + 1 < column_types_.size()) + { + if (column_index_ == offset_ || level_ > column_types_[column_index_-offset_].level) + { + more_ = visitor_->end_array(*this, ec); + } + level_ = column_index_ == offset_ ? 0 : column_types_[column_index_ - offset_].level; + } + } + if (level_ < column_types_[column_index_ - offset_].level) + { + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + level_ = column_types_[column_index_ - offset_].level; + } + else if (level_ > column_types_[column_index_ - offset_].level) + { + more_ = visitor_->end_array(*this, ec); + level_ = column_types_[column_index_ - offset_].level; + } + switch (column_types_[column_index_ - offset_].col_type) + { + case csv_column_type::integer_t: + { + std::basic_istringstream<CharT,std::char_traits<CharT>,char_allocator_type> iss{buffer_}; + int64_t val; + iss >> val; + if (!iss.fail()) + { + more_ = visitor_->int64_value(val, semantic_tag::none, *this, ec); + } + else + { + if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0) + { + basic_json_parser<CharT,temp_allocator_type> parser(alloc_); + parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length()); + parser.parse_some(*visitor_); + parser.finish_parse(*visitor_); + } + else + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + } + } + break; + case csv_column_type::float_t: 
+ { + if (options_.lossless_number()) + { + more_ = visitor_->string_value(buffer_,semantic_tag::bigdec, *this, ec); + } + else + { + std::basic_istringstream<CharT, std::char_traits<CharT>, char_allocator_type> iss{ buffer_ }; + double val; + iss >> val; + if (!iss.fail()) + { + more_ = visitor_->double_value(val, semantic_tag::none, *this, ec); + } + else + { + if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0) + { + basic_json_parser<CharT,temp_allocator_type> parser(alloc_); + parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length()); + parser.parse_some(*visitor_); + parser.finish_parse(*visitor_); + } + else + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + } + } + } + break; + case csv_column_type::boolean_t: + { + if (buffer_.length() == 1 && buffer_[0] == '0') + { + more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec); + } + else if (buffer_.length() == 1 && buffer_[0] == '1') + { + more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec); + } + else if (buffer_.length() == 5 && ((buffer_[0] == 'f' || buffer_[0] == 'F') && (buffer_[1] == 'a' || buffer_[1] == 'A') && (buffer_[2] == 'l' || buffer_[2] == 'L') && (buffer_[3] == 's' || buffer_[3] == 'S') && (buffer_[4] == 'e' || buffer_[4] == 'E'))) + { + more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec); + } + else if (buffer_.length() == 4 && ((buffer_[0] == 't' || buffer_[0] == 'T') && (buffer_[1] == 'r' || buffer_[1] == 'R') && (buffer_[2] == 'u' || buffer_[2] == 'U') && (buffer_[3] == 'e' || buffer_[3] == 'E'))) + { + more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec); + } + else + { + if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0) + { + basic_json_parser<CharT,temp_allocator_type> parser(alloc_); + parser.update(column_defaults_[column_index_ - 
offset_].data(),column_defaults_[column_index_ - offset_].length()); + parser.parse_some(*visitor_); + parser.finish_parse(*visitor_); + } + else + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + } + } + break; + default: + if (buffer_.length() > 0) + { + more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec); + } + else + { + if (column_index_ < column_defaults_.size() + offset_ && column_defaults_[column_index_ - offset_].length() > 0) + { + basic_json_parser<CharT,temp_allocator_type> parser(alloc_); + parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length()); + parser.parse_some(*visitor_); + parser.finish_parse(*visitor_); + } + else + { + more_ = visitor_->string_value(string_view_type(), semantic_tag::none, *this, ec); + } + } + break; + } + } + else + { + if (infer_types) + { + end_value_with_numeric_check(ec); + } + else + { + more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec); + } + } + } + + enum class numeric_check_state + { + initial, + null, + boolean_true, + boolean_false, + minus, + zero, + integer, + fraction1, + fraction, + exp1, + exp, + not_a_number + }; + + /* + xxx_value + */ + void end_value_with_numeric_check(std::error_code& ec) + { + numeric_check_state state = numeric_check_state::initial; + bool is_negative = false; + int precision = 0; + uint8_t decimal_places = 0; + + auto last = buffer_.end(); + + std::string buffer; + for (auto p = buffer_.begin(); state != numeric_check_state::not_a_number && p != last; ++p) + { + switch (state) + { + case numeric_check_state::initial: + { + switch (*p) + { + case 'n':case 'N': + if ((last-p) == 4 && (p[1] == 'u' || p[1] == 'U') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 'l' || p[3] == 'L')) + { + state = numeric_check_state::null; + } + else + { + state = numeric_check_state::not_a_number; + } + break; + case 't':case 'T': + if ((last-p) == 4 && (p[1] == 'r' || p[1] == 'R') && (p[2] 
== 'u' || p[2] == 'U') && (p[3] == 'e' || p[3] == 'E')) /* every letter of "true" is matched case-insensitively; the last one previously tested 'U' instead of 'E', so "TRUE" was rejected and "truU" accepted */ + { + state = numeric_check_state::boolean_true; + } + else + { + state = numeric_check_state::not_a_number; + } + break; + case 'f':case 'F': + if ((last-p) == 5 && (p[1] == 'a' || p[1] == 'A') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 's' || p[3] == 'S') && (p[4] == 'e' || p[4] == 'E')) + { + state = numeric_check_state::boolean_false; + } + else + { + state = numeric_check_state::not_a_number; + } + break; + case '-': + is_negative = true; + buffer.push_back(*p); + state = numeric_check_state::minus; + break; + case '0': + ++precision; + buffer.push_back(*p); + state = numeric_check_state::zero; + break; + case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + buffer.push_back(*p); + state = numeric_check_state::integer; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::zero: + { + switch (*p) + { + case '.': + buffer.push_back(to_double_.get_decimal_point()); + state = numeric_check_state::fraction1; + break; + case 'e':case 'E': + buffer.push_back(*p); + state = numeric_check_state::exp1; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::integer: + { + switch (*p) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + buffer.push_back(*p); + break; + case '.': + buffer.push_back(to_double_.get_decimal_point()); + state = numeric_check_state::fraction1; + break; + case 'e':case 'E': + buffer.push_back(*p); + state = numeric_check_state::exp1; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::minus: + { + switch (*p) + { + case '0': + ++precision; + buffer.push_back(*p); + state = numeric_check_state::zero; + break; + case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + 
++precision; + buffer.push_back(*p); + state = numeric_check_state::integer; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::fraction1: + { + switch (*p) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + ++decimal_places; + buffer.push_back(*p); + state = numeric_check_state::fraction; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::fraction: + { + switch (*p) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + ++decimal_places; + buffer.push_back(*p); + break; + case 'e':case 'E': + buffer.push_back(*p); + state = numeric_check_state::exp1; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::exp1: + { + switch (*p) + { + case '-': + buffer.push_back(*p); + break; + case '+': + break; + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + state = numeric_check_state::exp; + buffer.push_back(*p); + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::exp: + { + switch (*p) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + buffer.push_back(*p); + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + default: + break; + } + } + + switch (state) + { + case numeric_check_state::null: + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + break; + case numeric_check_state::boolean_true: + more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec); + break; + case numeric_check_state::boolean_false: + more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec); + break; + case numeric_check_state::zero: + case 
numeric_check_state::integer: + { + if (is_negative) + { + int64_t val{ 0 }; + auto result = jsoncons::detail::to_integer_decimal(buffer_.data(), buffer_.length(), val); + if (result) + { + more_ = visitor_->int64_value(val, semantic_tag::none, *this, ec); + } + else // Must be overflow + { + more_ = visitor_->string_value(buffer_, semantic_tag::bigint, *this, ec); + } + } + else + { + uint64_t val{ 0 }; + auto result = jsoncons::detail::to_integer_decimal(buffer_.data(), buffer_.length(), val); + if (result) + { + more_ = visitor_->uint64_value(val, semantic_tag::none, *this, ec); + } + else if (result.ec == jsoncons::detail::to_integer_errc::overflow) + { + more_ = visitor_->string_value(buffer_, semantic_tag::bigint, *this, ec); + } + else + { + ec = result.ec; + more_ = false; + return; + } + } + break; + } + case numeric_check_state::fraction: + case numeric_check_state::exp: + { + if (options_.lossless_number()) + { + more_ = visitor_->string_value(buffer_,semantic_tag::bigdec, *this, ec); + } + else + { + double d = to_double_(buffer.c_str(), buffer.length()); + more_ = visitor_->double_value(d, semantic_tag::none, *this, ec); + } + break; + } + default: + { + more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec); + break; + } + } + } + + void push_state(csv_parse_state state) + { + state_stack_.push_back(state); + } + + csv_parse_state pop_state() + { + JSONCONS_ASSERT(!state_stack_.empty()) + csv_parse_state state = state_stack_.back(); + state_stack_.pop_back(); + return state; + } +}; + +using csv_parser = basic_csv_parser<char>; +using wcsv_parser = basic_csv_parser<wchar_t>; + +}} + +#endif + diff --git a/include/jsoncons_ext/csv/csv_reader.hpp b/include/jsoncons_ext/csv/csv_reader.hpp new file mode 100644 index 0000000..f10211a --- /dev/null +++ b/include/jsoncons_ext/csv/csv_reader.hpp @@ -0,0 +1,348 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_READER_HPP +#define JSONCONS_CSV_CSV_READER_HPP + +#include <string> +#include <vector> +#include <stdexcept> +#include <memory> // std::allocator +#include <utility> // std::move +#include <istream> // std::basic_istream +#include <jsoncons/source.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons_ext/csv/csv_error.hpp> +#include <jsoncons_ext/csv/csv_parser.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons/json_reader.hpp> +#include <jsoncons/json_decoder.hpp> +#include <jsoncons/source_adaptor.hpp> +#include <jsoncons_ext/csv/csv_options.hpp> + +namespace jsoncons { namespace csv { + + template<class CharT,class Source=jsoncons::stream_source<CharT>,class Allocator=std::allocator<char>> + class basic_csv_reader + { + struct stack_item + { + stack_item() noexcept + : array_begun_(false) + { + } + + bool array_begun_; + }; + using char_type = CharT; + using temp_allocator_type = Allocator; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type; + + basic_csv_reader(const basic_csv_reader&) = delete; + basic_csv_reader& operator = (const basic_csv_reader&) = delete; + + basic_default_json_visitor<CharT> default_visitor_; + text_source_adaptor<Source> source_; + basic_json_visitor<CharT>& visitor_; + basic_csv_parser<CharT,Allocator> parser_; + + public: + // Structural characters + static constexpr size_t default_max_buffer_size = 16384; + //! Parse an input stream of CSV text into a json object + /*! 
+ \param is The input stream to read from + */ + + template <class Sourceable> + basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const Allocator& alloc = Allocator()) + + : basic_csv_reader(std::forward<Sourceable>(source), + visitor, + basic_csv_decode_options<CharT>(), + default_csv_parsing(), + alloc) + { + } + + template <class Sourceable> + basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + const Allocator& alloc = Allocator()) + + : basic_csv_reader(std::forward<Sourceable>(source), + visitor, + options, + default_csv_parsing(), + alloc) + { + } + + template <class Sourceable> + basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator()) + : basic_csv_reader(std::forward<Sourceable>(source), + visitor, + basic_csv_decode_options<CharT>(), + err_handler, + alloc) + { + } + + template <class Sourceable> + basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator()) + : source_(std::forward<Sourceable>(source)), + visitor_(visitor), + parser_(options, err_handler, alloc) + + { + } + + ~basic_csv_reader() noexcept = default; + + void read() + { + std::error_code ec; + read(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read(std::error_code& ec) + { + read_internal(ec); + } + + std::size_t line() const + { + return parser_.line(); + } + + std::size_t column() const + { + return parser_.column(); + } + + bool eof() const + { + return parser_.source_exhausted() && source_.eof(); + } + + private: + + void read_internal(std::error_code& ec) + { + if (source_.is_error()) + { + ec = csv_errc::source_error; + return; + } + while 
(!parser_.stopped()) + { + if (parser_.source_exhausted()) + { + auto s = source_.read_buffer(ec); + if (ec) return; + if (s.size() > 0) + { + parser_.update(s.data(),s.size()); + } + } + parser_.parse_some(visitor_, ec); + if (ec) return; + } + } + }; + + template<class CharT,class Source=jsoncons::stream_source<CharT>,class Allocator=std::allocator<char>> + class legacy_basic_csv_reader + { + struct stack_item + { + stack_item() noexcept + : array_begun_(false) + { + } + + bool array_begun_; + }; + using char_type = CharT; + using temp_allocator_type = Allocator; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type; + + legacy_basic_csv_reader(const legacy_basic_csv_reader&) = delete; + legacy_basic_csv_reader& operator = (const legacy_basic_csv_reader&) = delete; + + basic_default_json_visitor<CharT> default_visitor_; + text_source_adaptor<Source> source_; + basic_json_visitor<CharT>& visitor_; + basic_csv_parser<CharT,Allocator> parser_; + + public: + // Structural characters + static constexpr size_t default_max_buffer_size = 16384; + //! Parse an input stream of CSV text into a json object + /*! 
+ \param is The input stream to read from + */ + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const Allocator& alloc = Allocator()) + + : legacy_basic_csv_reader(std::forward<Sourceable>(source), + visitor, + basic_csv_decode_options<CharT>(), + default_csv_parsing(), + alloc) + { + } + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + const Allocator& alloc = Allocator()) + + : legacy_basic_csv_reader(std::forward<Sourceable>(source), + visitor, + options, + default_csv_parsing(), + alloc) + { + } + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator()) + : legacy_basic_csv_reader(std::forward<Sourceable>(source), + visitor, + basic_csv_decode_options<CharT>(), + err_handler, + alloc) + { + } + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator(), + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(std::forward<Sourceable>(source)), + visitor_(visitor), + parser_(options, err_handler, alloc) + { + } + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator(), + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(), + visitor_(visitor), + parser_(options, 
err_handler, alloc) + { + jsoncons::basic_string_view<CharT> sv(std::forward<Sourceable>(source)); + auto r = unicode_traits::detect_encoding_from_bom(sv.data(), sv.size()); + if (!(r.encoding == unicode_traits::encoding_kind::utf8 || r.encoding == unicode_traits::encoding_kind::undetected)) + { + JSONCONS_THROW(ser_error(json_errc::illegal_unicode_character,parser_.line(),parser_.column())); + } + std::size_t offset = (r.ptr - sv.data()); + parser_.update(sv.data()+offset,sv.size()-offset); + } + + ~legacy_basic_csv_reader() noexcept = default; + + void read() + { + std::error_code ec; + read(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read(std::error_code& ec) + { + read_internal(ec); + } + + std::size_t line() const + { + return parser_.line(); + } + + std::size_t column() const + { + return parser_.column(); + } + + bool eof() const + { + return parser_.source_exhausted() && source_.eof(); + } + + private: + + void read_internal(std::error_code& ec) + { + if (source_.is_error()) + { + ec = csv_errc::source_error; + return; + } + while (!parser_.stopped()) + { + if (parser_.source_exhausted()) + { + auto s = source_.read_buffer(ec); + if (ec) return; + if (s.size() > 0) + { + parser_.update(s.data(),s.size()); + } + } + parser_.parse_some(visitor_, ec); + if (ec) return; + } + } + }; + +#if !defined(JSONCONS_NO_DEPRECATED) + using csv_reader = legacy_basic_csv_reader<char>; + using wcsv_reader = legacy_basic_csv_reader<wchar_t>; +#endif + + using csv_string_reader = basic_csv_reader<char,string_source<char>>; + using wcsv_string_reader = basic_csv_reader<wchar_t,string_source<wchar_t>>; + using csv_stream_reader = basic_csv_reader<char,stream_source<char>>; + using wcsv_stream_reader = basic_csv_reader<wchar_t,stream_source<wchar_t>>; + +}} + +#endif diff --git a/include/jsoncons_ext/csv/csv_serializer.hpp b/include/jsoncons_ext/csv/csv_serializer.hpp new file mode 100644 index 0000000..ec73510 --- 
/dev/null +++ b/include/jsoncons_ext/csv/csv_serializer.hpp @@ -0,0 +1,12 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_SERIALIZER_HPP +#define JSONCONS_CSV_CSV_SERIALIZER_HPP + +#include <jsoncons_ext/csv/csv_encoder.hpp> + +#endif diff --git a/include/jsoncons_ext/csv/decode_csv.hpp b/include/jsoncons_ext/csv/decode_csv.hpp new file mode 100644 index 0000000..b91c58b --- /dev/null +++ b/include/jsoncons_ext/csv/decode_csv.hpp @@ -0,0 +1,208 @@ +/// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_DECODE_CSV_HPP +#define JSONCONS_CSV_DECODE_CSV_HPP + +#include <jsoncons_ext/csv/csv_options.hpp> +#include <jsoncons_ext/csv/csv_reader.hpp> +#include <jsoncons_ext/csv/csv_encoder.hpp> +#include <jsoncons_ext/csv/csv_cursor.hpp> + +namespace jsoncons { +namespace csv { + + template <class T,class Source> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_sequence_of<Source,typename T::char_type>::value,T>::type + decode_csv(const Source& s, const basic_csv_decode_options<typename Source::value_type>& options = basic_csv_decode_options<typename Source::value_type>()) + { + using char_type = typename Source::value_type; + + json_decoder<T> decoder; + + basic_csv_reader<char_type,jsoncons::string_source<char_type>> reader(s,decoder,options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T,class Source> + typename 
std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_char_sequence<Source>::value,T>::type + decode_csv(const Source& s, const basic_csv_decode_options<typename Source::value_type>& options = basic_csv_decode_options<typename Source::value_type>()) + { + using char_type = typename Source::value_type; + + basic_csv_cursor<char_type> cursor(s, options); + jsoncons::json_decoder<basic_json<char_type>> decoder; + + std::error_code ec; + T val = decode_traits<T,char_type>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template <class T,class CharT> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_csv(std::basic_istream<CharT>& is, const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>()) + { + using char_type = CharT; + + json_decoder<T> decoder; + + basic_csv_reader<char_type,jsoncons::stream_source<char_type>> reader(is,decoder,options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T,class CharT> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_csv(std::basic_istream<CharT>& is, const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>()) + { + basic_csv_cursor<CharT> cursor(is, options); + jsoncons::json_decoder<basic_json<CharT>> decoder; + + std::error_code ec; + T val = decode_traits<T,CharT>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template <class T, class InputIt> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_csv(InputIt first, InputIt last, + const basic_csv_decode_options<typename std::iterator_traits<InputIt>::value_type>& 
options = + basic_csv_decode_options<typename std::iterator_traits<InputIt>::value_type>()) + { + using char_type = typename std::iterator_traits<InputIt>::value_type; + + jsoncons::json_decoder<T> decoder; + basic_csv_reader<char_type, iterator_source<InputIt>> reader(iterator_source<InputIt>(first,last), decoder, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T, class InputIt> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_csv(InputIt first, InputIt last, + const basic_csv_decode_options<typename std::iterator_traits<InputIt>::value_type>& options = + basic_csv_decode_options<typename std::iterator_traits<InputIt>::value_type>()) + { + using char_type = typename std::iterator_traits<InputIt>::value_type; + + basic_csv_cursor<char_type,iterator_source<InputIt>> cursor(iterator_source<InputIt>(first, last), options); + jsoncons::json_decoder<basic_json<char_type>> decoder; + std::error_code ec; + T val = decode_traits<T,char_type>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + // With leading allocator parameter + + template <class T,class Source,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_sequence_of<Source,typename T::char_type>::value,T>::type + decode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& s, + const basic_csv_decode_options<typename Source::value_type>& options = basic_csv_decode_options<typename Source::value_type>()) + { + using char_type = typename Source::value_type; + + json_decoder<T,TempAllocator> decoder(temp_alloc); + + basic_csv_reader<char_type,jsoncons::string_source<char_type>,TempAllocator> reader(s,decoder,options,temp_alloc); + reader.read(); + if 
(!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T,class Source,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_char_sequence<Source>::value,T>::type + decode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& s, + const basic_csv_decode_options<typename Source::value_type>& options = basic_csv_decode_options<typename Source::value_type>()) + { + using char_type = typename Source::value_type; + + basic_csv_cursor<char_type,stream_source<char_type>,TempAllocator> cursor(s, options, temp_alloc); + json_decoder<basic_json<char_type,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char_type>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template <class T,class CharT,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::basic_istream<CharT>& is, + const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>()) + { + using char_type = CharT; + + json_decoder<T,TempAllocator> decoder(temp_alloc); + + basic_csv_reader<char_type,jsoncons::string_source<char_type>,TempAllocator> reader(is,decoder,options,temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T,class CharT,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::basic_istream<CharT>& is, + const 
basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>()) + { + basic_csv_cursor<CharT,stream_source<CharT>,TempAllocator> cursor(is, options, temp_alloc); + json_decoder<basic_json<CharT,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,CharT>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + +} // namespace csv +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/csv/encode_csv.hpp b/include/jsoncons_ext/csv/encode_csv.hpp new file mode 100644 index 0000000..d919253 --- /dev/null +++ b/include/jsoncons_ext/csv/encode_csv.hpp @@ -0,0 +1,122 @@ +/// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_ENCODE_CSV_HPP +#define JSONCONS_CSV_ENCODE_CSV_HPP + +#include <jsoncons_ext/csv/csv_options.hpp> +#include <jsoncons_ext/csv/csv_reader.hpp> +#include <jsoncons_ext/csv/csv_encoder.hpp> + +namespace jsoncons { +namespace csv { + + template <class T,class Container> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_char_container<Container>::value>::type + encode_csv(const T& j, Container& s, const basic_csv_encode_options<typename Container::value_type>& options = basic_csv_encode_options<typename Container::value_type>()) + { + using char_type = typename Container::value_type; + basic_csv_encoder<char_type,jsoncons::string_sink<std::basic_string<char_type>>> encoder(s,options); + j.dump(encoder); + } + + template <class T,class Container> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_char_container<Container>::value>::type + 
encode_csv(const T& val, Container& s, const basic_csv_encode_options<typename Container::value_type>& options = basic_csv_encode_options<typename Container::value_type>()) + { + using char_type = typename Container::value_type; + basic_csv_encoder<char_type,jsoncons::string_sink<std::basic_string<char_type>>> encoder(s,options); + std::error_code ec; + encode_traits<T,char_type>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template <class T, class CharT> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_csv(const T& j, std::basic_ostream<CharT>& os, const basic_csv_encode_options<CharT>& options = basic_csv_encode_options<CharT>()) + { + using char_type = CharT; + basic_csv_encoder<char_type,jsoncons::stream_sink<char_type>> encoder(os,options); + j.dump(encoder); + } + + template <class T, class CharT> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_csv(const T& val, std::basic_ostream<CharT>& os, const basic_csv_encode_options<CharT>& options = basic_csv_encode_options<CharT>()) + { + using char_type = CharT; + basic_csv_encoder<char_type,jsoncons::stream_sink<char_type>> encoder(os,options); + std::error_code ec; + encode_traits<T,CharT>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + // with temp_allocator_arg_t + + template <class T, class Container, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_char_container<Container>::value>::type + encode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, Container& s, const basic_csv_encode_options<typename Container::value_type>& options = basic_csv_encode_options<typename Container::value_type>()) + { + using char_type = typename Container::value_type; + basic_csv_encoder<char_type,jsoncons::string_sink<std::basic_string<char_type>>,TempAllocator> encoder(s, 
options, temp_alloc); + j.dump(encoder); + } + + template <class T, class Container, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_char_container<Container>::value>::type + encode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, Container& s, const basic_csv_encode_options<typename Container::value_type>& options = basic_csv_encode_options<typename Container::value_type>()) + { + using char_type = typename Container::value_type; + basic_csv_encoder<char_type,jsoncons::string_sink<std::basic_string<char_type>>,TempAllocator> encoder(s, options, temp_alloc); + std::error_code ec; + encode_traits<T,char_type>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template <class T, class CharT, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, std::basic_ostream<CharT>& os, const basic_csv_encode_options<CharT>& options = basic_csv_encode_options<CharT>()) + { + using char_type = CharT; + basic_csv_encoder<char_type,jsoncons::stream_sink<char_type>,TempAllocator> encoder(os, options, temp_alloc); + j.dump(encoder); + } + + template <class T, class CharT, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, std::basic_ostream<CharT>& os, const basic_csv_encode_options<CharT>& options = basic_csv_encode_options<CharT>()) + { + using char_type = CharT; + basic_csv_encoder<char_type,jsoncons::stream_sink<char_type>,TempAllocator> encoder(os, options, temp_alloc); + std::error_code ec; + encode_traits<T,CharT>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + +} // namespace csv +} // namespace jsoncons + +#endif diff --git 
// Operator categories used to look up precedence and associativity in
// operator_table.
enum class operator_kind
{
    default_op, // Identifier, CurrentNode, Index, MultiSelectList, MultiSelectHash, FunctionExpression
    projection_op,
    flatten_projection_op, // FlattenProjection
    or_op,
    and_op,
    eq_op,
    ne_op,
    lt_op,
    lte_op,
    gt_op,
    gte_op,
    not_op
};

// Static lookup of the precedence level and associativity of each
// operator_kind.
struct operator_table final
{
    // Returns the numeric precedence level assigned to `oper`.
    // Both projection kinds share level 11; the equality operators share
    // level 6; the ordering comparisons share level 5. Every kind not
    // listed explicitly (including not_op and default_op) yields 1.
    static int precedence_level(operator_kind oper)
    {
        switch (oper)
        {
            case operator_kind::projection_op:
            case operator_kind::flatten_projection_op:
                return 11;
            case operator_kind::or_op:
                return 9;
            case operator_kind::and_op:
                return 8;
            case operator_kind::eq_op:
            case operator_kind::ne_op:
                return 6;
            case operator_kind::lt_op:
            case operator_kind::lte_op:
            case operator_kind::gt_op:
            case operator_kind::gte_op:
                return 5;
            default:
                return 1;
        }
    }

    // Returns true only for not_op and projection_op; every other kind
    // (flatten_projection_op included) is reported as not right
    // associative.
    static bool is_right_associative(operator_kind oper)
    {
        return oper == operator_kind::not_op ||
               oper == operator_kind::projection_op;
    }
};
begin_multi_select_hash_arg{}; + + struct end_multi_select_hash_arg_t + { + explicit end_multi_select_hash_arg_t() = default; + }; + constexpr end_multi_select_hash_arg_t end_multi_select_hash_arg{}; + + struct begin_multi_select_list_arg_t + { + explicit begin_multi_select_list_arg_t() = default; + }; + constexpr begin_multi_select_list_arg_t begin_multi_select_list_arg{}; + + struct end_multi_select_list_arg_t + { + explicit end_multi_select_list_arg_t() = default; + }; + constexpr end_multi_select_list_arg_t end_multi_select_list_arg{}; + + struct begin_filter_arg_t + { + explicit begin_filter_arg_t() = default; + }; + constexpr begin_filter_arg_t begin_filter_arg{}; + + struct end_filter_arg_t + { + explicit end_filter_arg_t() = default; + }; + constexpr end_filter_arg_t end_filter_arg{}; + + struct pipe_arg_t + { + explicit pipe_arg_t() = default; + }; + constexpr pipe_arg_t pipe_arg{}; + + struct current_node_arg_t + { + explicit current_node_arg_t() = default; + }; + constexpr current_node_arg_t current_node_arg{}; + + struct end_function_arg_t + { + explicit end_function_arg_t() = default; + }; + constexpr end_function_arg_t end_function_arg{}; + + struct argument_arg_t + { + explicit argument_arg_t() = default; + }; + constexpr argument_arg_t argument_arg{}; + + struct slice + { + jsoncons::optional<int64_t> start_; + jsoncons::optional<int64_t> stop_; + int64_t step_; + + slice() + : start_(), stop_(), step_(1) + { + } + + slice(const jsoncons::optional<int64_t>& start, const jsoncons::optional<int64_t>& end, int64_t step) + : start_(start), stop_(end), step_(step) + { + } + + slice(const slice& other) + : start_(other.start_), stop_(other.stop_), step_(other.step_) + { + } + + slice& operator=(const slice& rhs) + { + if (this != &rhs) + { + if (rhs.start_) + { + start_ = rhs.start_; + } + else + { + start_.reset(); + } + if (rhs.stop_) + { + stop_ = rhs.stop_; + } + else + { + stop_.reset(); + } + step_ = rhs.step_; + } + return *this; + } + + int64_t 
get_start(std::size_t size) const + { + if (start_) + { + auto len = *start_ >= 0 ? *start_ : (static_cast<int64_t>(size) + *start_); + return len <= static_cast<int64_t>(size) ? len : static_cast<int64_t>(size); + } + else + { + if (step_ >= 0) + { + return 0; + } + else + { + return static_cast<int64_t>(size); + } + } + } + + int64_t get_stop(std::size_t size) const + { + if (stop_) + { + auto len = *stop_ >= 0 ? *stop_ : (static_cast<int64_t>(size) + *stop_); + return len <= static_cast<int64_t>(size) ? len : static_cast<int64_t>(size); + } + else + { + return step_ >= 0 ? static_cast<int64_t>(size) : -1; + } + } + + int64_t step() const + { + return step_; // Allow negative + } + }; + + namespace detail { + + enum class path_state + { + start, + lhs_expression, + rhs_expression, + sub_expression, + expression_type, + comparator_expression, + function_expression, + argument, + expression_or_expression_type, + quoted_string, + raw_string, + raw_string_escape_char, + quoted_string_escape_char, + escape_u1, + escape_u2, + escape_u3, + escape_u4, + escape_expect_surrogate_pair1, + escape_expect_surrogate_pair2, + escape_u5, + escape_u6, + escape_u7, + escape_u8, + literal, + key_expr, + val_expr, + identifier_or_function_expr, + unquoted_string, + key_val_expr, + number, + digit, + index_or_slice_expression, + bracket_specifier, + bracket_specifier_or_multi_select_list, + filter, + multi_select_list, + multi_select_hash, + rhs_slice_expression_stop, + rhs_slice_expression_step, + expect_rbracket, + expect_rparen, + expect_dot, + expect_rbrace, + expect_colon, + expect_multi_select_list, + cmp_lt_or_lte, + cmp_eq, + cmp_gt_or_gte, + cmp_ne, + expect_pipe_or_or, + expect_and + }; + + // dynamic_resources + + template<class Json, class JsonReference> + class dynamic_resources + { + typedef typename Json::char_type char_type; + typedef typename Json::char_traits_type char_traits_type; + typedef std::basic_string<char_type,char_traits_type> string_type; + typedef 
// Constant accessors of dynamic_resources.
//
// Each accessor returns a reference to a function-local static Json, so the
// object is constructed on first use and the same object is returned on
// every later call.

// Returns the Json string "number".
reference number_type_name()
{
    static Json number_type_name(JSONCONS_STRING_CONSTANT(char_type, "number"));

    return number_type_name;
}

// Returns the Json string "boolean".
reference boolean_type_name()
{
    static Json boolean_type_name(JSONCONS_STRING_CONSTANT(char_type, "boolean"));

    return boolean_type_name;
}

// Returns the Json string "string".
reference string_type_name()
{
    static Json string_type_name(JSONCONS_STRING_CONSTANT(char_type, "string"));

    return string_type_name;
}

// Returns the Json string "object".
reference object_type_name()
{
    static Json object_type_name(JSONCONS_STRING_CONSTANT(char_type, "object"));

    return object_type_name;
}

// Returns the Json string "array".
reference array_type_name()
{
    static Json array_type_name(JSONCONS_STRING_CONSTANT(char_type, "array"));

    return array_type_name;
}

// Returns the Json string "null" (the type name, not the null value).
reference null_type_name()
{
    static Json null_type_name(JSONCONS_STRING_CONSTANT(char_type, "null"));

    return null_type_name;
}

// Returns the Json boolean true.
// NOTE(review): the static is a const Json, so this presumably requires
// `reference` to be a const reference type - confirm against the
// instantiations.
reference true_value() const
{
    static const Json true_value(true, semantic_tag::none);
    return true_value;
}

// Returns the Json boolean false (same const caveat as true_value()).
reference false_value() const
{
    static const Json false_value(false, semantic_tag::none);
    return false_value;
}

// Returns the Json null value (same const caveat as true_value()).
reference null_value() const
{
    static const Json null_value(null_type(), semantic_tag::none);
    return null_value;
}
args) + { + auto temp = jsoncons::make_unique<Json>(std::forward<Args>(args)...); + Json* ptr = temp.get(); + temp_storage_.emplace_back(std::move(temp)); + return ptr; + } + }; + + template<class Json, class JsonReference> + class jmespath_evaluator + { + public: + typedef typename Json::char_type char_type; + typedef typename Json::char_traits_type char_traits_type; + typedef std::basic_string<char_type,char_traits_type> string_type; + typedef typename Json::string_view_type string_view_type; + typedef JsonReference reference; + using pointer = typename std::conditional<std::is_const<typename std::remove_reference<JsonReference>::type>::value,typename Json::const_pointer,typename Json::pointer>::type; + typedef typename Json::const_pointer const_pointer; + + static bool is_false(reference ref) + { + return (ref.is_array() && ref.empty()) || + (ref.is_object() && ref.empty()) || + (ref.is_string() && ref.as_string_view().size() == 0) || + (ref.is_bool() && !ref.as_bool()) || + ref.is_null(); + } + + static bool is_true(reference ref) + { + return !is_false(ref); + } + + class unary_operator + { + std::size_t precedence_level_; + bool is_right_associative_; + + protected: + ~unary_operator() = default; // virtual destructor not needed + public: + unary_operator(operator_kind oper) + : precedence_level_(operator_table::precedence_level(oper)), + is_right_associative_(operator_table::is_right_associative(oper)) + { + } + + std::size_t precedence_level() const + { + return precedence_level_; + } + bool is_right_associative() const + { + return is_right_associative_; + } + + virtual reference evaluate(reference val, dynamic_resources<Json,JsonReference>&, std::error_code& ec) const = 0; + }; + + class not_expression final : public unary_operator + { + public: + not_expression() + : unary_operator(operator_kind::not_op) + {} + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + return is_false(val) 
? resources.true_value() : resources.false_value(); + } + }; + + class binary_operator + { + std::size_t precedence_level_; + bool is_right_associative_; + protected: + ~binary_operator() = default; // virtual destructor not needed + public: + binary_operator(operator_kind oper) + : precedence_level_(operator_table::precedence_level(oper)), + is_right_associative_(operator_table::is_right_associative(oper)) + { + } + + + std::size_t precedence_level() const + { + return precedence_level_; + } + bool is_right_associative() const + { + return is_right_associative_; + } + + virtual reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>&, std::error_code& ec) const = 0; + + virtual std::string to_string(std::size_t indent = 0) const + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("to_string not implemented\n"); + return s; + } + }; + + // expression_base + class expression_base + { + std::size_t precedence_level_; + bool is_right_associative_; + bool is_projection_; + public: + expression_base(operator_kind oper, bool is_projection) + : precedence_level_(operator_table::precedence_level(oper)), + is_right_associative_(operator_table::is_right_associative(oper)), + is_projection_(is_projection) + { + } + + std::size_t precedence_level() const + { + return precedence_level_; + } + + bool is_right_associative() const + { + return is_right_associative_; + } + + bool is_projection() const + { + return is_projection_; + } + + virtual ~expression_base() = default; + + virtual reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const = 0; + + virtual void add_expression(std::unique_ptr<expression_base>&& expressions) = 0; + + virtual std::string to_string(std::size_t = 0) const + { + return std::string("to_string not implemented"); + } + }; + + // parameter + + enum class parameter_kind{value, expression}; + + class parameter + { + 
// function_base
//
// Abstract base class for the built-in function objects. It stores the
// argument count supplied by the derived class and exposes it via arity().
class function_base
{
    // Argument count passed to the constructor.
    // NOTE(review): an empty optional presumably means "any number of
    // arguments" - confirm against the caller that checks arity().
    jsoncons::optional<std::size_t> arg_count_;
public:
    // Intentionally non-explicit: derived classes construct the base with a
    // plain count, e.g. function_base(1).
    function_base(jsoncons::optional<std::size_t> arg_count)
        : arg_count_(arg_count)
    {
    }

    // Returns the argument count given at construction.
    jsoncons::optional<std::size_t> arity() const
    {
        return arg_count_;
    }

    virtual ~function_base() = default;

    // Evaluates the function over `args`. Implementations report failures
    // through `ec` and return a reference to the result.
    virtual reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>&, std::error_code& ec) const = 0;

    // Default diagnostic text; the indent argument is ignored here.
    virtual std::string to_string(std::size_t = 0) const
    {
        return std::string("to_string not implemented");
    }
};
std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + switch (arg0.type()) + { + case json_type::uint64_value: + return arg0; + case json_type::int64_value: + { + return arg0.template as<int64_t>() >= 0 ? arg0 : *resources.create_json(std::abs(arg0.template as<int64_t>())); + } + case json_type::double_value: + { + return arg0.template as<double>() >= 0 ? arg0 : *resources.create_json(std::abs(arg0.template as<double>())); + } + default: + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + } + } + }; + + class avg_function : public function_base + { + public: + avg_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.empty()) + { + return resources.null_value(); + } + + double sum = 0; + for (auto& j : arg0.array_range()) + { + if (!j.is_number()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + sum += j.template as<double>(); + } + + return sum == 0 ? 
resources.null_value() : *resources.create_json(sum/arg0.size()); + } + }; + + class ceil_function : public function_base + { + public: + ceil_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + switch (arg0.type()) + { + case json_type::uint64_value: + case json_type::int64_value: + { + return *resources.create_json(arg0.template as<double>()); + } + case json_type::double_value: + { + return *resources.create_json(std::ceil(arg0.template as<double>())); + } + default: + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + } + }; + + class contains_function : public function_base + { + public: + contains_function() + : function_base(2) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!(args[0].is_value() && args[1].is_value())) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + + reference arg0 = args[0].value(); + reference arg1 = args[1].value(); + + switch (arg0.type()) + { + case json_type::array_value: + for (auto& j : arg0.array_range()) + { + if (j == arg1) + { + return resources.true_value(); + } + } + return resources.false_value(); + case json_type::string_value: + { + if (!arg1.is_string()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + auto sv0 = arg0.template as<string_view_type>(); + auto sv1 = arg1.template as<string_view_type>(); + return sv0.find(sv1) != string_view_type::npos ? 
resources.true_value() : resources.false_value(); + } + default: + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + } + } + }; + + class ends_with_function : public function_base + { + public: + ends_with_function() + : function_base(2) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!(args[0].is_value() && args[1].is_value())) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_string()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg1 = args[1].value(); + if (!arg1.is_string()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + auto sv0 = arg0.template as<string_view_type>(); + auto sv1 = arg1.template as<string_view_type>(); + + if (sv1.length() <= sv0.length() && sv1 == sv0.substr(sv0.length() - sv1.length())) + { + return resources.true_value(); + } + else + { + return resources.false_value(); + } + } + }; + + class floor_function : public function_base + { + public: + floor_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + switch (arg0.type()) + { + case json_type::uint64_value: + case json_type::int64_value: + { + return *resources.create_json(arg0.template as<double>()); + } + case json_type::double_value: + { + return *resources.create_json(std::floor(arg0.template as<double>())); + } + default: + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + } + }; + + class 
join_function : public function_base + { + public: + join_function() + : function_base(2) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + reference arg0 = args[0].value(); + reference arg1 = args[1].value(); + + if (!(args[0].is_value() && args[1].is_value())) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + if (!arg0.is_string()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (!arg1.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + string_type sep = arg0.template as<string_type>(); + string_type buf; + for (auto& j : arg1.array_range()) + { + if (!j.is_string()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (!buf.empty()) + { + buf.append(sep); + } + auto sv = j.template as<string_view_type>(); + buf.append(sv.begin(), sv.end()); + } + return *resources.create_json(buf); + } + }; + + class length_function : public function_base + { + public: + length_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + + switch (arg0.type()) + { + case json_type::object_value: + case json_type::array_value: + return *resources.create_json(arg0.size()); + case json_type::string_value: + { + auto sv0 = arg0.template as<string_view_type>(); + auto length = unicode_traits::count_codepoints(sv0.data(), sv0.size()); + return *resources.create_json(length); + } + default: + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + } + } + }; + + class 
max_function : public function_base + { + public: + max_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.empty()) + { + return resources.null_value(); + } + + bool is_number = arg0.at(0).is_number(); + bool is_string = arg0.at(0).is_string(); + if (!is_number && !is_string) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + std::size_t index = 0; + for (std::size_t i = 1; i < arg0.size(); ++i) + { + if (!(arg0.at(i).is_number() == is_number && arg0.at(i).is_string() == is_string)) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.at(i) > arg0.at(index)) + { + index = i; + } + } + + return arg0.at(index); + } + }; + + class max_by_function : public function_base + { + public: + max_by_function() + : function_base(2) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!(args[0].is_value() && args[1].is_expression())) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.empty()) + { + return resources.null_value(); + } + + const auto& expr = args[1].expression(); + + std::error_code ec2; + Json key1 = expr.evaluate(arg0.at(0), resources, ec2); + + bool is_number = key1.is_number(); + bool is_string = key1.is_string(); + if (!(is_number || 
is_string)) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + std::size_t index = 0; + for (std::size_t i = 1; i < arg0.size(); ++i) + { + reference key2 = expr.evaluate(arg0.at(i), resources, ec2); + if (!(key2.is_number() == is_number && key2.is_string() == is_string)) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (key2 > key1) + { + key1 = key2; + index = i; + } + } + + return arg0.at(index); + } + }; + + class map_function : public function_base + { + public: + map_function() + : function_base(2) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!(args[0].is_expression() && args[1].is_value())) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + const auto& expr = args[0].expression(); + + reference arg0 = args[1].value(); + if (!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + auto result = resources.create_json(json_array_arg); + + for (auto& item : arg0.array_range()) + { + auto& j = expr.evaluate(item, resources, ec); + if (ec) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + result->emplace_back(json_const_pointer_arg, std::addressof(j)); + } + + return *result; + } + + std::string to_string(std::size_t = 0) const override + { + return std::string("map_function\n"); + } + }; + + class min_function : public function_base + { + public: + min_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if 
(!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.empty()) + { + return resources.null_value(); + } + + bool is_number = arg0.at(0).is_number(); + bool is_string = arg0.at(0).is_string(); + if (!is_number && !is_string) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + std::size_t index = 0; + for (std::size_t i = 1; i < arg0.size(); ++i) + { + if (!(arg0.at(i).is_number() == is_number && arg0.at(i).is_string() == is_string)) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.at(i) < arg0.at(index)) + { + index = i; + } + } + + return arg0.at(index); + } + }; + + class min_by_function : public function_base + { + public: + min_by_function() + : function_base(2) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!(args[0].is_value() && args[1].is_expression())) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.empty()) + { + return resources.null_value(); + } + + const auto& expr = args[1].expression(); + + std::error_code ec2; + Json key1 = expr.evaluate(arg0.at(0), resources, ec2); + + bool is_number = key1.is_number(); + bool is_string = key1.is_string(); + if (!(is_number || is_string)) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + std::size_t index = 0; + for (std::size_t i = 1; i < arg0.size(); ++i) + { + reference key2 = expr.evaluate(arg0.at(i), resources, ec2); + if (!(key2.is_number() == is_number && key2.is_string() == is_string)) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (key2 < key1) + { + key1 = key2; + index = i; + } + } + + 
return arg0.at(index); + } + }; + + class merge_function : public function_base + { + public: + merge_function() + : function_base(jsoncons::optional<std::size_t>()) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + if (args.empty()) + { + ec = jmespath_errc::invalid_arity; + return resources.null_value(); + } + + for (auto& param : args) + { + if (!param.is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + } + + reference arg0 = args[0].value(); + if (!arg0.is_object()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (args.size() == 1) + { + return arg0; + } + + auto result = resources.create_json(arg0); + for (std::size_t i = 1; i < args.size(); ++i) + { + reference argi = args[i].value(); + if (!argi.is_object()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + for (auto& item : argi.object_range()) + { + result->insert_or_assign(item.key(),item.value()); + } + } + + return *result; + } + }; + + class type_function : public function_base + { + public: + type_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + + switch (arg0.type()) + { + case json_type::int64_value: + case json_type::uint64_value: + case json_type::double_value: + return resources.number_type_name(); + case json_type::bool_value: + return resources.boolean_type_name(); + case json_type::string_value: + return resources.string_type_name(); + case json_type::object_value: + return resources.object_type_name(); + case json_type::array_value: + return 
resources.array_type_name(); + default: + return resources.null_type_name(); + break; + + } + } + }; + + class sort_function : public function_base + { + public: + sort_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.size() <= 1) + { + return arg0; + } + + bool is_number = arg0.at(0).is_number(); + bool is_string = arg0.at(0).is_string(); + if (!is_number && !is_string) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + for (std::size_t i = 1; i < arg0.size(); ++i) + { + if (arg0.at(i).is_number() != is_number || arg0.at(i).is_string() != is_string) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + } + + auto v = resources.create_json(arg0); + std::stable_sort((v->array_range()).begin(), (v->array_range()).end()); + return *v; + } + }; + + class sort_by_function : public function_base + { + public: + sort_by_function() + : function_base(2) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!(args[0].is_value() && args[1].is_expression())) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + if (arg0.size() <= 1) + { + return arg0; + } + + const auto& expr = args[1].expression(); + + auto v = resources.create_json(arg0); + 
std::stable_sort((v->array_range()).begin(), (v->array_range()).end(), + [&expr,&resources,&ec](reference lhs, reference rhs) -> bool + { + std::error_code ec2; + reference key1 = expr.evaluate(lhs, resources, ec2); + bool is_number = key1.is_number(); + bool is_string = key1.is_string(); + if (!(is_number || is_string)) + { + ec = jmespath_errc::invalid_type; + } + + reference key2 = expr.evaluate(rhs, resources, ec2); + if (!(key2.is_number() == is_number && key2.is_string() == is_string)) + { + ec = jmespath_errc::invalid_type; + } + + return key1 < key2; + }); + return ec ? resources.null_value() : *v; + } + + std::string to_string(std::size_t = 0) const override + { + return std::string("sort_by_function\n"); + } + }; + + class keys_function final : public function_base + { + public: + keys_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_object()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + auto result = resources.create_json(json_array_arg); + result->reserve(args.size()); + + for (auto& item : arg0.object_range()) + { + result->emplace_back(item.key()); + } + return *result; + } + }; + + class values_function final : public function_base + { + public: + values_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_object()) + { + ec = 
jmespath_errc::invalid_type; + return resources.null_value(); + } + + auto result = resources.create_json(json_array_arg); + result->reserve(args.size()); + + for (auto& item : arg0.object_range()) + { + result->emplace_back(item.value()); + } + return *result; + } + }; + + class reverse_function final : public function_base + { + public: + reverse_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + switch (arg0.type()) + { + case json_type::string_value: + { + string_view_type sv = arg0.as_string_view(); + std::basic_string<char32_t> buf; + unicode_traits::convert(sv.data(), sv.size(), buf); + std::reverse(buf.begin(), buf.end()); + string_type s; + unicode_traits::convert(buf.data(), buf.size(), s); + return *resources.create_json(s); + } + case json_type::array_value: + { + auto result = resources.create_json(arg0); + std::reverse(result->array_range().begin(),result->array_range().end()); + return *result; + } + default: + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + } + }; + + class starts_with_function : public function_base + { + public: + starts_with_function() + : function_base(2) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!(args[0].is_value() && args[1].is_value())) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_string()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg1 = args[1].value(); + if (!arg1.is_string()) + { 
+ ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + auto sv0 = arg0.template as<string_view_type>(); + auto sv1 = arg1.template as<string_view_type>(); + + if (sv1.length() <= sv0.length() && sv1 == sv0.substr(0, sv1.length())) + { + return resources.true_value(); + } + else + { + return resources.false_value(); + } + } + }; + + class sum_function : public function_base + { + public: + sum_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (!arg0.is_array()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + double sum = 0; + for (auto& j : arg0.array_range()) + { + if (!j.is_number()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + sum += j.template as<double>(); + } + + return *resources.create_json(sum); + } + }; + + class to_array_function final : public function_base + { + public: + to_array_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + if (arg0.is_array()) + { + return arg0; + } + else + { + auto result = resources.create_json(json_array_arg); + result->push_back(arg0); + return *result; + } + } + + std::string to_string(std::size_t = 0) const override + { + return std::string("to_array_function\n"); + } + }; + + class to_number_function final : public function_base + { + public: + to_number_function() + : function_base(1) + 
{ + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + switch (arg0.type()) + { + case json_type::int64_value: + case json_type::uint64_value: + case json_type::double_value: + return arg0; + case json_type::string_value: + { + auto sv = arg0.as_string_view(); + uint64_t uval{ 0 }; + auto result1 = jsoncons::detail::to_integer(sv.data(), sv.length(), uval); + if (result1) + { + return *resources.create_json(uval); + } + int64_t sval{ 0 }; + auto result2 = jsoncons::detail::to_integer(sv.data(), sv.length(), sval); + if (result2) + { + return *resources.create_json(sval); + } + jsoncons::detail::chars_to to_double; + try + { + auto s = arg0.as_string(); + double d = to_double(s.c_str(), s.length()); + return *resources.create_json(d); + } + catch (const std::exception&) + { + return resources.null_value(); + } + } + default: + return resources.null_value(); + } + } + + std::string to_string(std::size_t = 0) const override + { + return std::string("to_number_function\n"); + } + }; + + class to_string_function final : public function_base + { + public: + to_string_function() + : function_base(1) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + JSONCONS_ASSERT(args.size() == *this->arity()); + + if (!args[0].is_value()) + { + ec = jmespath_errc::invalid_type; + return resources.null_value(); + } + + reference arg0 = args[0].value(); + return *resources.create_json(arg0.template as<string_type>()); + } + + std::string to_string(std::size_t = 0) const override + { + return std::string("to_string_function\n"); + } + }; + + class not_null_function final : public function_base + { + 
public: + not_null_function() + : function_base(jsoncons::optional<std::size_t>()) + { + } + + reference evaluate(std::vector<parameter>& args, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + for (auto& param : args) + { + if (param.is_value() && !param.value().is_null()) + { + return param.value(); + } + } + return resources.null_value(); + } + + std::string to_string(std::size_t = 0) const override + { + return std::string("to_string_function\n"); + } + }; + + // token + + class token + { + public: + token_kind type_; + + union + { + std::unique_ptr<expression_base> expression_; + const unary_operator* unary_operator_; + const binary_operator* binary_operator_; + const function_base* function_; + Json value_; + string_type key_; + }; + public: + + token(current_node_arg_t) noexcept + : type_(token_kind::current_node) + { + } + + token(end_function_arg_t) noexcept + : type_(token_kind::end_function) + { + } + + token(separator_arg_t) noexcept + : type_(token_kind::separator) + { + } + + token(lparen_arg_t) noexcept + : type_(token_kind::lparen) + { + } + + token(rparen_arg_t) noexcept + : type_(token_kind::rparen) + { + } + + token(end_of_expression_arg_t) noexcept + : type_(token_kind::end_of_expression) + { + } + + token(begin_multi_select_hash_arg_t) noexcept + : type_(token_kind::begin_multi_select_hash) + { + } + + token(end_multi_select_hash_arg_t) noexcept + : type_(token_kind::end_multi_select_hash) + { + } + + token(begin_multi_select_list_arg_t) noexcept + : type_(token_kind::begin_multi_select_list) + { + } + + token(end_multi_select_list_arg_t) noexcept + : type_(token_kind::end_multi_select_list) + { + } + + token(begin_filter_arg_t) noexcept + : type_(token_kind::begin_filter) + { + } + + token(end_filter_arg_t) noexcept + : type_(token_kind::end_filter) + { + } + + token(pipe_arg_t) noexcept + : type_(token_kind::pipe) + { + } + + token(key_arg_t, const string_type& key) + : type_(token_kind::key) + { + new 
(&key_) string_type(key); + } + + token(std::unique_ptr<expression_base>&& expression) + : type_(token_kind::expression) + { + new (&expression_) std::unique_ptr<expression_base>(std::move(expression)); + } + + token(const unary_operator* expression) noexcept + : type_(token_kind::unary_operator), + unary_operator_(expression) + { + } + + token(const binary_operator* expression) noexcept + : type_(token_kind::binary_operator), + binary_operator_(expression) + { + } + + token(const function_base* function) noexcept + : type_(token_kind::function), + function_(function) + { + } + + token(argument_arg_t) noexcept + : type_(token_kind::argument) + { + } + + token(begin_expression_type_arg_t) noexcept + : type_(token_kind::begin_expression_type) + { + } + + token(end_expression_type_arg_t) noexcept + : type_(token_kind::end_expression_type) + { + } + + token(literal_arg_t, Json&& value) noexcept + : type_(token_kind::literal), value_(std::move(value)) + { + } + + token(token&& other) noexcept + { + construct(std::forward<token>(other)); + } + + token& operator=(token&& other) + { + if (&other != this) + { + if (type_ == other.type_) + { + switch (type_) + { + case token_kind::expression: + expression_ = std::move(other.expression_); + break; + case token_kind::key: + key_ = std::move(other.key_); + break; + case token_kind::unary_operator: + unary_operator_ = other.unary_operator_; + break; + case token_kind::binary_operator: + binary_operator_ = other.binary_operator_; + break; + case token_kind::function: + function_ = other.function_; + break; + case token_kind::literal: + value_ = std::move(other.value_); + break; + default: + break; + } + } + else + { + destroy(); + construct(std::forward<token>(other)); + } + } + return *this; + } + + ~token() noexcept + { + destroy(); + } + + token_kind type() const + { + return type_; + } + + bool is_lparen() const + { + return type_ == token_kind::lparen; + } + + bool is_lbrace() const + { + return type_ == 
token_kind::begin_multi_select_hash; + } + + bool is_key() const + { + return type_ == token_kind::key; + } + + bool is_rparen() const + { + return type_ == token_kind::rparen; + } + + bool is_current_node() const + { + return type_ == token_kind::current_node; + } + + bool is_projection() const + { + return type_ == token_kind::expression && expression_->is_projection(); + } + + bool is_expression() const + { + return type_ == token_kind::expression; + } + + bool is_operator() const + { + return type_ == token_kind::unary_operator || + type_ == token_kind::binary_operator; + } + + std::size_t precedence_level() const + { + switch(type_) + { + case token_kind::unary_operator: + return unary_operator_->precedence_level(); + case token_kind::binary_operator: + return binary_operator_->precedence_level(); + case token_kind::expression: + return expression_->precedence_level(); + default: + return 0; + } + } + + jsoncons::optional<std::size_t> arity() const + { + return type_ == token_kind::function ? 
function_->arity() : jsoncons::optional<std::size_t>(); + } + + bool is_right_associative() const + { + switch(type_) + { + case token_kind::unary_operator: + return unary_operator_->is_right_associative(); + case token_kind::binary_operator: + return binary_operator_->is_right_associative(); + case token_kind::expression: + return expression_->is_right_associative(); + default: + return false; + } + } + + void construct(token&& other) + { + type_ = other.type_; + switch (type_) + { + case token_kind::expression: + new (&expression_) std::unique_ptr<expression_base>(std::move(other.expression_)); + break; + case token_kind::key: + new (&key_) string_type(std::move(other.key_)); + break; + case token_kind::unary_operator: + unary_operator_ = other.unary_operator_; + break; + case token_kind::binary_operator: + binary_operator_ = other.binary_operator_; + break; + case token_kind::function: + function_ = other.function_; + break; + case token_kind::literal: + new (&value_) Json(std::move(other.value_)); + break; + default: + break; + } + } + + void destroy() noexcept + { + switch(type_) + { + case token_kind::expression: + expression_.~unique_ptr(); + break; + case token_kind::key: + key_.~basic_string(); + break; + case token_kind::literal: + value_.~Json(); + break; + default: + break; + } + } + + std::string to_string(std::size_t indent = 0) const + { + switch(type_) + { + case token_kind::expression: + return expression_->to_string(indent); + break; + case token_kind::unary_operator: + return std::string("unary_operator"); + break; + case token_kind::binary_operator: + return binary_operator_->to_string(indent); + break; + case token_kind::current_node: + return std::string("current_node"); + break; + case token_kind::end_function: + return std::string("end_function"); + break; + case token_kind::separator: + return std::string("separator"); + break; + case token_kind::literal: + return std::string("literal"); + break; + case token_kind::key: + return 
std::string("key") + key_; + break; + case token_kind::begin_multi_select_hash: + return std::string("begin_multi_select_hash"); + break; + case token_kind::begin_multi_select_list: + return std::string("begin_multi_select_list"); + break; + case token_kind::begin_filter: + return std::string("begin_filter"); + break; + case token_kind::pipe: + return std::string("pipe"); + break; + case token_kind::lparen: + return std::string("lparen"); + break; + case token_kind::function: + return function_->to_string(); + case token_kind::argument: + return std::string("argument"); + break; + case token_kind::begin_expression_type: + return std::string("begin_expression_type"); + break; + case token_kind::end_expression_type: + return std::string("end_expression_type"); + break; + default: + return std::string("default"); + break; + } + } + }; + + static pointer evaluate_tokens(reference doc, const std::vector<token>& output_stack, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) + { + pointer root_ptr = std::addressof(doc); + std::vector<parameter> stack; + std::vector<parameter> arg_stack; + for (std::size_t i = 0; i < output_stack.size(); ++i) + { + auto& t = output_stack[i]; + switch (t.type()) + { + case token_kind::literal: + { + stack.emplace_back(t.value_); + break; + } + case token_kind::begin_expression_type: + { + JSONCONS_ASSERT(i+1 < output_stack.size()); + ++i; + JSONCONS_ASSERT(output_stack[i].is_expression()); + JSONCONS_ASSERT(!stack.empty()); + stack.pop_back(); + stack.emplace_back(output_stack[i].expression_.get()); + break; + } + case token_kind::pipe: + { + JSONCONS_ASSERT(!stack.empty()); + root_ptr = std::addressof(stack.back().value()); + break; + } + case token_kind::current_node: + stack.emplace_back(*root_ptr); + break; + case token_kind::expression: + { + JSONCONS_ASSERT(!stack.empty()); + pointer ptr = std::addressof(stack.back().value()); + stack.pop_back(); + auto& ref = t.expression_->evaluate(*ptr, resources, ec); + 
stack.emplace_back(ref); + break; + } + case token_kind::unary_operator: + { + JSONCONS_ASSERT(stack.size() >= 1); + pointer ptr = std::addressof(stack.back().value()); + stack.pop_back(); + reference r = t.unary_operator_->evaluate(*ptr, resources, ec); + stack.emplace_back(r); + break; + } + case token_kind::binary_operator: + { + JSONCONS_ASSERT(stack.size() >= 2); + pointer rhs = std::addressof(stack.back().value()); + stack.pop_back(); + pointer lhs = std::addressof(stack.back().value()); + stack.pop_back(); + reference r = t.binary_operator_->evaluate(*lhs,*rhs, resources, ec); + stack.emplace_back(r); + break; + } + case token_kind::argument: + { + JSONCONS_ASSERT(!stack.empty()); + arg_stack.push_back(std::move(stack.back())); + stack.pop_back(); + break; + } + case token_kind::function: + { + if (t.function_->arity() && *(t.function_->arity()) != arg_stack.size()) + { + ec = jmespath_errc::invalid_arity; + return std::addressof(resources.null_value()); + } + + reference r = t.function_->evaluate(arg_stack, resources, ec); + if (ec) + { + return std::addressof(resources.null_value()); + } + arg_stack.clear(); + stack.emplace_back(r); + break; + } + default: + break; + } + } + JSONCONS_ASSERT(stack.size() == 1); + return std::addressof(stack.back().value()); + } + + // Implementations + + class or_operator final : public binary_operator + { + public: + or_operator() + : binary_operator(operator_kind::or_op) + { + } + + reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + if (lhs.is_null() && rhs.is_null()) + { + return resources.null_value(); + } + if (!is_false(lhs)) + { + return lhs; + } + else + { + return rhs; + } + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("or_operator\n"); + return s; + } + }; + + class and_operator final : public 
binary_operator + { + public: + and_operator() + : binary_operator(operator_kind::and_op) + { + } + + reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>&, std::error_code&) const override + { + if (is_true(lhs)) + { + return rhs; + } + else + { + return lhs; + } + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("and_operator\n"); + return s; + } + }; + + class eq_operator final : public binary_operator + { + public: + eq_operator() + : binary_operator(operator_kind::eq_op) + { + } + + reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + return lhs == rhs ? resources.true_value() : resources.false_value(); + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("eq_operator\n"); + return s; + } + }; + + class ne_operator final : public binary_operator + { + public: + ne_operator() + : binary_operator(operator_kind::ne_op) + { + } + + reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + return lhs != rhs ? 
resources.true_value() : resources.false_value(); + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("ne_operator\n"); + return s; + } + }; + + class lt_operator final : public binary_operator + { + public: + lt_operator() + : binary_operator(operator_kind::lt_op) + { + } + + reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + if (!(lhs.is_number() && rhs.is_number())) + { + return resources.null_value(); + } + return lhs < rhs ? resources.true_value() : resources.false_value(); + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("lt_operator\n"); + return s; + } + }; + + class lte_operator final : public binary_operator + { + public: + lte_operator() + : binary_operator(operator_kind::lte_op) + { + } + + reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + if (!(lhs.is_number() && rhs.is_number())) + { + return resources.null_value(); + } + return lhs <= rhs ? resources.true_value() : resources.false_value(); + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("lte_operator\n"); + return s; + } + }; + + class gt_operator final : public binary_operator + { + public: + gt_operator() + : binary_operator(operator_kind::gt_op) + { + } + + reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + if (!(lhs.is_number() && rhs.is_number())) + { + return resources.null_value(); + } + return lhs > rhs ? 
resources.true_value() : resources.false_value(); + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("gt_operator\n"); + return s; + } + }; + + class gte_operator final : public binary_operator + { + public: + gte_operator() + : binary_operator(operator_kind::gte_op) + { + } + + reference evaluate(reference lhs, reference rhs, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + if (!(lhs.is_number() && rhs.is_number())) + { + return resources.null_value(); + } + return lhs >= rhs ? resources.true_value() : resources.false_value(); + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("gte_operator\n"); + return s; + } + }; + + // basic_expression + class basic_expression : public expression_base + { + public: + basic_expression() + : expression_base(operator_kind::default_op, false) + { + } + + void add_expression(std::unique_ptr<expression_base>&&) override + { + } + }; + + class identifier_selector final : public basic_expression + { + private: + string_type identifier_; + public: + identifier_selector(const string_view_type& name) + : identifier_(name) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + //std::cout << "(identifier_selector " << identifier_ << " ) " << pretty_print(val) << "\n"; + if (val.is_object() && val.contains(identifier_)) + { + return val.at(identifier_); + } + else + { + return resources.null_value(); + } + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("identifier_selector "); + s.append(identifier_); + return s; + } + }; + + class current_node final : public 
basic_expression + { + public: + current_node() + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>&, std::error_code&) const override + { + return val; + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("current_node "); + return s; + } + }; + + class index_selector final : public basic_expression + { + int64_t index_; + public: + index_selector(int64_t index) + : index_(index) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code&) const override + { + if (!val.is_array()) + { + return resources.null_value(); + } + int64_t slen = static_cast<int64_t>(val.size()); + if (index_ >= 0 && index_ < slen) + { + std::size_t index = static_cast<std::size_t>(index_); + return val.at(index); + } + else if ((slen + index_) >= 0 && (slen+index_) < slen) + { + std::size_t index = static_cast<std::size_t>(slen + index_); + return val.at(index); + } + else + { + return resources.null_value(); + } + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("index_selector "); + s.append(std::to_string(index_)); + return s; + } + }; + + // projection_base + class projection_base : public expression_base + { + protected: + std::vector<std::unique_ptr<expression_base>> expressions_; + public: + projection_base(operator_kind oper) + : expression_base(oper, true) + { + } + + void add_expression(std::unique_ptr<expression_base>&& expr) override + { + if (!expressions_.empty() && expressions_.back()->is_projection() && + (expr->precedence_level() < expressions_.back()->precedence_level() || + (expr->precedence_level() == expressions_.back()->precedence_level() && expr->is_right_associative()))) + { + expressions_.back()->add_expression(std::move(expr)); + } + else + { 
+ expressions_.emplace_back(std::move(expr)); + } + } + + reference apply_expressions(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const + { + pointer ptr = std::addressof(val); + for (auto& expression : expressions_) + { + ptr = std::addressof(expression->evaluate(*ptr, resources, ec)); + } + return *ptr; + } + }; + + class object_projection final : public projection_base + { + public: + object_projection() + : projection_base(operator_kind::projection_op) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + if (!val.is_object()) + { + return resources.null_value(); + } + + auto result = resources.create_json(json_array_arg); + for (auto& item : val.object_range()) + { + if (!item.value().is_null()) + { + reference j = this->apply_expressions(item.value(), resources, ec); + if (!j.is_null()) + { + result->emplace_back(json_const_pointer_arg, std::addressof(j)); + } + } + } + return *result; + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("object_projection\n"); + for (auto& expr : this->expressions_) + { + std::string sss = expr->to_string(indent+2); + s.insert(s.end(), sss.begin(), sss.end()); + s.push_back('\n'); + } + return s; + } + }; + + class list_projection final : public projection_base + { + public: + list_projection() + : projection_base(operator_kind::projection_op) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + if (!val.is_array()) + { + return resources.null_value(); + } + + auto result = resources.create_json(json_array_arg); + for (reference item : val.array_range()) + { + if (!item.is_null()) + { + reference j = this->apply_expressions(item, resources, ec); + if (!j.is_null()) + { + 
result->emplace_back(json_const_pointer_arg, std::addressof(j)); + } + } + } + return *result; + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("list_projection\n"); + for (auto& expr : this->expressions_) + { + std::string sss = expr->to_string(indent+2); + s.insert(s.end(), sss.begin(), sss.end()); + s.push_back('\n'); + } + return s; + } + }; + + class slice_projection final : public projection_base + { + slice slice_; + public: + slice_projection(const slice& s) + : projection_base(operator_kind::projection_op), slice_(s) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + if (!val.is_array()) + { + return resources.null_value(); + } + + auto start = slice_.get_start(val.size()); + auto end = slice_.get_stop(val.size()); + auto step = slice_.step(); + + if (step == 0) + { + ec = jmespath_errc::step_cannot_be_zero; + return resources.null_value(); + } + + auto result = resources.create_json(json_array_arg); + if (step > 0) + { + if (start < 0) + { + start = 0; + } + if (end > static_cast<int64_t>(val.size())) + { + end = val.size(); + } + for (int64_t i = start; i < end; i += step) + { + reference j = this->apply_expressions(val.at(static_cast<std::size_t>(i)), resources, ec); + if (!j.is_null()) + { + result->emplace_back(json_const_pointer_arg, std::addressof(j)); + } + } + } + else + { + if (start >= static_cast<int64_t>(val.size())) + { + start = static_cast<int64_t>(val.size()) - 1; + } + if (end < -1) + { + end = -1; + } + for (int64_t i = start; i > end; i += step) + { + reference j = this->apply_expressions(val.at(static_cast<std::size_t>(i)), resources, ec); + if (!j.is_null()) + { + result->emplace_back(json_const_pointer_arg, std::addressof(j)); + } + } + } + + return *result; + } + + std::string to_string(std::size_t indent = 0) const override 
+ { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("slice_projection\n"); + for (auto& expr : this->expressions_) + { + std::string sss = expr->to_string(indent+2); + s.insert(s.end(), sss.begin(), sss.end()); + s.push_back('\n'); + } + return s; + } + }; + + class filter_expression final : public projection_base + { + std::vector<token> token_list_; + public: + filter_expression(std::vector<token>&& token_list) + : projection_base(operator_kind::projection_op), token_list_(std::move(token_list)) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + if (!val.is_array()) + { + return resources.null_value(); + } + auto result = resources.create_json(json_array_arg); + + for (auto& item : val.array_range()) + { + Json j(json_const_pointer_arg, evaluate_tokens(item, token_list_, resources, ec)); + if (is_true(j)) + { + reference jj = this->apply_expressions(item, resources, ec); + if (!jj.is_null()) + { + result->emplace_back(json_const_pointer_arg, std::addressof(jj)); + } + } + } + return *result; + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("filter_expression\n"); + for (auto& item : token_list_) + { + std::string sss = item.to_string(indent+2); + s.insert(s.end(), sss.begin(), sss.end()); + s.push_back('\n'); + } + return s; + } + }; + + class flatten_projection final : public projection_base + { + public: + flatten_projection() + : projection_base(operator_kind::flatten_projection_op) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + if (!val.is_array()) + { + return resources.null_value(); + } + + auto result = resources.create_json(json_array_arg); + for (reference current_elem : val.array_range()) + { + if 
(current_elem.is_array()) + { + for (reference elem : current_elem.array_range()) + { + if (!elem.is_null()) + { + reference j = this->apply_expressions(elem, resources, ec); + if (!j.is_null()) + { + result->emplace_back(json_const_pointer_arg, std::addressof(j)); + } + } + } + } + else + { + if (!current_elem.is_null()) + { + reference j = this->apply_expressions(current_elem, resources, ec); + if (!j.is_null()) + { + result->emplace_back(json_const_pointer_arg, std::addressof(j)); + } + } + } + } + return *result; + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("flatten_projection\n"); + for (auto& expr : this->expressions_) + { + std::string sss = expr->to_string(indent+2); + s.insert(s.end(), sss.begin(), sss.end()); + s.push_back('\n'); + } + return s; + } + }; + + class multi_select_list final : public basic_expression + { + std::vector<std::vector<token>> token_lists_; + public: + multi_select_list(std::vector<std::vector<token>>&& token_lists) + : token_lists_(std::move(token_lists)) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + if (val.is_null()) + { + return val; + } + auto result = resources.create_json(json_array_arg); + result->reserve(token_lists_.size()); + + for (auto& list : token_lists_) + { + result->emplace_back(json_const_pointer_arg, evaluate_tokens(val, list, resources, ec)); + } + return *result; + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("multi_select_list\n"); + for (auto& list : token_lists_) + { + for (auto& item : list) + { + std::string sss = item.to_string(indent+2); + s.insert(s.end(), sss.begin(), sss.end()); + s.push_back('\n'); + } + s.append("---\n"); + } + return s; + } + }; + + struct 
key_tokens + { + string_type key; + std::vector<token> tokens; + + key_tokens(string_type&& key, std::vector<token>&& tokens) noexcept + : key(std::move(key)), tokens(std::move(tokens)) + { + } + }; + + class multi_select_hash final : public basic_expression + { + public: + std::vector<key_tokens> key_toks_; + + multi_select_hash(std::vector<key_tokens>&& key_toks) + : key_toks_(std::move(key_toks)) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + if (val.is_null()) + { + return val; + } + auto resultp = resources.create_json(json_object_arg); + resultp->reserve(key_toks_.size()); + for (auto& item : key_toks_) + { + resultp->try_emplace(item.key, json_const_pointer_arg, evaluate_tokens(val, item.tokens, resources, ec)); + } + + return *resultp; + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("multi_select_list\n"); + return s; + } + }; + + class function_expression final : public basic_expression + { + public: + std::vector<token> toks_; + + function_expression(std::vector<token>&& toks) + : toks_(std::move(toks)) + { + } + + reference evaluate(reference val, dynamic_resources<Json,JsonReference>& resources, std::error_code& ec) const override + { + return *evaluate_tokens(val, toks_, resources, ec); + } + + std::string to_string(std::size_t indent = 0) const override + { + std::string s; + for (std::size_t i = 0; i <= indent; ++i) + { + s.push_back(' '); + } + s.append("function_expression\n"); + for (auto& tok : toks_) + { + for (std::size_t i = 0; i <= indent+2; ++i) + { + s.push_back(' '); + } + std::string sss = tok.to_string(indent+2); + s.insert(s.end(), sss.begin(), sss.end()); + s.push_back('\n'); + } + return s; + } + }; + + class static_resources + { + std::vector<std::unique_ptr<Json>> temp_storage_; + + public: + + static_resources() = 
default; + static_resources(const static_resources& expr) = delete; + static_resources& operator=(const static_resources& expr) = delete; + static_resources(static_resources&& expr) = default; + static_resources& operator=(static_resources&& expr) = default; + + const function_base* get_function(const string_type& name, std::error_code& ec) const + { + static abs_function abs_func; + static avg_function avg_func; + static ceil_function ceil_func; + static contains_function contains_func; + static ends_with_function ends_with_func; + static floor_function floor_func; + static join_function join_func; + static length_function length_func; + static max_function max_func; + static max_by_function max_by_func; + static map_function map_func; + static merge_function merge_func; + static min_function min_func; + static min_by_function min_by_func; + static type_function type_func; + static sort_function sort_func; + static sort_by_function sort_by_func; + static keys_function keys_func; + static values_function values_func; + static reverse_function reverse_func; + static starts_with_function starts_with_func; + static const sum_function sum_func; + static to_array_function to_array_func; + static to_number_function to_number_func; + static to_string_function to_string_func; + static not_null_function not_null_func; + + using function_dictionary = std::unordered_map<string_type,const function_base*>; + static const function_dictionary functions_ = + { + {string_type{'a','b','s'}, &abs_func}, + {string_type{'a','v','g'}, &avg_func}, + {string_type{'c','e','i', 'l'}, &ceil_func}, + {string_type{'c','o','n', 't', 'a', 'i', 'n', 's'}, &contains_func}, + {string_type{'e','n','d', 's', '_', 'w', 'i', 't', 'h'}, &ends_with_func}, + {string_type{'f','l','o', 'o', 'r'}, &floor_func}, + {string_type{'j','o','i', 'n'}, &join_func}, + {string_type{'l','e','n', 'g', 't', 'h'}, &length_func}, + {string_type{'m','a','x'}, &max_func}, + {string_type{'m','a','x','_','b','y'}, 
&max_by_func}, + {string_type{'m','a','p'}, &map_func}, + {string_type{'m','i','n'}, &min_func}, + {string_type{'m','i','n','_','b','y'}, &min_by_func}, + {string_type{'m','e','r', 'g', 'e'}, &merge_func}, + {string_type{'t','y','p', 'e'}, &type_func}, + {string_type{'s','o','r', 't'}, &sort_func}, + {string_type{'s','o','r', 't','_','b','y'}, &sort_by_func}, + {string_type{'k','e','y', 's'}, &keys_func}, + {string_type{'v','a','l', 'u','e','s'}, &values_func}, + {string_type{'r','e','v', 'e', 'r', 's','e'}, &reverse_func}, + {string_type{'s','t','a', 'r','t','s','_','w','i','t','h'}, &starts_with_func}, + {string_type{'s','u','m'}, &sum_func}, + {string_type{'t','o','_','a','r','r','a','y',}, &to_array_func}, + {string_type{'t','o','_', 'n', 'u', 'm','b','e','r'}, &to_number_func}, + {string_type{'t','o','_', 's', 't', 'r','i','n','g'}, &to_string_func}, + {string_type{'n','o','t', '_', 'n', 'u','l','l'}, &not_null_func} + }; + auto it = functions_.find(name); + if (it == functions_.end()) + { + ec = jmespath_errc::unknown_function; + return nullptr; + } + return it->second; + } + + const unary_operator* get_not_operator() const + { + static const not_expression not_oper; + + return &not_oper; + } + + const binary_operator* get_or_operator() const + { + static const or_operator or_oper; + + return &or_oper; + } + + const binary_operator* get_and_operator() const + { + static const and_operator and_oper; + + return &and_oper; + } + + const binary_operator* get_eq_operator() const + { + static const eq_operator eq_oper; + return &eq_oper; + } + + const binary_operator* get_ne_operator() const + { + static const ne_operator ne_oper; + return &ne_oper; + } + + const binary_operator* get_lt_operator() const + { + static const lt_operator lt_oper; + return &lt_oper; + } + + const binary_operator* get_lte_operator() const + { + static const lte_operator lte_oper; + return &lte_oper; + } + + const binary_operator* get_gt_operator() const + { + static const gt_operator gt_oper; +
return &gt_oper; + } + + const binary_operator* get_gte_operator() const + { + static const gte_operator gte_oper; + return &gte_oper; + } + }; + + class jmespath_expression + { + static_resources resources_; + std::vector<token> output_stack_; + public: + jmespath_expression() + { + } + + jmespath_expression(const jmespath_expression& expr) = delete; + jmespath_expression& operator=(const jmespath_expression& expr) = delete; + + jmespath_expression(jmespath_expression&& expr) + : resources_(std::move(expr.resources_)), + output_stack_(std::move(expr.output_stack_)) + { + } + + jmespath_expression(static_resources&& resources, + std::vector<token>&& output_stack) + : resources_(std::move(resources)), output_stack_(std::move(output_stack)) + { + } + + Json evaluate(reference doc) + { + if (output_stack_.empty()) + { + return Json::null(); + } + std::error_code ec; + Json result = evaluate(doc, ec); + if (ec) + { + JSONCONS_THROW(jmespath_error(ec)); + } + return result; + } + + Json evaluate(reference doc, std::error_code& ec) + { + if (output_stack_.empty()) + { + return Json::null(); + } + dynamic_resources<Json,JsonReference> dynamic_storage; + return deep_copy(*evaluate_tokens(doc, output_stack_, dynamic_storage, ec)); + } + + static jmespath_expression compile(const string_view_type& expr) + { + jsoncons::jmespath::detail::jmespath_evaluator<Json,const Json&> evaluator; + std::error_code ec; + jmespath_expression result = evaluator.compile(expr.data(), expr.size(), ec); + if (ec) + { + JSONCONS_THROW(jmespath_error(ec, evaluator.line(), evaluator.column())); + } + return result; + } + + static jmespath_expression compile(const string_view_type& expr, + std::error_code& ec) + { + jsoncons::jmespath::detail::jmespath_evaluator<Json,const Json&> evaluator; + return evaluator.compile(expr.data(), expr.size(), ec); + } + }; + private: + std::size_t line_; + std::size_t column_; + const char_type* begin_input_; + const char_type* end_input_; + const char_type* p_; + +
static_resources resources_; + std::vector<path_state> state_stack_; + + std::vector<token> output_stack_; + std::vector<token> operator_stack_; + + public: + jmespath_evaluator() + : line_(1), column_(1), + begin_input_(nullptr), end_input_(nullptr), + p_(nullptr) + { + } + + std::size_t line() const + { + return line_; + } + + std::size_t column() const + { + return column_; + } + + jmespath_expression compile(const char_type* path, + std::size_t length, + std::error_code& ec) + { + push_token(current_node_arg, ec); + if (ec) {return jmespath_expression();} + state_stack_.emplace_back(path_state::start); + + string_type buffer; + uint32_t cp = 0; + uint32_t cp2 = 0; + + begin_input_ = path; + end_input_ = path + length; + p_ = begin_input_; + + slice slic{}; + + while (p_ < end_input_) + { + switch (state_stack_.back()) + { + case path_state::start: + { + state_stack_.back() = path_state::rhs_expression; + state_stack_.emplace_back(path_state::lhs_expression); + break; + } + case path_state::rhs_expression: + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case '.': + ++p_; + ++column_; + state_stack_.emplace_back(path_state::sub_expression); + break; + case '|': + ++p_; + ++column_; + state_stack_.emplace_back(path_state::lhs_expression); + state_stack_.emplace_back(path_state::expect_pipe_or_or); + break; + case '&': + ++p_; + ++column_; + state_stack_.emplace_back(path_state::lhs_expression); + state_stack_.emplace_back(path_state::expect_and); + break; + case '<': + case '>': + case '=': + { + state_stack_.emplace_back(path_state::comparator_expression); + break; + } + case '!': + { + ++p_; + ++column_; + state_stack_.emplace_back(path_state::lhs_expression); + state_stack_.emplace_back(path_state::cmp_ne); + break; + } + case ')': + { + state_stack_.pop_back(); + break; + } + case '[': + state_stack_.emplace_back(path_state::bracket_specifier); + ++p_; + ++column_; + break; + default: + if 
(state_stack_.size() > 1) + { + state_stack_.pop_back(); + } + else + { + ec = jmespath_errc::syntax_error; + return jmespath_expression(); + } + break; + } + break; + case path_state::comparator_expression: + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case '<': + ++p_; + ++column_; + state_stack_.back() = path_state::lhs_expression; + state_stack_.emplace_back(path_state::cmp_lt_or_lte); + break; + case '>': + ++p_; + ++column_; + state_stack_.back() = path_state::lhs_expression; + state_stack_.emplace_back(path_state::cmp_gt_or_gte); + break; + case '=': + { + ++p_; + ++column_; + state_stack_.back() = path_state::lhs_expression; + state_stack_.emplace_back(path_state::cmp_eq); + break; + } + default: + if (state_stack_.size() > 1) + { + state_stack_.pop_back(); + } + else + { + ec = jmespath_errc::syntax_error; + return jmespath_expression(); + } + break; + } + break; + case path_state::lhs_expression: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case '\"': + state_stack_.back() = path_state::val_expr; + state_stack_.emplace_back(path_state::quoted_string); + ++p_; + ++column_; + break; + case '\'': + state_stack_.back() = path_state::raw_string; + ++p_; + ++column_; + break; + case '`': + state_stack_.back() = path_state::literal; + ++p_; + ++column_; + break; + case '{': + push_token(begin_multi_select_hash_arg, ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::multi_select_hash; + ++p_; + ++column_; + break; + case '*': // wildcard + push_token(token(jsoncons::make_unique<object_projection>()), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case '(': + { + ++p_; + ++column_; + push_token(lparen_arg, ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::expect_rparen; + 
state_stack_.emplace_back(path_state::rhs_expression); + state_stack_.emplace_back(path_state::lhs_expression); + break; + } + case '!': + { + ++p_; + ++column_; + push_token(token(resources_.get_not_operator()), ec); + if (ec) {return jmespath_expression();} + break; + } + case '@': + ++p_; + ++column_; + push_token(token(jsoncons::make_unique<current_node>()), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + break; + case '[': + state_stack_.back() = path_state::bracket_specifier_or_multi_select_list; + ++p_; + ++column_; + break; + default: + if ((*p_ >= 'A' && *p_ <= 'Z') || (*p_ >= 'a' && *p_ <= 'z') || (*p_ == '_')) + { + state_stack_.back() = path_state::identifier_or_function_expr; + state_stack_.emplace_back(path_state::unquoted_string); + buffer.push_back(*p_); + ++p_; + ++column_; + } + else + { + ec = jmespath_errc::expected_identifier; + return jmespath_expression(); + } + break; + }; + break; + } + case path_state::sub_expression: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case '\"': + state_stack_.back() = path_state::val_expr; + state_stack_.emplace_back(path_state::quoted_string); + ++p_; + ++column_; + break; + case '{': + push_token(begin_multi_select_hash_arg, ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::multi_select_hash; + ++p_; + ++column_; + break; + case '*': + push_token(token(jsoncons::make_unique<object_projection>()), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case '[': + state_stack_.back() = path_state::expect_multi_select_list; + ++p_; + ++column_; + break; + default: + if ((*p_ >= 'A' && *p_ <= 'Z') || (*p_ >= 'a' && *p_ <= 'z') || (*p_ == '_')) + { + state_stack_.back() = path_state::identifier_or_function_expr; + state_stack_.emplace_back(path_state::unquoted_string); + buffer.push_back(*p_); + ++p_; + ++column_; + } + else + { + ec = 
jmespath_errc::expected_identifier; + return jmespath_expression(); + } + break; + }; + break; + } + case path_state::key_expr: + push_token(token(key_arg, buffer), ec); + if (ec) {return jmespath_expression();} + buffer.clear(); + state_stack_.pop_back(); + break; + case path_state::val_expr: + push_token(token(jsoncons::make_unique<identifier_selector>(buffer)), ec); + if (ec) {return jmespath_expression();} + buffer.clear(); + state_stack_.pop_back(); + break; + case path_state::expression_or_expression_type: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case '&': + push_token(token(begin_expression_type_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::expression_type; + state_stack_.emplace_back(path_state::rhs_expression); + state_stack_.emplace_back(path_state::lhs_expression); + ++p_; + ++column_; + break; + default: + state_stack_.back() = path_state::argument; + state_stack_.emplace_back(path_state::rhs_expression); + state_stack_.emplace_back(path_state::lhs_expression); + break; + } + break; + case path_state::identifier_or_function_expr: + switch(*p_) + { + case '(': + { + auto f = resources_.get_function(buffer, ec); + if (ec) + { + return jmespath_expression(); + } + buffer.clear(); + push_token(token(f), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::function_expression; + state_stack_.emplace_back(path_state::expression_or_expression_type); + ++p_; + ++column_; + break; + } + default: + { + push_token(token(jsoncons::make_unique<identifier_selector>(buffer)), ec); + if (ec) {return jmespath_expression();} + buffer.clear(); + state_stack_.pop_back(); + break; + } + } + break; + + case path_state::function_expression: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case ',': + push_token(token(current_node_arg), ec); + if (ec) {return 
jmespath_expression();} + state_stack_.emplace_back(path_state::expression_or_expression_type); + ++p_; + ++column_; + break; + case ')': + { + push_token(token(end_function_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + } + default: + break; + } + break; + + case path_state::argument: + push_token(argument_arg, ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + break; + + case path_state::expression_type: + push_token(end_expression_type_arg, ec); + push_token(argument_arg, ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + break; + + case path_state::quoted_string: + switch (*p_) + { + case '\"': + state_stack_.pop_back(); // quoted_string + ++p_; + ++column_; + break; + case '\\': + state_stack_.emplace_back(path_state::quoted_string_escape_char); + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + + case path_state::unquoted_string: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + state_stack_.pop_back(); // unquoted_string + advance_past_space_character(ec); + break; + default: + if ((*p_ >= '0' && *p_ <= '9') || (*p_ >= 'A' && *p_ <= 'Z') || (*p_ >= 'a' && *p_ <= 'z') || (*p_ == '_')) + { + buffer.push_back(*p_); + ++p_; + ++column_; + } + else + { + state_stack_.pop_back(); // unquoted_string + } + break; + }; + break; + case path_state::raw_string_escape_char: + switch (*p_) + { + case '\'': + buffer.push_back(*p_); + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + buffer.push_back('\\'); + buffer.push_back(*p_); + state_stack_.pop_back(); + ++p_; + ++column_; + break; + } + break; + case path_state::quoted_string_escape_char: + switch (*p_) + { + case '\"': + buffer.push_back('\"'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case '\\': + buffer.push_back('\\'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case '/': + 
buffer.push_back('/'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'b': + buffer.push_back('\b'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'f': + buffer.push_back('\f'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'n': + buffer.push_back('\n'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'r': + buffer.push_back('\r'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 't': + buffer.push_back('\t'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'u': + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u1; + break; + default: + ec = jmespath_errc::illegal_escaped_character; + return jmespath_expression(); + } + break; + case path_state::escape_u1: + cp = append_to_codepoint(0, *p_, ec); + if (ec) + { + return jmespath_expression(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u2; + break; + case path_state::escape_u2: + cp = append_to_codepoint(cp, *p_, ec); + if (ec) + { + return jmespath_expression(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u3; + break; + case path_state::escape_u3: + cp = append_to_codepoint(cp, *p_, ec); + if (ec) + { + return jmespath_expression(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u4; + break; + case path_state::escape_u4: + cp = append_to_codepoint(cp, *p_, ec); + if (ec) + { + return jmespath_expression(); + } + if (unicode_traits::is_high_surrogate(cp)) + { + ++p_; + ++column_; + state_stack_.back() = path_state::escape_expect_surrogate_pair1; + } + else + { + unicode_traits::convert(&cp, 1, buffer); + ++p_; + ++column_; + state_stack_.pop_back(); + } + break; + case path_state::escape_expect_surrogate_pair1: + switch (*p_) + { + case '\\': + ++p_; + ++column_; + state_stack_.back() = path_state::escape_expect_surrogate_pair2; + break; + default: + ec = jmespath_errc::invalid_codepoint; + return jmespath_expression(); + } + break; + 
case path_state::escape_expect_surrogate_pair2: + switch (*p_) + { + case 'u': + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u5; + break; + default: + ec = jmespath_errc::invalid_codepoint; + return jmespath_expression(); + } + break; + case path_state::escape_u5: + cp2 = append_to_codepoint(0, *p_, ec); + if (ec) + { + return jmespath_expression(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u6; + break; + case path_state::escape_u6: + cp2 = append_to_codepoint(cp2, *p_, ec); + if (ec) + { + return jmespath_expression(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u7; + break; + case path_state::escape_u7: + cp2 = append_to_codepoint(cp2, *p_, ec); + if (ec) + { + return jmespath_expression(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u8; + break; + case path_state::escape_u8: + { + cp2 = append_to_codepoint(cp2, *p_, ec); + if (ec) + { + return jmespath_expression(); + } + uint32_t codepoint = 0x10000 + ((cp & 0x3FF) << 10) + (cp2 & 0x3FF); + unicode_traits::convert(&codepoint, 1, buffer); + state_stack_.pop_back(); + ++p_; + ++column_; + break; + } + case path_state::raw_string: + switch (*p_) + { + case '\'': + { + push_token(token(literal_arg, Json(buffer)), ec); + if (ec) {return jmespath_expression();} + buffer.clear(); + state_stack_.pop_back(); // raw_string + ++p_; + ++column_; + break; + } + case '\\': + state_stack_.emplace_back(path_state::raw_string_escape_char); + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case path_state::literal: + switch (*p_) + { + case '`': + { + json_decoder<Json> decoder; + basic_json_reader<char_type,string_source<char_type>> reader(buffer, decoder); + std::error_code parse_ec; + reader.read(parse_ec); + if (parse_ec) + { + ec = jmespath_errc::invalid_literal; + return jmespath_expression(); + } + auto j = decoder.get_result(); + + push_token(token(literal_arg, 
std::move(j)), ec); + if (ec) {return jmespath_expression();} + buffer.clear(); + state_stack_.pop_back(); // json_value + ++p_; + ++column_; + break; + } + case '\\': + if (p_+1 < end_input_) + { + ++p_; + ++column_; + if (*p_ != '`') + { + buffer.push_back('\\'); + } + buffer.push_back(*p_); + } + else + { + ec = jmespath_errc::unexpected_end_of_input; + return jmespath_expression(); + } + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case path_state::number: + switch(*p_) + { + case '-': + buffer.push_back(*p_); + state_stack_.back() = path_state::digit; + ++p_; + ++column_; + break; + default: + state_stack_.back() = path_state::digit; + break; + } + break; + case path_state::digit: + switch(*p_) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + buffer.push_back(*p_); + ++p_; + ++column_; + break; + default: + state_stack_.pop_back(); // digit + break; + } + break; + + case path_state::bracket_specifier: + switch(*p_) + { + case '*': + push_token(token(jsoncons::make_unique<list_projection>()), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::expect_rbracket; + ++p_; + ++column_; + break; + case ']': // [] + push_token(token(jsoncons::make_unique<flatten_projection>()), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); // bracket_specifier + ++p_; + ++column_; + break; + case '?': + push_token(token(begin_filter_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::filter; + state_stack_.emplace_back(path_state::rhs_expression); + state_stack_.emplace_back(path_state::lhs_expression); + ++p_; + ++column_; + break; + case ':': // slice_expression + state_stack_.back() = path_state::rhs_slice_expression_stop ; + state_stack_.emplace_back(path_state::number); + ++p_; + ++column_; + break; + // number + case '-':case '0':case '1':case '2':case '3':case '4':case 
'5':case '6':case '7':case '8':case '9': + state_stack_.back() = path_state::index_or_slice_expression; + state_stack_.emplace_back(path_state::number); + break; + default: + ec = jmespath_errc::expected_index_expression; + return jmespath_expression(); + } + break; + case path_state::bracket_specifier_or_multi_select_list: + switch(*p_) + { + case '*': + if (p_+1 >= end_input_) + { + ec = jmespath_errc::unexpected_end_of_input; + return jmespath_expression(); + } + if (*(p_+1) == ']') + { + state_stack_.back() = path_state::bracket_specifier; + } + else + { + push_token(token(begin_multi_select_list_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::multi_select_list; + state_stack_.emplace_back(path_state::lhs_expression); + } + break; + case ']': // [] + case '?': + case ':': // slice_expression + case '-':case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + state_stack_.back() = path_state::bracket_specifier; + break; + default: + push_token(token(begin_multi_select_list_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::multi_select_list; + state_stack_.emplace_back(path_state::lhs_expression); + break; + } + break; + + case path_state::expect_multi_select_list: + switch(*p_) + { + case ']': + case '?': + case ':': + case '-':case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ec = jmespath_errc::expected_multi_select_list; + return jmespath_expression(); + case '*': + push_token(token(jsoncons::make_unique<list_projection>()), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::expect_rbracket; + ++p_; + ++column_; + break; + default: + push_token(token(begin_multi_select_list_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::multi_select_list; + state_stack_.emplace_back(path_state::lhs_expression); + break; + } + break; + + case 
path_state::multi_select_hash: + switch(*p_) + { + case '*': + case ']': + case '?': + case ':': + case '-':case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + break; + default: + state_stack_.back() = path_state::key_val_expr; + break; + } + break; + + case path_state::index_or_slice_expression: + switch(*p_) + { + case ']': + { + if (buffer.empty()) + { + push_token(token(jsoncons::make_unique<flatten_projection>()), ec); + if (ec) {return jmespath_expression();} + } + else + { + int64_t val{ 0 }; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), val); + if (!r) + { + ec = jmespath_errc::invalid_number; + return jmespath_expression(); + } + push_token(token(jsoncons::make_unique<index_selector>(val)), ec); + if (ec) {return jmespath_expression();} + + buffer.clear(); + } + state_stack_.pop_back(); // bracket_specifier + ++p_; + ++column_; + break; + } + case ':': + { + if (!buffer.empty()) + { + int64_t val; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), val); + if (!r) + { + ec = jmespath_errc::invalid_number; + return jmespath_expression(); + } + slic.start_ = val; + buffer.clear(); + } + state_stack_.back() = path_state::rhs_slice_expression_stop; + state_stack_.emplace_back(path_state::number); + ++p_; + ++column_; + break; + } + default: + ec = jmespath_errc::expected_rbracket; + return jmespath_expression(); + } + break; + case path_state::rhs_slice_expression_stop : + { + if (!buffer.empty()) + { + int64_t val{ 0 }; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), val); + if (!r) + { + ec = jmespath_errc::invalid_number; + return jmespath_expression(); + } + slic.stop_ = jsoncons::optional<int64_t>(val); + buffer.clear(); + } + switch(*p_) + { + case ']': + push_token(token(jsoncons::make_unique<slice_projection>(slic)), ec); + if (ec) {return jmespath_expression();} + slic = slice{}; + state_stack_.pop_back(); // bracket_specifier2 + ++p_; + ++column_; 
+ break; + case ':': + state_stack_.back() = path_state::rhs_slice_expression_step; + state_stack_.emplace_back(path_state::number); + ++p_; + ++column_; + break; + default: + ec = jmespath_errc::expected_rbracket; + return jmespath_expression(); + } + break; + } + case path_state::rhs_slice_expression_step: + { + if (!buffer.empty()) + { + int64_t val{ 0 }; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), val); + if (!r) + { + ec = jmespath_errc::invalid_number; + return jmespath_expression(); + } + if (val == 0) + { + ec = jmespath_errc::step_cannot_be_zero; + return jmespath_expression(); + } + slic.step_ = val; + buffer.clear(); + } + switch(*p_) + { + case ']': + push_token(token(jsoncons::make_unique<slice_projection>(slic)), ec); + if (ec) {return jmespath_expression();} + buffer.clear(); + slic = slice{}; + state_stack_.pop_back(); // rhs_slice_expression_step + ++p_; + ++column_; + break; + default: + ec = jmespath_errc::expected_rbracket; + return jmespath_expression(); + } + break; + } + case path_state::expect_rbracket: + { + switch(*p_) + { + case ']': + state_stack_.pop_back(); // expect_rbracket + ++p_; + ++column_; + break; + default: + ec = jmespath_errc::expected_rbracket; + return jmespath_expression(); + } + break; + } + case path_state::expect_rparen: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case ')': + ++p_; + ++column_; + push_token(rparen_arg, ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::rhs_expression; + break; + default: + ec = jmespath_errc::expected_rparen; + return jmespath_expression(); + } + break; + case path_state::key_val_expr: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case '\"': + state_stack_.back() = path_state::expect_colon; + state_stack_.emplace_back(path_state::key_expr); + state_stack_.emplace_back(path_state::quoted_string); + 
++p_; + ++column_; + break; + case '\'': + state_stack_.back() = path_state::expect_colon; + state_stack_.emplace_back(path_state::raw_string); + ++p_; + ++column_; + break; + default: + if ((*p_ >= 'A' && *p_ <= 'Z') || (*p_ >= 'a' && *p_ <= 'z') || (*p_ == '_')) + { + state_stack_.back() = path_state::expect_colon; + state_stack_.emplace_back(path_state::key_expr); + state_stack_.emplace_back(path_state::unquoted_string); + buffer.push_back(*p_); + ++p_; + ++column_; + } + else + { + ec = jmespath_errc::expected_key; + return jmespath_expression(); + } + break; + }; + break; + } + case path_state::cmp_lt_or_lte: + { + switch(*p_) + { + case '=': + push_token(token(resources_.get_lte_operator()), ec); + push_token(token(current_node_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + push_token(token(resources_.get_lt_operator()), ec); + push_token(token(current_node_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + break; + } + break; + } + case path_state::cmp_gt_or_gte: + { + switch(*p_) + { + case '=': + push_token(token(resources_.get_gte_operator()), ec); + push_token(token(current_node_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + push_token(token(resources_.get_gt_operator()), ec); + push_token(token(current_node_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + break; + } + break; + } + case path_state::cmp_eq: + { + switch(*p_) + { + case '=': + push_token(token(resources_.get_eq_operator()), ec); + push_token(token(current_node_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + ec = jmespath_errc::expected_comparator; + return jmespath_expression(); + } + break; + } + case path_state::cmp_ne: + { + switch(*p_) + { + case '=': + push_token(token(resources_.get_ne_operator()), 
ec); + push_token(token(current_node_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + ec = jmespath_errc::expected_comparator; + return jmespath_expression(); + } + break; + } + case path_state::expect_dot: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case '.': + state_stack_.pop_back(); // expect_dot + ++p_; + ++column_; + break; + default: + ec = jmespath_errc::expected_dot; + return jmespath_expression(); + } + break; + } + case path_state::expect_pipe_or_or: + { + switch(*p_) + { + case '|': + push_token(token(resources_.get_or_operator()), ec); + push_token(token(current_node_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + push_token(token(pipe_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + break; + } + break; + } + case path_state::expect_and: + { + switch(*p_) + { + case '&': + push_token(token(resources_.get_and_operator()), ec); + push_token(token(current_node_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); // expect_and + ++p_; + ++column_; + break; + default: + ec = jmespath_errc::expected_and; + return jmespath_expression(); + } + break; + } + case path_state::multi_select_list: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case ',': + push_token(token(separator_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.emplace_back(path_state::lhs_expression); + ++p_; + ++column_; + break; + case '[': + state_stack_.emplace_back(path_state::lhs_expression); + break; + case '.': + state_stack_.emplace_back(path_state::sub_expression); + ++p_; + ++column_; + break; + case '|': + { + ++p_; + ++column_; + state_stack_.emplace_back(path_state::lhs_expression); + 
state_stack_.emplace_back(path_state::expect_pipe_or_or); + break; + } + case ']': + { + push_token(token(end_multi_select_list_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + + ++p_; + ++column_; + break; + } + default: + ec = jmespath_errc::expected_rbracket; + return jmespath_expression(); + } + break; + } + case path_state::filter: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case ']': + { + push_token(token(end_filter_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + } + default: + ec = jmespath_errc::expected_rbracket; + return jmespath_expression(); + } + break; + } + case path_state::expect_rbrace: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case ',': + push_token(token(separator_arg), ec); + if (ec) {return jmespath_expression();} + state_stack_.back() = path_state::key_val_expr; + ++p_; + ++column_; + break; + case '[': + case '{': + state_stack_.emplace_back(path_state::lhs_expression); + break; + case '.': + state_stack_.emplace_back(path_state::sub_expression); + ++p_; + ++column_; + break; + case '}': + { + state_stack_.pop_back(); + push_token(end_multi_select_hash_arg, ec); + if (ec) {return jmespath_expression();} + ++p_; + ++column_; + break; + } + default: + ec = jmespath_errc::expected_rbrace; + return jmespath_expression(); + } + break; + } + case path_state::expect_colon: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(ec); + break; + case ':': + state_stack_.back() = path_state::expect_rbrace; + state_stack_.emplace_back(path_state::lhs_expression); + ++p_; + ++column_; + break; + default: + ec = jmespath_errc::expected_colon; + return jmespath_expression(); + } + break; + } + } + + } + + if (state_stack_.empty()) + { + ec = jmespath_errc::syntax_error; + return 
jmespath_expression(); + } + while (state_stack_.size() > 1) + { + switch (state_stack_.back()) + { + case path_state::rhs_expression: + if (state_stack_.size() > 1) + { + state_stack_.pop_back(); + } + else + { + ec = jmespath_errc::syntax_error; + return jmespath_expression(); + } + break; + case path_state::val_expr: + push_token(token(jsoncons::make_unique<identifier_selector>(buffer)), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + break; + case path_state::identifier_or_function_expr: + push_token(token(jsoncons::make_unique<identifier_selector>(buffer)), ec); + if (ec) {return jmespath_expression();} + state_stack_.pop_back(); + break; + case path_state::unquoted_string: + state_stack_.pop_back(); + break; + default: + ec = jmespath_errc::syntax_error; + return jmespath_expression(); + break; + } + } + + if (!(state_stack_.size() == 1 && state_stack_.back() == path_state::rhs_expression)) + { + ec = jmespath_errc::unexpected_end_of_input; + return jmespath_expression(); + } + + state_stack_.pop_back(); + + push_token(end_of_expression_arg, ec); + if (ec) {return jmespath_expression();} + + //for (auto& t : output_stack_) + //{ + // std::cout << t.to_string() << std::endl; + //} + + return jmespath_expression(std::move(resources_), std::move(output_stack_)); + } + + void advance_past_space_character(std::error_code& ec) + { + switch (*p_) + { + case ' ':case '\t': + ++p_; + ++column_; + break; + case '\r': + if (p_+1 >= end_input_) + { + ec = jmespath_errc::unexpected_end_of_input; + return; + } + if (*(p_+1) == '\n') + ++p_; + ++line_; + column_ = 1; + ++p_; + break; + case '\n': + ++line_; + column_ = 1; + ++p_; + break; + default: + break; + } + } + + void unwind_rparen(std::error_code& ec) + { + auto it = operator_stack_.rbegin(); + while (it != operator_stack_.rend() && !it->is_lparen()) + { + output_stack_.emplace_back(std::move(*it)); + ++it; + } + if (it == operator_stack_.rend()) + { + ec = 
jmespath_errc::unbalanced_parentheses; + return; + } + ++it; + operator_stack_.erase(it.base(),operator_stack_.end()); + } + + void push_token(token&& tok, std::error_code& ec) + { + switch (tok.type()) + { + case token_kind::end_filter: + { + unwind_rparen(ec); + std::vector<token> toks; + auto it = output_stack_.rbegin(); + while (it != output_stack_.rend() && it->type() != token_kind::begin_filter) + { + toks.emplace_back(std::move(*it)); + ++it; + } + if (it == output_stack_.rend()) + { + ec = jmespath_errc::unbalanced_braces; + return; + } + if (toks.back().type() != token_kind::literal) + { + toks.emplace_back(current_node_arg); + } + std::reverse(toks.begin(), toks.end()); + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + + if (!output_stack_.empty() && output_stack_.back().is_projection() && + (tok.precedence_level() < output_stack_.back().precedence_level() || + (tok.precedence_level() == output_stack_.back().precedence_level() && tok.is_right_associative()))) + { + output_stack_.back().expression_->add_expression(jsoncons::make_unique<filter_expression>(std::move(toks))); + } + else + { + output_stack_.emplace_back(token(jsoncons::make_unique<filter_expression>(std::move(toks)))); + } + break; + } + case token_kind::end_multi_select_list: + { + unwind_rparen(ec); + std::vector<std::vector<token>> vals; + auto it = output_stack_.rbegin(); + while (it != output_stack_.rend() && it->type() != token_kind::begin_multi_select_list) + { + std::vector<token> toks; + do + { + toks.emplace_back(std::move(*it)); + ++it; + } while (it != output_stack_.rend() && it->type() != token_kind::begin_multi_select_list && it->type() != token_kind::separator); + if (it->type() == token_kind::separator) + { + ++it; + } + if (toks.back().type() != token_kind::literal) + { + toks.emplace_back(current_node_arg); + } + std::reverse(toks.begin(), toks.end()); + vals.emplace_back(std::move(toks)); + } + if (it == output_stack_.rend()) + { + ec = 
jmespath_errc::unbalanced_braces; + return; + } + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + std::reverse(vals.begin(), vals.end()); + if (!output_stack_.empty() && output_stack_.back().is_projection() && + (tok.precedence_level() < output_stack_.back().precedence_level() || + (tok.precedence_level() == output_stack_.back().precedence_level() && tok.is_right_associative()))) + { + output_stack_.back().expression_->add_expression(jsoncons::make_unique<multi_select_list>(std::move(vals))); + } + else + { + output_stack_.emplace_back(token(jsoncons::make_unique<multi_select_list>(std::move(vals)))); + } + break; + } + case token_kind::end_multi_select_hash: + { + unwind_rparen(ec); + std::vector<key_tokens> key_toks; + auto it = output_stack_.rbegin(); + while (it != output_stack_.rend() && it->type() != token_kind::begin_multi_select_hash) + { + std::vector<token> toks; + do + { + toks.emplace_back(std::move(*it)); + ++it; + } while (it != output_stack_.rend() && it->type() != token_kind::key); + JSONCONS_ASSERT(it->is_key()); + auto key = std::move(it->key_); + ++it; + if (it->type() == token_kind::separator) + { + ++it; + } + if (toks.back().type() != token_kind::literal) + { + toks.emplace_back(current_node_arg); + } + std::reverse(toks.begin(), toks.end()); + key_toks.emplace_back(std::move(key), std::move(toks)); + } + if (it == output_stack_.rend()) + { + ec = jmespath_errc::unbalanced_braces; + return; + } + std::reverse(key_toks.begin(), key_toks.end()); + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + + if (!output_stack_.empty() && output_stack_.back().is_projection() && + (tok.precedence_level() < output_stack_.back().precedence_level() || + (tok.precedence_level() == output_stack_.back().precedence_level() && tok.is_right_associative()))) + { + output_stack_.back().expression_->add_expression(jsoncons::make_unique<multi_select_hash>(std::move(key_toks))); + } + else + { + 
output_stack_.emplace_back(token(jsoncons::make_unique<multi_select_hash>(std::move(key_toks)))); + } + break; + } + case token_kind::end_expression_type: + { + std::vector<token> toks; + auto it = output_stack_.rbegin(); + while (it != output_stack_.rend() && it->type() != token_kind::begin_expression_type) + { + toks.emplace_back(std::move(*it)); + ++it; + } + if (it == output_stack_.rend()) + { + JSONCONS_THROW(json_runtime_error<std::runtime_error>("Unbalanced braces")); + } + if (toks.back().type() != token_kind::literal) + { + toks.emplace_back(current_node_arg); + } + std::reverse(toks.begin(), toks.end()); + output_stack_.erase(it.base(),output_stack_.end()); + output_stack_.emplace_back(token(jsoncons::make_unique<function_expression>(std::move(toks)))); + break; + } + case token_kind::literal: + if (!output_stack_.empty() && output_stack_.back().type() == token_kind::current_node) + { + output_stack_.back() = std::move(tok); + } + else + { + output_stack_.emplace_back(std::move(tok)); + } + break; + case token_kind::expression: + if (!output_stack_.empty() && output_stack_.back().is_projection() && + (tok.precedence_level() < output_stack_.back().precedence_level() || + (tok.precedence_level() == output_stack_.back().precedence_level() && tok.is_right_associative()))) + { + output_stack_.back().expression_->add_expression(std::move(tok.expression_)); + } + else + { + output_stack_.emplace_back(std::move(tok)); + } + break; + case token_kind::rparen: + { + unwind_rparen(ec); + break; + } + case token_kind::end_function: + { + unwind_rparen(ec); + std::vector<token> toks; + auto it = output_stack_.rbegin(); + std::size_t arg_count = 0; + while (it != output_stack_.rend() && it->type() != token_kind::function) + { + if (it->type() == token_kind::argument) + { + ++arg_count; + } + toks.emplace_back(std::move(*it)); + ++it; + } + if (it == output_stack_.rend()) + { + ec = jmespath_errc::unbalanced_parentheses; + return; + } + if (it->arity() && arg_count != 
*(it->arity())) + { + ec = jmespath_errc::invalid_arity; + return; + } + if (toks.back().type() != token_kind::literal) + { + toks.emplace_back(current_node_arg); + } + std::reverse(toks.begin(), toks.end()); + toks.push_back(std::move(*it)); + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + + if (!output_stack_.empty() && output_stack_.back().is_projection() && + (tok.precedence_level() < output_stack_.back().precedence_level() || + (tok.precedence_level() == output_stack_.back().precedence_level() && tok.is_right_associative()))) + { + output_stack_.back().expression_->add_expression(jsoncons::make_unique<function_expression>(std::move(toks))); + } + else + { + output_stack_.emplace_back(token(jsoncons::make_unique<function_expression>(std::move(toks)))); + } + break; + } + case token_kind::end_of_expression: + { + auto it = operator_stack_.rbegin(); + while (it != operator_stack_.rend()) + { + output_stack_.emplace_back(std::move(*it)); + ++it; + } + operator_stack_.clear(); + break; + } + case token_kind::unary_operator: + case token_kind::binary_operator: + { + if (operator_stack_.empty() || operator_stack_.back().is_lparen()) + { + operator_stack_.emplace_back(std::move(tok)); + } + else if (tok.precedence_level() < operator_stack_.back().precedence_level() + || (tok.precedence_level() == operator_stack_.back().precedence_level() && tok.is_right_associative())) + { + operator_stack_.emplace_back(std::move(tok)); + } + else + { + auto it = operator_stack_.rbegin(); + while (it != operator_stack_.rend() && it->is_operator() + && (tok.precedence_level() > it->precedence_level() + || (tok.precedence_level() == it->precedence_level() && tok.is_right_associative()))) + { + output_stack_.emplace_back(std::move(*it)); + ++it; + } + + operator_stack_.erase(it.base(),operator_stack_.end()); + operator_stack_.emplace_back(std::move(tok)); + } + break; + } + case token_kind::separator: + { + unwind_rparen(ec); + output_stack_.emplace_back(std::move(tok)); + 
operator_stack_.emplace_back(token(lparen_arg)); + break; + } + case token_kind::begin_filter: + output_stack_.emplace_back(std::move(tok)); + operator_stack_.emplace_back(token(lparen_arg)); + break; + case token_kind::begin_multi_select_list: + output_stack_.emplace_back(std::move(tok)); + operator_stack_.emplace_back(token(lparen_arg)); + break; + case token_kind::begin_multi_select_hash: + output_stack_.emplace_back(std::move(tok)); + operator_stack_.emplace_back(token(lparen_arg)); + break; + case token_kind::function: + output_stack_.emplace_back(std::move(tok)); + operator_stack_.emplace_back(token(lparen_arg)); + break; + case token_kind::current_node: + output_stack_.emplace_back(std::move(tok)); + break; + case token_kind::key: + case token_kind::pipe: + case token_kind::argument: + case token_kind::begin_expression_type: + output_stack_.emplace_back(std::move(tok)); + break; + case token_kind::lparen: + operator_stack_.emplace_back(std::move(tok)); + break; + default: + break; + } + } + + uint32_t append_to_codepoint(uint32_t cp, int c, std::error_code& ec) + { + cp *= 16; + if (c >= '0' && c <= '9') + { + cp += c - '0'; + } + else if (c >= 'a' && c <= 'f') + { + cp += c - 'a' + 10; + } + else if (c >= 'A' && c <= 'F') + { + cp += c - 'A' + 10; + } + else + { + ec = jmespath_errc::invalid_codepoint; + } + return cp; + } + }; + + } // detail + + template <class Json> + using jmespath_expression = typename jsoncons::jmespath::detail::jmespath_evaluator<Json,const Json&>::jmespath_expression; + + template<class Json> + Json search(const Json& doc, const typename Json::string_view_type& path) + { + jsoncons::jmespath::detail::jmespath_evaluator<Json,const Json&> evaluator; + std::error_code ec; + auto expr = evaluator.compile(path.data(), path.size(), ec); + if (ec) + { + JSONCONS_THROW(jmespath_error(ec, evaluator.line(), evaluator.column())); + } + auto result = expr.evaluate(doc, ec); + if (ec) + { + JSONCONS_THROW(jmespath_error(ec)); + } + return 
result; + } + + template<class Json> + Json search(const Json& doc, const typename Json::string_view_type& path, std::error_code& ec) + { + jsoncons::jmespath::detail::jmespath_evaluator<Json,const Json&> evaluator; + auto expr = evaluator.compile(path.data(), path.size(), ec); + if (ec) + { + return Json::null(); + } + auto result = expr.evaluate(doc, ec); + if (ec) + { + return Json::null(); + } + return result; + } + + template <class Json> + jmespath_expression<Json> make_expression(const typename json::string_view_type& expr) + { + return jmespath_expression<Json>::compile(expr); + } + + template <class Json> + jmespath_expression<Json> make_expression(const typename json::string_view_type& expr, + std::error_code& ec) + { + return jmespath_expression<Json>::compile(expr, ec); + } + + +} // namespace jmespath +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/jmespath/jmespath_error.hpp b/include/jsoncons_ext/jmespath/jmespath_error.hpp new file mode 100644 index 0000000..6422c65 --- /dev/null +++ b/include/jsoncons_ext/jmespath/jmespath_error.hpp @@ -0,0 +1,215 @@ +/// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JMESPATH_JMESPATH_ERROR_HPP +#define JSONCONS_JMESPATH_JMESPATH_ERROR_HPP + +#include <jsoncons/json_exception.hpp> +#include <system_error> + +namespace jsoncons { namespace jmespath { + + class jmespath_error : public std::system_error, public virtual json_exception + { + std::size_t line_number_; + std::size_t column_number_; + mutable std::string what_; + public: + jmespath_error(std::error_code ec) + : std::system_error(ec), line_number_(0), column_number_(0) + { + } + jmespath_error(std::error_code ec, const std::string& what_arg) + : std::system_error(ec, what_arg), line_number_(0), column_number_(0) + { + } + jmespath_error(std::error_code ec, std::size_t position) + : std::system_error(ec), line_number_(0), column_number_(position) + { + } + jmespath_error(std::error_code ec, std::size_t line, std::size_t column) + : std::system_error(ec), line_number_(line), column_number_(column) + { + } + jmespath_error(const jmespath_error& other) = default; + + jmespath_error(jmespath_error&& other) = default; + + const char* what() const noexcept override + { + if (what_.empty()) + { + JSONCONS_TRY + { + what_.append(std::system_error::what()); + if (line_number_ != 0 && column_number_ != 0) + { + what_.append(" at line "); + what_.append(std::to_string(line_number_)); + what_.append(" and column "); + what_.append(std::to_string(column_number_)); + } + else if (column_number_ != 0) + { + what_.append(" at position "); + what_.append(std::to_string(column_number_)); + } + return what_.c_str(); + } + JSONCONS_CATCH(...) 
+ { + return std::system_error::what(); + } + } + else + { + return what_.c_str(); + } + } + + std::size_t line() const noexcept + { + return line_number_; + } + + std::size_t column() const noexcept + { + return column_number_; + } + }; + +enum class jmespath_errc +{ + success = 0, + expected_identifier, + expected_index, + expected_A_Za_Z_, + expected_rbracket, + expected_rparen, + expected_rbrace, + expected_colon, + expected_dot, + expected_or, + expected_and, + expected_multi_select_list, + invalid_number, + invalid_literal, + expected_comparator, + expected_key, + invalid_argument, + unknown_function, + invalid_type, + unexpected_end_of_input, + step_cannot_be_zero, + syntax_error, + invalid_codepoint, + illegal_escaped_character, + unbalanced_parentheses, + unbalanced_braces, + invalid_arity, + identifier_not_found, + expected_index_expression, + unknown_error +}; + +class jmespath_error_category_impl + : public std::error_category +{ +public: + const char* name() const noexcept override + { + return "jsoncons/jmespath"; + } + std::string message(int ev) const override + { + switch (static_cast<jmespath_errc>(ev)) + { + case jmespath_errc::expected_identifier: + return "Expected identifier"; + case jmespath_errc::expected_index: + return "Expected index"; + case jmespath_errc::expected_A_Za_Z_: + return "Expected A-Z, a-z, or _"; + case jmespath_errc::expected_rbracket: + return "Expected ]"; + case jmespath_errc::expected_rparen: + return "Expected )"; + case jmespath_errc::expected_rbrace: + return "Expected }"; + case jmespath_errc::expected_colon: + return "Expected :"; + case jmespath_errc::expected_dot: + return "Expected \".\""; + case jmespath_errc::expected_or: + return "Expected \"||\""; + case jmespath_errc::expected_and: + return "Expected \"&&\""; + case jmespath_errc::expected_multi_select_list: + return "Expected multi-select-list"; + case jmespath_errc::invalid_number: + return "Invalid number"; + case jmespath_errc::invalid_literal: + return 
"Invalid literal"; + case jmespath_errc::expected_comparator: + return "Expected <, <=, ==, >=, > or !="; + case jmespath_errc::expected_key: + return "Expected key"; + case jmespath_errc::invalid_argument: + return "Invalid argument type"; + case jmespath_errc::unknown_function: + return "Unknown function"; + case jmespath_errc::invalid_type: + return "Invalid type"; + case jmespath_errc::unexpected_end_of_input: + return "Unexpected end of jmespath input"; + case jmespath_errc::step_cannot_be_zero: + return "Slice step cannot be zero"; + case jmespath_errc::syntax_error: + return "Syntax error"; + case jmespath_errc::invalid_codepoint: + return "Invalid codepoint"; + case jmespath_errc::illegal_escaped_character: + return "Illegal escaped character"; + case jmespath_errc::unbalanced_parentheses: + return "Unbalanced parentheses"; + case jmespath_errc::unbalanced_braces: + return "Unbalanced braces"; + case jmespath_errc::invalid_arity: + return "Function called with wrong number of arguments"; + case jmespath_errc::identifier_not_found: + return "Identifier not found"; + case jmespath_errc::expected_index_expression: + return "Expected index expression"; + case jmespath_errc::unknown_error: + default: + return "Unknown jmespath parser error"; + } + } +}; + +inline +const std::error_category& jmespath_error_category() +{ + static jmespath_error_category_impl instance; + return instance; +} + +inline +std::error_code make_error_code(jmespath_errc result) +{ + return std::error_code(static_cast<int>(result),jmespath_error_category()); +} + +}} + +namespace std { + template<> + struct is_error_code_enum<jsoncons::jmespath::jmespath_errc> : public true_type + { + }; +} + +#endif diff --git a/include/jsoncons_ext/jsonpatch/jsonpatch.hpp b/include/jsoncons_ext/jsonpatch/jsonpatch.hpp new file mode 100644 index 0000000..ab4ace7 --- /dev/null +++ b/include/jsoncons_ext/jsonpatch/jsonpatch.hpp @@ -0,0 +1,579 @@ +// Copyright 2017 Daniel Parker +// Distributed under the 
Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPATCH_JSONPATCH_HPP +#define JSONCONS_JSONPATCH_JSONPATCH_HPP + +#include <string> +#include <vector> +#include <memory> +#include <algorithm> // std::min +#include <utility> // std::move +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpointer/jsonpointer.hpp> +#include <jsoncons_ext/jsonpatch/jsonpatch_error.hpp> + +namespace jsoncons { namespace jsonpatch { + +namespace detail { + + template <class CharT> + struct jsonpatch_names + { + static std::basic_string<CharT> test_name() + { + static std::basic_string<CharT> name{'t','e','s','t'}; + return name; + } + static std::basic_string<CharT> add_name() + { + static std::basic_string<CharT> name{'a','d','d'}; + return name; + } + static std::basic_string<CharT> remove_name() + { + static std::basic_string<CharT> name{'r','e','m','o','v','e'}; + return name; + } + static std::basic_string<CharT> replace_name() + { + static std::basic_string<CharT> name{'r','e','p','l','a','c','e'}; + return name; + } + static std::basic_string<CharT> move_name() + { + static std::basic_string<CharT> name{'m','o','v','e'}; + return name; + } + static std::basic_string<CharT> copy_name() + { + static std::basic_string<CharT> name{'c','o','p','y'}; + return name; + } + static std::basic_string<CharT> op_name() + { + static std::basic_string<CharT> name{'o','p'}; + return name; + } + static std::basic_string<CharT> path_name() + { + static std::basic_string<CharT> name{'p','a','t','h'}; + return name; + } + static std::basic_string<CharT> from_name() + { + static std::basic_string<CharT> name{'f','r','o','m'}; + return name; + } + static std::basic_string<CharT> value_name() + { + static std::basic_string<CharT> name{'v','a','l','u','e'}; + return name; + } + static std::basic_string<CharT> dash_name() + { + static 
std::basic_string<CharT> name{'-'}; + return name; + } + }; + + template<class Json> + jsonpointer::basic_json_pointer<typename Json::char_type> definite_path(const Json& root, jsonpointer::basic_json_pointer<typename Json::char_type>& location) + { + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type>; + + auto rit = location.rbegin(); + if (rit == location.rend()) + { + return location; + } + + if (*rit != jsonpatch_names<char_type>::dash_name()) + { + return location; + } + + std::vector<string_type> tokens; + for (auto it = location.begin(); it != location.rbegin().base()-1; ++it) + { + tokens.push_back(*it); + } + jsonpointer::basic_json_pointer<char_type> pointer(tokens); + + std::error_code ec; + + Json val = jsonpointer::get(root, pointer, ec); + if (ec || !val.is_array()) + { + return location; + } + string_type last_token; + jsoncons::detail::from_integer(val.size(), last_token); + tokens.emplace_back(std::move(last_token)); + + return jsonpointer::basic_json_pointer<char_type>(std::move(tokens)); + } + + enum class op_type {add,remove,replace}; + enum class state_type {begin,abort,commit}; + + template <class Json> + struct operation_unwinder + { + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type>; + using json_pointer_type = jsonpointer::basic_json_pointer<char_type>; + + struct entry + { + op_type op; + json_pointer_type path; + Json value; + + entry(op_type op, const json_pointer_type& path, const Json& value) + : op(op), path(path), value(value) + { + } + + entry(const entry&) = default; + + entry(entry&&) = default; + + entry& operator=(const entry&) = default; + + entry& operator=(entry&&) = default; + }; + + Json& target; + state_type state; + std::vector<entry> stack; + + operation_unwinder(Json& j) + : target(j), state(state_type::begin) + { + } + + ~operation_unwinder() noexcept + { + std::error_code ec; + if (state != state_type::commit) + { + for (auto it 
= stack.rbegin(); it != stack.rend(); ++it) + { + if (it->op == op_type::add) + { + jsonpointer::add(target,it->path,it->value,ec); + if (ec) + { + //std::cout << "add: " << it->path << std::endl; + break; + } + } + else if (it->op == op_type::remove) + { + jsonpointer::remove(target,it->path,ec); + if (ec) + { + //std::cout << "remove: " << it->path << std::endl; + break; + } + } + else if (it->op == op_type::replace) + { + jsonpointer::replace(target,it->path,it->value,ec); + if (ec) + { + //std::cout << "replace: " << it->path << std::endl; + break; + } + } + } + } + } + }; + + template <class Json> + Json from_diff(const Json& source, const Json& target, const typename Json::string_view_type& path) + { + using char_type = typename Json::char_type; + + Json result = typename Json::array(); + + if (source == target) + { + return result; + } + + if (source.is_array() && target.is_array()) + { + std::size_t common = (std::min)(source.size(),target.size()); + for (std::size_t i = 0; i < common; ++i) + { + std::basic_string<char_type> ss(path); + ss.push_back('/'); + jsoncons::detail::from_integer(i,ss); + auto temp_diff = from_diff(source[i],target[i],ss); + result.insert(result.array_range().end(),temp_diff.array_range().begin(),temp_diff.array_range().end()); + } + // Element in source, not in target - remove + for (std::size_t i = source.size(); i-- > target.size();) + { + std::basic_string<char_type> ss(path); + ss.push_back('/'); + jsoncons::detail::from_integer(i,ss); + Json val(json_object_arg); + val.insert_or_assign(jsonpatch_names<char_type>::op_name(), jsonpatch_names<char_type>::remove_name()); + val.insert_or_assign(jsonpatch_names<char_type>::path_name(), ss); + result.push_back(std::move(val)); + } + // Element in target, not in source - add, + // Fix contributed by Alexander rog13 + for (std::size_t i = source.size(); i < target.size(); ++i) + { + const auto& a = target[i]; + std::basic_string<char_type> ss(path); + ss.push_back('/'); + 
jsoncons::detail::from_integer(i,ss); + Json val(json_object_arg); + val.insert_or_assign(jsonpatch_names<char_type>::op_name(), jsonpatch_names<char_type>::add_name()); + val.insert_or_assign(jsonpatch_names<char_type>::path_name(), ss); + val.insert_or_assign(jsonpatch_names<char_type>::value_name(), a); + result.push_back(std::move(val)); + } + } + else if (source.is_object() && target.is_object()) + { + for (const auto& a : source.object_range()) + { + std::basic_string<char_type> ss(path); + ss.push_back('/'); + jsonpointer::escape(a.key(),ss); + auto it = target.find(a.key()); + if (it != target.object_range().end()) + { + auto temp_diff = from_diff(a.value(),it->value(),ss); + result.insert(result.array_range().end(),temp_diff.array_range().begin(),temp_diff.array_range().end()); + } + else + { + Json val(json_object_arg); + val.insert_or_assign(jsonpatch_names<char_type>::op_name(), jsonpatch_names<char_type>::remove_name()); + val.insert_or_assign(jsonpatch_names<char_type>::path_name(), ss); + result.push_back(std::move(val)); + } + } + for (const auto& a : target.object_range()) + { + auto it = source.find(a.key()); + if (it == source.object_range().end()) + { + std::basic_string<char_type> ss(path); + ss.push_back('/'); + jsonpointer::escape(a.key(),ss); + Json val(json_object_arg); + val.insert_or_assign(jsonpatch_names<char_type>::op_name(), jsonpatch_names<char_type>::add_name()); + val.insert_or_assign(jsonpatch_names<char_type>::path_name(), ss); + val.insert_or_assign(jsonpatch_names<char_type>::value_name(), a.value()); + result.push_back(std::move(val)); + } + } + } + else + { + Json val(json_object_arg); + val.insert_or_assign(jsonpatch_names<char_type>::op_name(), jsonpatch_names<char_type>::replace_name()); + val.insert_or_assign(jsonpatch_names<char_type>::path_name(), path); + val.insert_or_assign(jsonpatch_names<char_type>::value_name(), target); + result.push_back(std::move(val)); + } + + return result; + } +} + +template <class Json> 
+void apply_patch(Json& target, const Json& patch, std::error_code& ec)
+{
+    // Applies the operations in `patch` to `target` one at a time, in order.
+    //
+    // On the first operation that fails, `ec` is set to the matching
+    // jsonpatch_errc value, the unwinder is marked as aborted and the function
+    // returns; the unwinder's destructor then replays the recorded inverse
+    // operations in reverse order to roll `target` back.
+    // If every operation succeeds, the unwinder is marked as committed and `ec`
+    // is left untouched.
+    //
+    // Operations whose "op" member is none of test/add/remove/replace/move/copy
+    // are skipped.
+    // NOTE(review): RFC 6902 treats an unknown "op" as an error - confirm
+    // whether skipping is intended.
+    using char_type = typename Json::char_type;
+    using string_type = std::basic_string<char_type>;
+    using json_pointer_type = jsonpointer::basic_json_pointer<char_type>;
+    using names = detail::jsonpatch_names<char_type>;
+
+    jsoncons::jsonpatch::detail::operation_unwinder<Json> unwinder(target);
+    std::error_code local_ec;
+
+    // Records the failure code and tells the unwinder to roll back.
+    auto fail = [&ec, &unwinder](jsonpatch_errc code)
+    {
+        ec = code;
+        unwinder.state = detail::state_type::abort;
+    };
+
+    // Stores `val` at `npath`: first tries to insert without replacing and,
+    // if that fails, replaces the value already there. The inverse operation
+    // is pushed onto the undo stack. Returns false if neither attempt worked.
+    auto add_or_replace = [&target, &unwinder](const json_pointer_type& npath, const Json& val) -> bool
+    {
+        std::error_code insert_ec;
+        jsonpointer::add_if_absent(target,npath,val,insert_ec); // try insert without replace
+        if (!insert_ec)
+        {
+            // insert without replace succeeded, undo by removing
+            unwinder.stack.emplace_back(detail::op_type::remove,npath,Json::null());
+            return true;
+        }
+
+        // insert failed, try a replace
+        std::error_code select_ec;
+        Json orig_val = jsonpointer::get(target,npath,select_ec);
+        if (select_ec) // shouldn't happen
+        {
+            return false;
+        }
+        std::error_code replace_ec;
+        jsonpointer::replace(target,npath,val,replace_ec);
+        if (replace_ec)
+        {
+            return false;
+        }
+        // undo by restoring the previous value
+        unwinder.stack.emplace_back(detail::op_type::replace,npath,orig_val);
+        return true;
+    };
+
+    // Validate
+
+    for (const auto& operation : patch.array_range())
+    {
+        unwinder.state = detail::state_type::begin;
+
+        auto it_op = operation.find(names::op_name());
+        if (it_op == operation.object_range().end())
+        {
+            fail(jsonpatch_errc::invalid_patch);
+            return;
+        }
+        string_type op = it_op->value().template as<string_type>();
+
+        auto it_path = operation.find(names::path_name());
+        if (it_path == operation.object_range().end())
+        {
+            fail(jsonpatch_errc::invalid_patch);
+            return;
+        }
+        string_type path = it_path->value().template as<string_type>();
+        auto location = json_pointer_type::parse(path, local_ec);
+        if (local_ec)
+        {
+            fail(jsonpatch_errc::invalid_patch);
+            return;
+        }
+
+        if (op == names::test_name())
+        {
+            Json val = jsonpointer::get(target,location,local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::test_failed);
+                return;
+            }
+            auto it_value = operation.find(names::value_name());
+            if (it_value == operation.object_range().end())
+            {
+                fail(jsonpatch_errc::invalid_patch);
+                return;
+            }
+            if (val != it_value->value())
+            {
+                fail(jsonpatch_errc::test_failed);
+                return;
+            }
+        }
+        else if (op == names::add_name())
+        {
+            auto it_value = operation.find(names::value_name());
+            if (it_value == operation.object_range().end())
+            {
+                fail(jsonpatch_errc::invalid_patch);
+                return;
+            }
+            Json val = it_value->value();
+            auto npath = jsonpatch::detail::definite_path(target,location);
+
+            if (!add_or_replace(npath,val))
+            {
+                fail(jsonpatch_errc::add_failed);
+                return;
+            }
+        }
+        else if (op == names::remove_name())
+        {
+            // Keep a copy of the removed value so that the removal can be undone.
+            Json val = jsonpointer::get(target,location,local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::remove_failed);
+                return;
+            }
+            jsonpointer::remove(target,location,local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::remove_failed);
+                return;
+            }
+            unwinder.stack.emplace_back(detail::op_type::add, location, val);
+        }
+        else if (op == names::replace_name())
+        {
+            // Keep a copy of the previous value so that the replace can be undone.
+            Json val = jsonpointer::get(target,location,local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::replace_failed);
+                return;
+            }
+            auto it_value = operation.find(names::value_name());
+            if (it_value == operation.object_range().end())
+            {
+                fail(jsonpatch_errc::invalid_patch);
+                return;
+            }
+            jsonpointer::replace(target, location, it_value->value(), local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::replace_failed);
+                return;
+            }
+            unwinder.stack.emplace_back(detail::op_type::replace,location,val);
+        }
+        else if (op == names::move_name())
+        {
+            auto it_from = operation.find(names::from_name());
+            if (it_from == operation.object_range().end())
+            {
+                fail(jsonpatch_errc::invalid_patch);
+                return;
+            }
+            string_type from = it_from->value().as_string();
+            auto from_pointer = json_pointer_type::parse(from, local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::move_failed);
+                return;
+            }
+
+            Json val = jsonpointer::get(target, from_pointer, local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::move_failed);
+                return;
+            }
+            jsonpointer::remove(target, from_pointer, local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::move_failed);
+                return;
+            }
+            unwinder.stack.emplace_back(detail::op_type::add, from_pointer, val);
+            // add
+            // The destination is resolved only after the source has been removed.
+            auto npath = jsonpatch::detail::definite_path(target,location);
+            if (!add_or_replace(npath,val))
+            {
+                // A failed move is reported as move_failed (this used to be copy_failed).
+                fail(jsonpatch_errc::move_failed);
+                return;
+            }
+        }
+        else if (op == names::copy_name())
+        {
+            auto it_from = operation.find(names::from_name());
+            if (it_from == operation.object_range().end())
+            {
+                fail(jsonpatch_errc::invalid_patch);
+                return;
+            }
+            string_type from = it_from->value().as_string();
+            Json val = jsonpointer::get(target,from,local_ec);
+            if (local_ec)
+            {
+                fail(jsonpatch_errc::copy_failed);
+                return;
+            }
+            // add
+            auto npath = jsonpatch::detail::definite_path(target,location);
+            if (!add_or_replace(npath,val))
+            {
+                fail(jsonpatch_errc::copy_failed);
+                return;
+            }
+        }
+    }
+    if (unwinder.state == detail::state_type::begin)
+    {
+        unwinder.state = detail::state_type::commit;
+    }
+}
+
+// Builds a JSON Patch document describing how to turn `source` into `target`.
+// Starts the recursive detail::from_diff at the empty (root) path.
+template <class Json>
+Json from_diff(const Json& source, const Json& target)
+{
+    std::basic_string<typename Json::char_type> path;
+    return jsoncons::jsonpatch::detail::from_diff(source, target, path);
+}
+
+// Throwing overload: calls the error_code overload and raises a
+// jsonpatch_error through JSONCONS_THROW when it reports a failure.
+template <class Json>
+void apply_patch(Json& target, const Json& patch)
+{
+    std::error_code ec;
+    apply_patch(target, patch, ec);
+    if (ec)
+    {
+        JSONCONS_THROW(jsonpatch_error(ec));
+    }
+}
+
+}}
+
+#endif
diff --git a/include/jsoncons_ext/jsonpatch/jsonpatch_error.hpp b/include/jsoncons_ext/jsonpatch/jsonpatch_error.hpp
new file mode 100644
index 0000000..33f8007
--- /dev/null
+++ b/include/jsoncons_ext/jsonpatch/jsonpatch_error.hpp
@@ -0,0 +1,121 @@
+/// Copyright 2017 Daniel Parker
+// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See https://github.com/danielaparker/jsoncons for latest version
+
+#ifndef JSONCONS_JSONPATCH_JSONPATCH_ERROR_HPP
+#define JSONCONS_JSONPATCH_JSONPATCH_ERROR_HPP
+
+#include <jsoncons/json_exception.hpp>
+#include <system_error>
+
+namespace jsoncons { namespace jsonpatch {
+
+    // Error codes for JSON Patch processing: one for a malformed patch
+    // document and one per patch operation kind. Zero means success.
+    enum class jsonpatch_errc 
+    {
+        success = 0,
+        invalid_patch = 1,
+        test_failed,
+        add_failed,
+        remove_failed,
+        replace_failed,
+        move_failed,
+        copy_failed
+
+    };
+
+    // std::error_category implementation that names the category and turns
+    // jsonpatch_errc values into human-readable messages.
+    class jsonpatch_error_category_impl
+       : public std::error_category
+    {
+    public:
+        // Category name.
+        const char* name() const noexcept override
+        {
+            return "jsoncons/jsonpatch";
+        }
+        // Message for the error value `ev`; values without a case below
+        // (including success) get the generic "Unknown JSON Patch error" text.
+        std::string message(int ev) const override
+        {
+            switch (static_cast<jsonpatch_errc>(ev))
+            {
+                case jsonpatch_errc::invalid_patch:
+                    return "Invalid JSON Patch document";
+                case jsonpatch_errc::test_failed:
+                    return "JSON Patch test operation failed";
+                case jsonpatch_errc::add_failed:
+                    return "JSON Patch add operation failed";
+                case jsonpatch_errc::remove_failed:
+                    return "JSON Patch remove operation failed";
+                case jsonpatch_errc::replace_failed:
+                    return "JSON Patch replace operation failed";
+                case jsonpatch_errc::move_failed:
+                    return "JSON Patch move operation failed";
+                case jsonpatch_errc::copy_failed:
+                    return "JSON Patch copy operation failed";
+                default:
+                    return "Unknown JSON Patch error";
+            }
+        }
+    };
+
+    // Returns the single category instance (a function-local static).
+    inline
+    const std::error_category& jsonpatch_error_category()
+    {
+      static jsonpatch_error_category_impl instance;
+      return instance;
+    }
+
+    // Wraps a jsonpatch_errc value in a std::error_code bound to the
+    // jsonpatch category.
+    inline 
+    std::error_code make_error_code(jsonpatch_errc result)
+    {
+        return std::error_code(static_cast<int>(result),jsonpatch_error_category());
+    }
+
+} // jsonpatch
+} // jsoncons
+
+namespace std {
+    // Marks jsonpatch_errc as an error code enumeration so that its values
+    // convert implicitly to std::error_code.
+    template<>
+    struct is_error_code_enum<jsoncons::jsonpatch::jsonpatch_errc> : public true_type
+    {
+    };
+}
+
+namespace jsoncons { namespace jsonpatch {
+
+// allow to disable exceptions
+// When JSONCONS_NO_EXCEPTIONS is defined, throwing and rethrowing call
+// std::terminate() and the try/catch macros expand to plain if statements.
+#if !defined(JSONCONS_NO_EXCEPTIONS)
+    #define JSONCONS_THROW(exception) throw exception
+    #define JSONCONS_RETHROW throw
+    #define JSONCONS_TRY try
+    #define JSONCONS_CATCH(exception) catch(exception)
+#else
+    #define JSONCONS_THROW(exception) std::terminate()
+    #define JSONCONS_RETHROW std::terminate()
+    #define JSONCONS_TRY if (true)
+    #define JSONCONS_CATCH(exception) if (false)
+#endif
+
+    // Exception type carrying a std::error_code; derives from both
+    // std::system_error and json_exception.
+    class jsonpatch_error : public std::system_error, public virtual json_exception
+    {
+    public:
+        jsonpatch_error(const std::error_code& ec)
+            : std::system_error(ec)
+        {
+        }
+
+        jsonpatch_error(const jsonpatch_error& other) = default;
+
+        jsonpatch_error(jsonpatch_error&& other) = default;
+
+        jsonpatch_error& operator=(const jsonpatch_error& e) = default;
+        jsonpatch_error& operator=(jsonpatch_error&& e) = default;
+
+        // Forwards to std::system_error::what().
+        const char* what() const noexcept override
+        {
+            return std::system_error::what();
+        }
+    };
+} // jsonpatch
+} // jsoncons
+
+#endif
diff --git a/include/jsoncons_ext/jsonpath/expression.hpp b/include/jsoncons_ext/jsonpath/expression.hpp
new file mode 100644
index 0000000..f655f2d
--- /dev/null
+++ b/include/jsoncons_ext/jsonpath/expression.hpp
@@ -0,0 +1,3329 @@
+// Copyright 2021 Daniel Parker
+// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPATH_EXPRESSION_HPP +#define JSONCONS_JSONPATH_EXPRESSION_HPP + +#include <string> // std::basic_string +#include <vector> // std::vector +#include <unordered_map> // std::unordered_map +#include <unordered_set> // std::unordered_set +#include <limits> // std::numeric_limits +#include <set> // std::set +#include <utility> // std::move +#if defined(JSONCONS_HAS_STD_REGEX) +#include <regex> +#endif +#include <jsoncons/json_type.hpp> +#include <jsoncons_ext/jsonpath/json_location.hpp> +#include <jsoncons_ext/jsonpath/jsonpath_error.hpp> + +namespace jsoncons { +namespace jsonpath { + + struct reference_arg_t + { + explicit reference_arg_t() = default; + }; + constexpr reference_arg_t reference_arg{}; + + struct const_reference_arg_t + { + explicit const_reference_arg_t() = default; + }; + constexpr const_reference_arg_t const_reference_arg{}; + + struct literal_arg_t + { + explicit literal_arg_t() = default; + }; + constexpr literal_arg_t literal_arg{}; + + struct end_of_expression_arg_t + { + explicit end_of_expression_arg_t() = default; + }; + constexpr end_of_expression_arg_t end_of_expression_arg{}; + + struct separator_arg_t + { + explicit separator_arg_t() = default; + }; + constexpr separator_arg_t separator_arg{}; + + struct lparen_arg_t + { + explicit lparen_arg_t() = default; + }; + constexpr lparen_arg_t lparen_arg{}; + + struct rparen_arg_t + { + explicit rparen_arg_t() = default; + }; + constexpr rparen_arg_t rparen_arg{}; + + struct begin_union_arg_t + { + explicit begin_union_arg_t() = default; + }; + constexpr begin_union_arg_t begin_union_arg{}; + + struct end_union_arg_t + { + explicit end_union_arg_t() = default; + }; + constexpr end_union_arg_t end_union_arg{}; + + struct begin_filter_arg_t + { + explicit begin_filter_arg_t() = default; + }; + constexpr 
begin_filter_arg_t begin_filter_arg{}; + + struct end_filter_arg_t + { + explicit end_filter_arg_t() = default; + }; + constexpr end_filter_arg_t end_filter_arg{}; + + struct begin_expression_arg_t + { + explicit begin_expression_arg_t() = default; + }; + constexpr begin_expression_arg_t begin_expression_arg{}; + + struct end_index_expression_arg_t + { + explicit end_index_expression_arg_t() = default; + }; + constexpr end_index_expression_arg_t end_index_expression_arg{}; + + struct end_argument_expression_arg_t + { + explicit end_argument_expression_arg_t() = default; + }; + constexpr end_argument_expression_arg_t end_argument_expression_arg{}; + + struct current_node_arg_t + { + explicit current_node_arg_t() = default; + }; + constexpr current_node_arg_t current_node_arg{}; + + struct root_node_arg_t + { + explicit root_node_arg_t() = default; + }; + constexpr root_node_arg_t root_node_arg{}; + + struct end_function_arg_t + { + explicit end_function_arg_t() = default; + }; + constexpr end_function_arg_t end_function_arg{}; + + struct argument_arg_t + { + explicit argument_arg_t() = default; + }; + constexpr argument_arg_t argument_arg{}; + + enum class result_options {value=0, nodups=1, sort=2, path=4}; + + using result_type = result_options; + + inline result_options operator~(result_options a) + { + return static_cast<result_options>(~static_cast<unsigned int>(a)); + } + + inline result_options operator&(result_options a, result_options b) + { + return static_cast<result_options>(static_cast<unsigned int>(a) & static_cast<unsigned int>(b)); + } + + inline result_options operator^(result_options a, result_options b) + { + return static_cast<result_options>(static_cast<unsigned int>(a) ^ static_cast<unsigned int>(b)); + } + + inline result_options operator|(result_options a, result_options b) + { + return static_cast<result_options>(static_cast<unsigned int>(a) | static_cast<unsigned int>(b)); + } + + inline result_options operator&=(result_options& a, 
result_options b) + { + a = a & b; + return a; + } + + inline result_options operator^=(result_options& a, result_options b) + { + a = a ^ b; + return a; + } + + inline result_options operator|=(result_options& a, result_options b) + { + a = a | b; + return a; + } + + template <class Json> + class parameter; + + template <class Json,class JsonReference> + class value_or_pointer + { + public: + friend class parameter<Json>; + using value_type = Json; + using reference = JsonReference; + using pointer = typename std::conditional<std::is_const<typename std::remove_reference<reference>::type>::value,typename Json::const_pointer,typename Json::pointer>::type; + private: + bool is_value_; + union + { + value_type val_; + pointer ptr_; + }; + public: + value_or_pointer(value_type&& val) + : is_value_(true), val_(std::move(val)) + { + } + + value_or_pointer(pointer ptr) + : is_value_(false), ptr_(std::move(ptr)) + { + } + + value_or_pointer(value_or_pointer&& other) noexcept + : is_value_(other.is_value_) + { + if (is_value_) + { + new(&val_)value_type(std::move(other.val_)); + } + else + { + ptr_ = other.ptr_; + } + } + + ~value_or_pointer() noexcept + { + if (is_value_) + { + val_.~value_type(); + } + } + + value_or_pointer& operator=(value_or_pointer&& other) noexcept + { + if (is_value_) + { + val_.~value_type(); + } + is_value_ = other.is_value_; + + if (is_value_) + { + new(&val_)value_type(std::move(other.val_)); + } + else + { + ptr_ = other.ptr_; + } + return *this; + } + + reference value() + { + return is_value_ ? val_ : *ptr_; + } + + pointer ptr() + { + return is_value_ ? 
&val_ : ptr_;
+        }
+    };
+
+    // Read-only view of a function argument: holds either an owned Json value
+    // or a pointer to a Json stored elsewhere, taken over from a
+    // value_or_pointer.
+    template <class Json>
+    class parameter
+    {
+        using value_type = Json;
+        using reference = const Json&;
+        using pointer = const Json*;
+    private:
+        value_or_pointer<Json,reference> data_;
+    public:
+        // Takes over the contents of `data`, which may hold either a value or
+        // a pointer.
+        template <class JsonReference>
+        parameter(value_or_pointer<Json,JsonReference>&& data) noexcept
+            : data_(nullptr)
+        {
+            if (data.is_value_)
+            {
+                // data_ was initialised above holding a pointer, so val_ is
+                // unconstructed union storage at this point: construct it in
+                // place rather than move-assigning onto it.
+                new(&data_.val_)value_type(std::move(data.val_));
+                // Set the flag only once val_ exists, so that data_'s
+                // destructor destroys exactly what was constructed.
+                data_.is_value_ = true;
+            }
+            else
+            {
+                data_.ptr_ = data.ptr_;
+                data_.is_value_ = false;
+            }
+        }
+
+        parameter(parameter&& other) noexcept = default;
+
+        parameter& operator=(parameter&& other) noexcept = default;
+
+        // The held value, or the Json pointed to when a pointer is held.
+        const Json& value() const
+        {
+            return data_.is_value_ ? data_.val_ : *data_.ptr_;
+        }
+    };
+
+    // A user-supplied function: its name, an optional arity and the callable
+    // that receives the arguments as a span of parameters plus an error code.
+    template <class Json>
+    class custom_function
+    {
+    public:
+        using value_type = Json;
+        using char_type = typename Json::char_type;
+        using parameter_type = parameter<Json>;
+        using function_type = std::function<value_type(jsoncons::span<const parameter_type>, std::error_code& ec)>;
+        using string_type = std::basic_string<char_type>;
+
+        string_type function_name_;
+        optional<std::size_t> arity_;
+        function_type f_;
+
+        // Copies name, arity and callable.
+        custom_function(const string_type& function_name,
+                        const optional<std::size_t>& arity,
+                        const function_type& f)
+            : function_name_(function_name),
+              arity_(arity),
+              f_(f)
+        {
+        }
+
+        // Moves name, arity and callable.
+        custom_function(string_type&& function_name,
+                        optional<std::size_t>&& arity,
+                        function_type&& f)
+            : function_name_(std::move(function_name)),
+              arity_(std::move(arity)),
+              f_(std::move(f))
+        {
+        }
+
+        custom_function(const custom_function&) = default;
+
+        custom_function(custom_function&&) = default;
+
+        const string_type& name() const 
+        {
+            return function_name_;
+        }
+
+        optional<std::size_t> arity() const 
+        {
+            return arity_;
+        }
+
+        const function_type& function() const 
+        {
+            return f_;
+        }
+    };
+
+    template <class Json>
+    class custom_functions
+    {
+        using char_type = typename Json::char_type;
+        using string_type = std::basic_string<char_type>;
+        using 
value_type = Json; + using parameter_type = parameter<Json>; + using function_type = std::function<value_type(jsoncons::span<const parameter_type>, std::error_code& ec)>; + using const_iterator = typename std::vector<custom_function<Json>>::const_iterator; + + std::vector<custom_function<Json>> functions_; + public: + void register_function(const string_type& name, + jsoncons::optional<std::size_t> arity, + const function_type& f) + { + functions_.emplace_back(name, arity, f); + } + + const_iterator begin() const + { + return functions_.begin(); + } + + const_iterator end() const + { + return functions_.end(); + } + }; + +namespace detail { + + template <class Json,class JsonReference> + class dynamic_resources; + + template <class Json,class JsonReference> + struct unary_operator + { + std::size_t precedence_level_; + bool is_right_associative_; + + unary_operator(std::size_t precedence_level, + bool is_right_associative) + : precedence_level_(precedence_level), + is_right_associative_(is_right_associative) + { + } + + virtual ~unary_operator() = default; + + std::size_t precedence_level() const + { + return precedence_level_; + } + bool is_right_associative() const + { + return is_right_associative_; + } + + virtual Json evaluate(JsonReference, + std::error_code&) const = 0; + }; + + template <class Json> + bool is_false(const Json& val) + { + return ((val.is_array() && val.empty()) || + (val.is_object() && val.empty()) || + (val.is_string() && val.as_string_view().empty()) || + (val.is_bool() && !val.as_bool()) || + val.is_null()); + } + + template <class Json> + bool is_true(const Json& val) + { + return !is_false(val); + } + + template <class Json,class JsonReference> + class unary_not_operator final : public unary_operator<Json,JsonReference> + { + public: + unary_not_operator() + : unary_operator<Json,JsonReference>(1, true) + {} + + Json evaluate(JsonReference val, + std::error_code&) const override + { + return is_false(val) ? 
Json(true) : Json(false); + } + }; + + template <class Json,class JsonReference> + class unary_minus_operator final : public unary_operator<Json,JsonReference> + { + public: + unary_minus_operator() + : unary_operator<Json,JsonReference>(1, true) + {} + + Json evaluate(JsonReference val, + std::error_code&) const override + { + if (val.is_int64()) + { + return Json(-val.template as<int64_t>()); + } + else if (val.is_double()) + { + return Json(-val.as_double()); + } + else + { + return Json::null(); + } + } + }; + + template <class Json,class JsonReference> + class regex_operator final : public unary_operator<Json,JsonReference> + { + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type>; + std::basic_regex<char_type> pattern_; + public: + regex_operator(std::basic_regex<char_type>&& pattern) + : unary_operator<Json,JsonReference>(2, true), + pattern_(std::move(pattern)) + { + } + + regex_operator(regex_operator&&) = default; + regex_operator& operator=(regex_operator&&) = default; + + Json evaluate(JsonReference val, + std::error_code&) const override + { + if (!val.is_string()) + { + return Json::null(); + } + return std::regex_search(val.as_string(), pattern_) ? 
Json(true) : Json(false); + } + }; + + template <class Json,class JsonReference> + struct binary_operator + { + std::size_t precedence_level_; + bool is_right_associative_; + + binary_operator(std::size_t precedence_level, + bool is_right_associative = false) + : precedence_level_(precedence_level), + is_right_associative_(is_right_associative) + { + } + + std::size_t precedence_level() const + { + return precedence_level_; + } + bool is_right_associative() const + { + return is_right_associative_; + } + + virtual Json evaluate(JsonReference, + JsonReference, + + std::error_code&) const = 0; + + virtual std::string to_string(int = 0) const + { + return "binary operator"; + } + + protected: + ~binary_operator() = default; + }; + + // Implementations + + template <class Json,class JsonReference> + class or_operator final : public binary_operator<Json,JsonReference> + { + public: + or_operator() + : binary_operator<Json,JsonReference>(9) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + if (lhs.is_null() && rhs.is_null()) + { + return Json::null(); + } + if (!is_false(lhs)) + { + return lhs; + } + else + { + return rhs; + } + } + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + //s.append("\n"); + s.append(level*2, ' '); + } + s.append("or operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class and_operator final : public binary_operator<Json,JsonReference> + { + public: + and_operator() + : binary_operator<Json,JsonReference>(8) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + if (is_true(lhs)) + { + return rhs; + } + else + { + return lhs; + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("and operator"); + return s; + } + }; + + template <class Json,class JsonReference> 
+ class eq_operator final : public binary_operator<Json,JsonReference> + { + public: + eq_operator() + : binary_operator<Json,JsonReference>(6) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + return lhs == rhs ? Json(true) : Json(false); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("equal operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class ne_operator final : public binary_operator<Json,JsonReference> + { + public: + ne_operator() + : binary_operator<Json,JsonReference>(6) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + return lhs != rhs ? Json(true) : Json(false); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("not equal operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class lt_operator final : public binary_operator<Json,JsonReference> + { + public: + lt_operator() + : binary_operator<Json,JsonReference>(5) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + if (lhs.is_number() && rhs.is_number()) + { + return lhs < rhs ? Json(true) : Json(false); + } + else if (lhs.is_string() && rhs.is_string()) + { + return lhs < rhs ? 
Json(true) : Json(false); + } + return Json::null(); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("less than operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class lte_operator final : public binary_operator<Json,JsonReference> + { + public: + lte_operator() + : binary_operator<Json,JsonReference>(5) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + if (lhs.is_number() && rhs.is_number()) + { + return lhs <= rhs ? Json(true) : Json(false); + } + else if (lhs.is_string() && rhs.is_string()) + { + return lhs <= rhs ? Json(true) : Json(false); + } + return Json::null(); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("less than or equal operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class gt_operator final : public binary_operator<Json,JsonReference> + { + public: + gt_operator() + : binary_operator<Json,JsonReference>(5) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + //std::cout << "operator> lhs: " << lhs << ", rhs: " << rhs << "\n"; + + if (lhs.is_number() && rhs.is_number()) + { + return lhs > rhs ? Json(true) : Json(false); + } + else if (lhs.is_string() && rhs.is_string()) + { + return lhs > rhs ? 
Json(true) : Json(false); + } + return Json::null(); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("greater than operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class gte_operator final : public binary_operator<Json,JsonReference> + { + public: + gte_operator() + : binary_operator<Json,JsonReference>(5) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + if (lhs.is_number() && rhs.is_number()) + { + return lhs >= rhs ? Json(true) : Json(false); + } + else if (lhs.is_string() && rhs.is_string()) + { + return lhs >= rhs ? Json(true) : Json(false); + } + return Json::null(); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("greater than or equal operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class plus_operator final : public binary_operator<Json,JsonReference> + { + public: + plus_operator() + : binary_operator<Json,JsonReference>(4) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + if (!(lhs.is_number() && rhs.is_number())) + { + return Json::null(); + } + else if (lhs.is_int64() && rhs.is_int64()) + { + return Json(((lhs.template as<int64_t>() + rhs.template as<int64_t>()))); + } + else if (lhs.is_uint64() && rhs.is_uint64()) + { + return Json((lhs.template as<uint64_t>() + rhs.template as<uint64_t>())); + } + else + { + return Json((lhs.as_double() + rhs.as_double())); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("plus operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class minus_operator final : public 
binary_operator<Json,JsonReference> + { + public: + minus_operator() + : binary_operator<Json,JsonReference>(4) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + if (!(lhs.is_number() && rhs.is_number())) + { + return Json::null(); + } + else if (lhs.is_int64() && rhs.is_int64()) + { + return Json(((lhs.template as<int64_t>() - rhs.template as<int64_t>()))); + } + else if (lhs.is_uint64() && rhs.is_uint64()) + { + return Json((lhs.template as<uint64_t>() - rhs.template as<uint64_t>())); + } + else + { + return Json((lhs.as_double() - rhs.as_double())); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("minus operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class mult_operator final : public binary_operator<Json,JsonReference> + { + public: + mult_operator() + : binary_operator<Json,JsonReference>(3) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + if (!(lhs.is_number() && rhs.is_number())) + { + return Json::null(); + } + else if (lhs.is_int64() && rhs.is_int64()) + { + return Json(((lhs.template as<int64_t>() * rhs.template as<int64_t>()))); + } + else if (lhs.is_uint64() && rhs.is_uint64()) + { + return Json((lhs.template as<uint64_t>() * rhs.template as<uint64_t>())); + } + else + { + return Json((lhs.as_double() * rhs.as_double())); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("multiply operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class div_operator final : public binary_operator<Json,JsonReference> + { + public: + div_operator() + : binary_operator<Json,JsonReference>(3) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const 
override + { + //std::cout << "operator/ lhs: " << lhs << ", rhs: " << rhs << "\n"; + + if (!(lhs.is_number() && rhs.is_number())) + { + return Json::null(); + } + else if (lhs.is_int64() && rhs.is_int64()) + { + return Json(((lhs.template as<int64_t>() / rhs.template as<int64_t>()))); + } + else if (lhs.is_uint64() && rhs.is_uint64()) + { + return Json((lhs.template as<uint64_t>() / rhs.template as<uint64_t>())); + } + else + { + return Json((lhs.as_double() / rhs.as_double())); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("divide operator"); + return s; + } + }; + + template <class Json,class JsonReference> + class modulus_operator final : public binary_operator<Json,JsonReference> + { + public: + modulus_operator() + : binary_operator<Json,JsonReference>(3) + { + } + + Json evaluate(JsonReference lhs, JsonReference rhs, std::error_code&) const override + { + //std::cout << "operator/ lhs: " << lhs << ", rhs: " << rhs << "\n"; + + if (!(lhs.is_number() && rhs.is_number())) + { + return Json::null(); + } + else if (lhs.is_int64() && rhs.is_int64()) + { + return Json(((lhs.template as<int64_t>() % rhs.template as<int64_t>()))); + } + else if (lhs.is_uint64() && rhs.is_uint64()) + { + return Json((lhs.template as<uint64_t>() % rhs.template as<uint64_t>())); + } + else + { + return Json(fmod(lhs.as_double(), rhs.as_double())); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("modulus operator"); + return s; + } + }; + + // function_base + template <class Json> + class function_base + { + jsoncons::optional<std::size_t> arg_count_; + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + function_base(jsoncons::optional<std::size_t> arg_count) + : arg_count_(arg_count) + { + } + + virtual ~function_base() 
noexcept = default; + + jsoncons::optional<std::size_t> arity() const + { + return arg_count_; + } + + virtual value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const = 0; + + virtual std::string to_string(int level = 0) const + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("function"); + return s; + } + }; + + template <class Json> + class decorator_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + using string_view_type = typename Json::string_view_type; + using function_type = std::function<value_type(jsoncons::span<const parameter_type>, std::error_code& ec)>; + private: + function_type f_; + public: + decorator_function(jsoncons::optional<std::size_t> arity, + const function_type& f) + : function_base<Json>(arity), f_(f) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + return f_(args, ec); + } + }; + + template <class Json> + class contains_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + using string_view_type = typename Json::string_view_type; + + contains_function() + : function_base<Json>(2) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + auto arg1= args[1].value(); + + switch (arg0.type()) + { + case json_type::array_value: + for (auto& j : arg0.array_range()) + { + if (j == arg1) + { + return value_type(true); + } + } + return value_type(false); + case json_type::string_value: + { + if (!arg1.is_string()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + auto sv0 = arg0.template as<string_view_type>(); + auto sv1 = 
arg1.template as<string_view_type>(); + return sv0.find(sv1) != string_view_type::npos ? value_type(true) : value_type(false); + } + default: + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("contains function"); + return s; + } + }; + + template <class Json> + class ends_with_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + using string_view_type = typename Json::string_view_type; + + ends_with_function() + : function_base<Json>(2) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + if (!arg0.is_string()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + + auto arg1= args[1].value(); + if (!arg1.is_string()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + + auto sv0 = arg0.template as<string_view_type>(); + auto sv1 = arg1.template as<string_view_type>(); + + if (sv1.length() <= sv0.length() && sv1 == sv0.substr(sv0.length() - sv1.length())) + { + return value_type(true); + } + else + { + return value_type(false); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("ends_with function"); + return s; + } + }; + + template <class Json> + class starts_with_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + using string_view_type = typename Json::string_view_type; + + starts_with_function() + : function_base<Json>(2) + { + } + + value_type evaluate(const 
std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + if (!arg0.is_string()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + + auto arg1= args[1].value(); + if (!arg1.is_string()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + + auto sv0 = arg0.template as<string_view_type>(); + auto sv1 = arg1.template as<string_view_type>(); + + if (sv1.length() <= sv0.length() && sv1 == sv0.substr(0, sv1.length())) + { + return value_type(true); + } + else + { + return value_type(false); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("starts_with function"); + return s; + } + }; + + template <class Json> + class sum_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + sum_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + if (!arg0.is_array()) + { + //std::cout << "arg: " << arg0 << "\n"; + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + //std::cout << "sum function arg: " << arg0 << "\n"; + + double sum = 0; + for (auto& j : arg0.array_range()) + { + if (!j.is_number()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + sum += j.template as<double>(); + } + + return value_type(sum); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("sum function"); + return s; + } + }; + +#if 
defined(JSONCONS_HAS_STD_REGEX) + + template <class Json> + class tokenize_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type>; + + tokenize_function() + : function_base<Json>(2) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + if (!args[0].value().is_string() || !args[1].value().is_string()) + { + //std::cout << "arg: " << arg0 << "\n"; + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + auto arg0 = args[0].value().template as<string_type>(); + auto arg1 = args[1].value().template as<string_type>(); + + std::regex::flag_type options = std::regex_constants::ECMAScript; + std::basic_regex<char_type> pieces_regex(arg1, options); + + std::regex_token_iterator<typename string_type::const_iterator> rit ( arg0.begin(), arg0.end(), pieces_regex, -1); + std::regex_token_iterator<typename string_type::const_iterator> rend; + + value_type j(json_array_arg); + while (rit != rend) + { + j.emplace_back(rit->str()); + ++rit; + } + return j; + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("tokenize function"); + return s; + } + }; + +#endif // defined(JSONCONS_HAS_STD_REGEX) + + template <class Json> + class ceil_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + ceil_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto 
arg0= args[0].value(); + switch (arg0.type()) + { + case json_type::uint64_value: + case json_type::int64_value: + { + return value_type(arg0.template as<double>()); + } + case json_type::double_value: + { + return value_type(std::ceil(arg0.template as<double>())); + } + default: + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("ceil function"); + return s; + } + }; + + template <class Json> + class floor_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + floor_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + switch (arg0.type()) + { + case json_type::uint64_value: + case json_type::int64_value: + { + return value_type(arg0.template as<double>()); + } + case json_type::double_value: + { + return value_type(std::floor(arg0.template as<double>())); + } + default: + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("floor function"); + return s; + } + }; + + template <class Json> + class to_number_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + to_number_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto 
arg0= args[0].value(); + switch (arg0.type()) + { + case json_type::int64_value: + case json_type::uint64_value: + case json_type::double_value: + return arg0; + case json_type::string_value: + { + auto sv = arg0.as_string_view(); + uint64_t un{0}; + auto result1 = jsoncons::detail::to_integer(sv.data(), sv.length(), un); + if (result1) + { + return value_type(un); + } + int64_t sn{0}; + auto result2 = jsoncons::detail::to_integer(sv.data(), sv.length(), sn); + if (result2) + { + return value_type(sn); + } + jsoncons::detail::chars_to to_double; + try + { + auto s = arg0.as_string(); + double d = to_double(s.c_str(), s.length()); + return value_type(d); + } + catch (const std::exception&) + { + return value_type::null(); + } + } + default: + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("to_number function"); + return s; + } + }; + + template <class Json> + class prod_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + prod_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + if (!arg0.is_array() || arg0.empty()) + { + //std::cout << "arg: " << arg0 << "\n"; + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + double prod = 1; + for (auto& j : arg0.array_range()) + { + if (!j.is_number()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + prod *= j.template as<double>(); + } + + return value_type(prod); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + 
s.append(level*2, ' '); + } + s.append("prod function"); + return s; + } + }; + + template <class Json> + class avg_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + avg_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + if (!arg0.is_array()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + if (arg0.empty()) + { + return value_type::null(); + } + double sum = 0; + for (auto& j : arg0.array_range()) + { + if (!j.is_number()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + sum += j.template as<double>(); + } + + return value_type(sum / static_cast<double>(arg0.size())); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("to_string function"); + return s; + } + }; + + template <class Json> + class min_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + min_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + if (!arg0.is_array()) + { + //std::cout << "arg: " << arg0 << "\n"; + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + if (arg0.empty()) + { + return value_type::null(); + } + bool is_number = arg0.at(0).is_number(); + bool is_string = arg0.at(0).is_string(); + if (!is_number && !is_string) + { + ec = jsonpath_errc::invalid_type; + return 
value_type::null(); + } + + std::size_t index = 0; + for (std::size_t i = 1; i < arg0.size(); ++i) + { + if (!(arg0.at(i).is_number() == is_number && arg0.at(i).is_string() == is_string)) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + if (arg0.at(i) < arg0.at(index)) + { + index = i; + } + } + + return arg0.at(index); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("min function"); + return s; + } + }; + + template <class Json> + class max_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + max_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + if (!arg0.is_array()) + { + //std::cout << "arg: " << arg0 << "\n"; + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + if (arg0.empty()) + { + return value_type::null(); + } + + bool is_number = arg0.at(0).is_number(); + bool is_string = arg0.at(0).is_string(); + if (!is_number && !is_string) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + + std::size_t index = 0; + for (std::size_t i = 1; i < arg0.size(); ++i) + { + if (!(arg0.at(i).is_number() == is_number && arg0.at(i).is_string() == is_string)) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + if (arg0.at(i) > arg0.at(index)) + { + index = i; + } + } + + return arg0.at(index); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("max function"); + return s; + } + }; + + template <class Json> + class abs_function : public function_base<Json> + 
{ + public: + using value_type = Json; + using parameter_type = parameter<Json>; + + abs_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + switch (arg0.type()) + { + case json_type::uint64_value: + return arg0; + case json_type::int64_value: + { + return arg0.template as<int64_t>() >= 0 ? arg0 : value_type(std::abs(arg0.template as<int64_t>())); + } + case json_type::double_value: + { + return arg0.template as<double>() >= 0 ? arg0 : value_type(std::abs(arg0.template as<double>())); + } + default: + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("abs function"); + return s; + } + }; + + template <class Json> + class length_function : public function_base<Json> + { + public: + using value_type = Json; + using string_view_type = typename Json::string_view_type; + using parameter_type = parameter<Json>; + + length_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + //std::cout << "length function arg: " << arg0 << "\n"; + + switch (arg0.type()) + { + case json_type::object_value: + case json_type::array_value: + return value_type(arg0.size()); + case json_type::string_value: + { + auto sv0 = arg0.template as<string_view_type>(); + auto length = unicode_traits::count_codepoints(sv0.data(), sv0.size()); + return value_type(length); + } + default: + { + ec = jsonpath_errc::invalid_type; + return 
value_type::null(); + } + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("length function"); + return s; + } + }; + + template <class Json> + class keys_function : public function_base<Json> + { + public: + using value_type = Json; + using parameter_type = parameter<Json>; + using string_view_type = typename Json::string_view_type; + + keys_function() + : function_base<Json>(1) + { + } + + value_type evaluate(const std::vector<parameter_type>& args, + std::error_code& ec) const override + { + if (args.size() != *this->arity()) + { + ec = jsonpath_errc::invalid_arity; + return value_type::null(); + } + + auto arg0= args[0].value(); + if (!arg0.is_object()) + { + ec = jsonpath_errc::invalid_type; + return value_type::null(); + } + + value_type result(json_array_arg); + result.reserve(args.size()); + + for (auto& item : arg0.object_range()) + { + result.emplace_back(item.key()); + } + return result; + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("keys function"); + return s; + } + }; + + enum class jsonpath_token_kind + { + root_node, + current_node, + expression, + lparen, + rparen, + begin_union, + end_union, + begin_filter, + end_filter, + begin_expression, + end_index_expression, + end_argument_expression, + separator, + literal, + selector, + function, + end_function, + argument, + unary_operator, + binary_operator + }; + + inline + std::string to_string(jsonpath_token_kind kind) + { + switch (kind) + { + case jsonpath_token_kind::root_node: + return "root_node"; + case jsonpath_token_kind::current_node: + return "current_node"; + case jsonpath_token_kind::lparen: + return "lparen"; + case jsonpath_token_kind::rparen: + return "rparen"; + case jsonpath_token_kind::begin_union: + return "begin_union"; + case 
jsonpath_token_kind::end_union: + return "end_union"; + case jsonpath_token_kind::begin_filter: + return "begin_filter"; + case jsonpath_token_kind::end_filter: + return "end_filter"; + case jsonpath_token_kind::begin_expression: + return "begin_expression"; + case jsonpath_token_kind::end_index_expression: + return "end_index_expression"; + case jsonpath_token_kind::end_argument_expression: + return "end_argument_expression"; + case jsonpath_token_kind::separator: + return "separator"; + case jsonpath_token_kind::literal: + return "literal"; + case jsonpath_token_kind::selector: + return "selector"; + case jsonpath_token_kind::function: + return "function"; + case jsonpath_token_kind::end_function: + return "end_function"; + case jsonpath_token_kind::argument: + return "argument"; + case jsonpath_token_kind::unary_operator: + return "unary_operator"; + case jsonpath_token_kind::binary_operator: + return "binary_operator"; + default: + return ""; + } + } + + template <class Json,class JsonReference> + struct path_value_pair + { + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type,std::char_traits<char_type>>; + using value_type = Json; + using reference = JsonReference; + using value_pointer = typename std::conditional<std::is_const<typename std::remove_reference<JsonReference>::type>::value,typename Json::const_pointer,typename Json::pointer>::type; + using json_location_node_type = json_location_node<char_type>; + using json_location_type = json_location<char_type>; + using path_pointer = const json_location_node_type*; + + json_location_type path_; + value_pointer value_ptr_; + + path_value_pair(const json_location_type& path, reference value) noexcept + : path_(path), value_ptr_(std::addressof(value)) + { + } + + path_value_pair(json_location_type&& path, value_pointer valp) noexcept + : path_(std::move(path)), value_ptr_(valp) + { + } + + path_value_pair(const path_value_pair&) = default; + 
path_value_pair(path_value_pair&& other) = default; + path_value_pair& operator=(const path_value_pair&) = default; + path_value_pair& operator=(path_value_pair&& other) = default; + + json_location_type path() const + { + return path_; + } + + reference value() + { + return *value_ptr_; + } + }; + + template <class Json,class JsonReference> + struct path_value_pair_less + { + bool operator()(const path_value_pair<Json,JsonReference>& lhs, + const path_value_pair<Json,JsonReference>& rhs) const noexcept + { + return lhs.path() < rhs.path(); + } + }; + + template <class Json,class JsonReference> + struct path_value_pair_equal + { + bool operator()(const path_value_pair<Json,JsonReference>& lhs, + const path_value_pair<Json,JsonReference>& rhs) const noexcept + { + return lhs.path() == rhs.path(); + } + }; + + template <class Json,class JsonReference> + struct path_component_value_pair + { + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type,std::char_traits<char_type>>; + using value_type = Json; + using reference = JsonReference; + using value_pointer = typename std::conditional<std::is_const<typename std::remove_reference<JsonReference>::type>::value,typename Json::const_pointer,typename Json::pointer>::type; + using json_location_node_type = json_location_node<char_type>; + using json_location_type = json_location<char_type>; + using path_pointer = const json_location_node_type*; + private: + const json_location_node_type* last_ptr_; + value_pointer value_ptr_; + public: + path_component_value_pair(const json_location_node_type& last, reference value) noexcept + : last_ptr_(std::addressof(last)), value_ptr_(std::addressof(value)) + { + } + + const json_location_node_type& last() const + { + return *last_ptr_; + } + + reference value() const + { + return *value_ptr_; + } + }; + + template <class Json,class JsonReference> + class node_receiver + { + public: + using char_type = typename Json::char_type; + using reference = 
JsonReference; + using json_location_node_type = json_location_node<char_type>; + + virtual ~node_receiver() noexcept = default; + + virtual void add(const json_location_node_type& path_tail, + reference value) = 0; + }; + + template <class Json,class JsonReference> + class path_value_receiver : public node_receiver<Json,JsonReference> + { + public: + using reference = JsonReference; + using char_type = typename Json::char_type; + using json_location_node_type = json_location_node<char_type>; + using json_location_type = json_location<char_type>; + using path_value_pair_type = path_value_pair<Json,JsonReference>; + + std::vector<path_value_pair_type> nodes; + + void add(const json_location_node_type& path_tail, + reference value) override + { + nodes.emplace_back(json_location_type(path_tail), std::addressof(value)); + } + }; + + template <class Json,class JsonReference> + class path_stem_value_receiver : public node_receiver<Json,JsonReference> + { + public: + using reference = JsonReference; + using char_type = typename Json::char_type; + using json_location_node_type = json_location_node<char_type>; + using path_stem_value_pair_type = path_component_value_pair<Json,JsonReference>; + + std::vector<path_stem_value_pair_type> nodes; + + void add(const json_location_node_type& path_tail, + reference value) override + { + nodes.emplace_back(path_tail, value); + } + }; + + template <class Json, class JsonReference> + class dynamic_resources + { + using reference = JsonReference; + using pointer = typename std::conditional<std::is_const<typename std::remove_reference<reference>::type>::value,typename Json::const_pointer,typename Json::pointer>::type; + using json_location_node_type = json_location_node<typename Json::char_type>; + using path_stem_value_pair_type = path_component_value_pair<Json,JsonReference>; + std::vector<std::unique_ptr<Json>> temp_json_values_; + std::vector<std::unique_ptr<json_location_node_type>> temp_path_node_values_; + 
std::unordered_map<std::size_t,pointer> cache_; + public: + bool is_cached(std::size_t id) const + { + return cache_.find(id) != cache_.end(); + } + void add_to_cache(std::size_t id, reference val) + { + cache_.emplace(id, std::addressof(val)); + } + reference retrieve_from_cache(std::size_t id) + { + return *cache_[id]; + } + + reference null_value() + { + static Json j{ null_type{} }; + return j; + } + + template <typename... Args> + Json* create_json(Args&& ... args) + { + auto temp = jsoncons::make_unique<Json>(std::forward<Args>(args)...); + Json* ptr = temp.get(); + temp_json_values_.emplace_back(std::move(temp)); + return ptr; + } + + const json_location_node_type& root_path_node() const + { + static json_location_node_type root('$'); + return root; + } + + const json_location_node_type& current_path_node() const + { + static json_location_node_type root('@'); + return root; + } + + template <typename... Args> + const json_location_node_type* create_path_node(Args&& ... args) + { + auto temp = jsoncons::make_unique<json_location_node_type>(std::forward<Args>(args)...); + json_location_node_type* ptr = temp.get(); + temp_path_node_values_.emplace_back(std::move(temp)); + return ptr; + } + }; + + template <class Json,class JsonReference> + struct node_less + { + bool operator()(const path_value_pair<Json,JsonReference>& a, const path_value_pair<Json,JsonReference>& b) const + { + return *(a.ptr) < *(b.ptr); + } + }; + + template <class Json,class JsonReference> + class jsonpath_selector + { + bool is_path_; + std::size_t precedence_level_; + + public: + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type,std::char_traits<char_type>>; + using string_view_type = jsoncons::basic_string_view<char_type, std::char_traits<char_type>>; + using value_type = Json; + using reference = JsonReference; + using pointer = typename std::conditional<std::is_const<typename std::remove_reference<JsonReference>::type>::value,typename 
Json::const_pointer,typename Json::pointer>::type; + using path_value_pair_type = path_value_pair<Json,JsonReference>; + using json_location_node_type = json_location_node<char_type>; + using json_location_type = json_location<char_type>; + using node_receiver_type = node_receiver<Json,JsonReference>; + using selector_type = jsonpath_selector<Json,JsonReference>; + + jsonpath_selector(bool is_path, + std::size_t precedence_level = 0) + : is_path_(is_path), + precedence_level_(precedence_level) + { + } + + virtual ~jsonpath_selector() noexcept = default; + + bool is_path() const + { + return is_path_; + } + + std::size_t precedence_level() const + { + return precedence_level_; + } + + bool is_right_associative() const + { + return true; + } + + virtual void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& path_tail, + reference val, + node_receiver_type& receiver, + result_options options) const = 0; + + virtual reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& path_tail, + reference current, + result_options options, + std::error_code& ec) const = 0; + + virtual void append_selector(jsonpath_selector*) + { + } + + virtual std::string to_string(int = 0) const + { + return std::string(); + } + }; + + template <class Json, class JsonReference> + struct static_resources + { + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type>; + using value_type = Json; + using reference = JsonReference; + using function_base_type = function_base<Json>; + using selector_type = jsonpath_selector<Json,JsonReference>; + + std::vector<std::unique_ptr<selector_type>> selectors_; + std::vector<std::unique_ptr<Json>> temp_json_values_; + std::vector<std::unique_ptr<unary_operator<Json,JsonReference>>> unary_operators_; + std::unordered_map<string_type,std::unique_ptr<function_base_type>> custom_functions_; + + 
static_resources() + { + } + + static_resources(const custom_functions<Json>& functions) + { + for (const auto& item : functions) + { + custom_functions_.emplace(item.name(), + jsoncons::make_unique<decorator_function<Json>>(item.arity(),item.function())); + } + } + + static_resources(const static_resources&) = default; + + static_resources(static_resources&& other) noexcept + : selectors_(std::move(other.selectors_)), + temp_json_values_(std::move(other.temp_json_values_)), + unary_operators_(std::move(other.unary_operators_)), + custom_functions_(std::move(other.custom_functions_)) + { + } + + const function_base_type* get_function(const string_type& name, std::error_code& ec) const + { + static abs_function<Json> abs_func; + static contains_function<Json> contains_func; + static starts_with_function<Json> starts_with_func; + static ends_with_function<Json> ends_with_func; + static ceil_function<Json> ceil_func; + static floor_function<Json> floor_func; + static to_number_function<Json> to_number_func; + static sum_function<Json> sum_func; + static prod_function<Json> prod_func; + static avg_function<Json> avg_func; + static min_function<Json> min_func; + static max_function<Json> max_func; + static length_function<Json> length_func; + static keys_function<Json> keys_func; +#if defined(JSONCONS_HAS_STD_REGEX) + static tokenize_function<Json> tokenize_func; +#endif + + static std::unordered_map<string_type,const function_base_type*> functions = + { + {string_type{'a','b','s'}, &abs_func}, + {string_type{'c','o','n','t','a','i','n','s'}, &contains_func}, + {string_type{'s','t','a','r','t','s','_','w','i','t','h'}, &starts_with_func}, + {string_type{'e','n','d','s','_','w','i','t','h'}, &ends_with_func}, + {string_type{'c','e','i','l'}, &ceil_func}, + {string_type{'f','l','o','o','r'}, &floor_func}, + {string_type{'t','o','_','n','u','m','b','e','r'}, &to_number_func}, + {string_type{'s','u','m'}, &sum_func}, + {string_type{'p','r','o', 'd'}, &prod_func}, + 
{string_type{'a','v','g'}, &avg_func}, + {string_type{'m','i','n'}, &min_func}, + {string_type{'m','a','x'}, &max_func}, + {string_type{'l','e','n','g','t','h'}, &length_func}, + {string_type{'k','e','y','s'}, &keys_func}, +#if defined(JSONCONS_HAS_STD_REGEX) + {string_type{'t','o','k','e','n','i','z','e'}, &tokenize_func}, +#endif + {string_type{'c','o','u','n','t'}, &length_func} + }; + + auto it = functions.find(name); + if (it == functions.end()) + { + auto it2 = custom_functions_.find(name); + if (it2 == custom_functions_.end()) + { + ec = jsonpath_errc::unknown_function; + return nullptr; + } + else + { + return it2->second.get(); + } + } + else + { + return it->second; + } + } + + const unary_operator<Json,JsonReference>* get_unary_not() const + { + static unary_not_operator<Json,JsonReference> oper; + return &oper; + } + + const unary_operator<Json,JsonReference>* get_unary_minus() const + { + static unary_minus_operator<Json,JsonReference> oper; + return &oper; + } + + const unary_operator<Json,JsonReference>* get_regex_operator(std::basic_regex<char_type>&& pattern) + { + unary_operators_.push_back(jsoncons::make_unique<regex_operator<Json,JsonReference>>(std::move(pattern))); + return unary_operators_.back().get(); + } + + const binary_operator<Json,JsonReference>* get_or_operator() const + { + static or_operator<Json,JsonReference> oper; + + return &oper; + } + + const binary_operator<Json,JsonReference>* get_and_operator() const + { + static and_operator<Json,JsonReference> oper; + + return &oper; + } + + const binary_operator<Json,JsonReference>* get_eq_operator() const + { + static eq_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_ne_operator() const + { + static ne_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_lt_operator() const + { + static lt_operator<Json,JsonReference> oper; + return &oper; + } + + const 
binary_operator<Json,JsonReference>* get_lte_operator() const + { + static lte_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_gt_operator() const + { + static gt_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_gte_operator() const + { + static gte_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_plus_operator() const + { + static plus_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_minus_operator() const + { + static minus_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_mult_operator() const + { + static mult_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_div_operator() const + { + static div_operator<Json,JsonReference> oper; + return &oper; + } + + const binary_operator<Json,JsonReference>* get_modulus_operator() const + { + static modulus_operator<Json,JsonReference> oper; + return &oper; + } + + template <typename T> + selector_type* new_selector(T&& val) + { + selectors_.emplace_back(jsoncons::make_unique<T>(std::forward<T>(val))); + return selectors_.back().get(); + } + + template <typename... Args> + Json* create_json(Args&& ... 
args) + { + auto temp = jsoncons::make_unique<Json>(std::forward<Args>(args)...); + Json* ptr = temp.get(); + temp_json_values_.emplace_back(std::move(temp)); + return ptr; + } + }; + + template <class Json, class JsonReference> + class expression_base + { + public: + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type,std::char_traits<char_type>>; + using string_view_type = jsoncons::basic_string_view<char_type, std::char_traits<char_type>>; + using value_type = Json; + using reference = JsonReference; + using pointer = typename std::conditional<std::is_const<typename std::remove_reference<JsonReference>::type>::value,typename Json::const_pointer,typename Json::pointer>::type; + using path_value_pair_type = path_value_pair<Json,JsonReference>; + using json_location_node_type = json_location_node<char_type>; + + virtual ~expression_base() noexcept = default; + + virtual value_type evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + //const json_location_node_type& path, + reference val, + result_options options, + std::error_code& ec) const = 0; + + virtual std::string to_string(int level = 0) const = 0; + }; + + template <class Json,class JsonReference> + class token + { + public: + using selector_type = jsonpath_selector<Json,JsonReference>; + using expression_base_type = expression_base<Json,JsonReference>; + + jsonpath_token_kind token_kind_; + + union + { + selector_type* selector_; + std::unique_ptr<expression_base_type> expression_; + const unary_operator<Json,JsonReference>* unary_operator_; + const binary_operator<Json,JsonReference>* binary_operator_; + const function_base<Json>* function_; + Json value_; + }; + public: + + token(const unary_operator<Json,JsonReference>* expr) noexcept + : token_kind_(jsonpath_token_kind::unary_operator), + unary_operator_(expr) + { + } + + token(const binary_operator<Json,JsonReference>* expr) noexcept + : 
token_kind_(jsonpath_token_kind::binary_operator), + binary_operator_(expr) + { + } + + token(current_node_arg_t) noexcept + : token_kind_(jsonpath_token_kind::current_node) + { + } + + token(root_node_arg_t) noexcept + : token_kind_(jsonpath_token_kind::root_node) + { + } + + token(end_function_arg_t) noexcept + : token_kind_(jsonpath_token_kind::end_function) + { + } + + token(separator_arg_t) noexcept + : token_kind_(jsonpath_token_kind::separator) + { + } + + token(lparen_arg_t) noexcept + : token_kind_(jsonpath_token_kind::lparen) + { + } + + token(rparen_arg_t) noexcept + : token_kind_(jsonpath_token_kind::rparen) + { + } + + token(begin_union_arg_t) noexcept + : token_kind_(jsonpath_token_kind::begin_union) + { + } + + token(end_union_arg_t) noexcept + : token_kind_(jsonpath_token_kind::end_union) + { + } + + token(begin_filter_arg_t) noexcept + : token_kind_(jsonpath_token_kind::begin_filter) + { + } + + token(end_filter_arg_t) noexcept + : token_kind_(jsonpath_token_kind::end_filter) + { + } + + token(begin_expression_arg_t) noexcept + : token_kind_(jsonpath_token_kind::begin_expression) + { + } + + token(end_index_expression_arg_t) noexcept + : token_kind_(jsonpath_token_kind::end_index_expression) + { + } + + token(end_argument_expression_arg_t) noexcept + : token_kind_(jsonpath_token_kind::end_argument_expression) + { + } + + token(selector_type* selector) + : token_kind_(jsonpath_token_kind::selector), selector_(selector) + { + } + + token(std::unique_ptr<expression_base_type>&& expr) + : token_kind_(jsonpath_token_kind::expression) + { + new (&expression_) std::unique_ptr<expression_base_type>(std::move(expr)); + } + + token(const function_base<Json>* function) noexcept + : token_kind_(jsonpath_token_kind::function), + function_(function) + { + } + + token(argument_arg_t) noexcept + : token_kind_(jsonpath_token_kind::argument) + { + } + + token(literal_arg_t, Json&& value) noexcept + : token_kind_(jsonpath_token_kind::literal), 
value_(std::move(value)) + { + } + + token(token&& other) noexcept + { + construct(std::forward<token>(other)); + } + + const Json& get_value(const_reference_arg_t, dynamic_resources<Json,JsonReference>&) const + { + return value_; + } + + Json& get_value(reference_arg_t, dynamic_resources<Json,JsonReference>& resources) const + { + return *resources.create_json(value_); + } + + token& operator=(token&& other) + { + if (&other != this) + { + if (token_kind_ == other.token_kind_) + { + switch (token_kind_) + { + case jsonpath_token_kind::selector: + selector_ = other.selector_; + break; + case jsonpath_token_kind::expression: + expression_ = std::move(other.expression_); + break; + case jsonpath_token_kind::unary_operator: + unary_operator_ = other.unary_operator_; + break; + case jsonpath_token_kind::binary_operator: + binary_operator_ = other.binary_operator_; + break; + case jsonpath_token_kind::function: + function_ = other.function_; + break; + case jsonpath_token_kind::literal: + value_ = std::move(other.value_); + break; + default: + break; + } + } + else + { + destroy(); + construct(std::forward<token>(other)); + } + } + return *this; + } + + ~token() noexcept + { + destroy(); + } + + jsonpath_token_kind token_kind() const + { + return token_kind_; + } + + bool is_lparen() const + { + return token_kind_ == jsonpath_token_kind::lparen; + } + + bool is_rparen() const + { + return token_kind_ == jsonpath_token_kind::rparen; + } + + bool is_current_node() const + { + return token_kind_ == jsonpath_token_kind::current_node; + } + + bool is_path() const + { + return token_kind_ == jsonpath_token_kind::selector && selector_->is_path(); + } + + bool is_operator() const + { + return token_kind_ == jsonpath_token_kind::unary_operator || + token_kind_ == jsonpath_token_kind::binary_operator; + } + + std::size_t precedence_level() const + { + switch(token_kind_) + { + case jsonpath_token_kind::selector: + return selector_->precedence_level(); + case 
jsonpath_token_kind::unary_operator: + return unary_operator_->precedence_level(); + case jsonpath_token_kind::binary_operator: + return binary_operator_->precedence_level(); + default: + return 0; + } + } + + jsoncons::optional<std::size_t> arity() const + { + return token_kind_ == jsonpath_token_kind::function ? function_->arity() : jsoncons::optional<std::size_t>(); + } + + bool is_right_associative() const + { + switch(token_kind_) + { + case jsonpath_token_kind::selector: + return selector_->is_right_associative(); + case jsonpath_token_kind::unary_operator: + return unary_operator_->is_right_associative(); + case jsonpath_token_kind::binary_operator: + return binary_operator_->is_right_associative(); + default: + return false; + } + } + + void construct(token&& other) + { + token_kind_ = other.token_kind_; + switch (token_kind_) + { + case jsonpath_token_kind::selector: + selector_ = other.selector_; + break; + case jsonpath_token_kind::expression: + new (&expression_) std::unique_ptr<expression_base_type>(std::move(other.expression_)); + break; + case jsonpath_token_kind::unary_operator: + unary_operator_ = other.unary_operator_; + break; + case jsonpath_token_kind::binary_operator: + binary_operator_ = other.binary_operator_; + break; + case jsonpath_token_kind::function: + function_ = other.function_; + break; + case jsonpath_token_kind::literal: + new (&value_) Json(std::move(other.value_)); + break; + default: + break; + } + } + + void destroy() noexcept + { + switch(token_kind_) + { + case jsonpath_token_kind::expression: + expression_.~unique_ptr(); + break; + case jsonpath_token_kind::literal: + value_.~Json(); + break; + default: + break; + } + } + + std::string to_string(int level = 0) const + { + std::string s; + switch (token_kind_) + { + case jsonpath_token_kind::root_node: + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("root node"); + break; + case jsonpath_token_kind::current_node: + if (level > 0) + { + 
s.append("\n"); + s.append(level*2, ' '); + } + s.append("current node"); + break; + case jsonpath_token_kind::argument: + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("argument"); + break; + case jsonpath_token_kind::selector: + s.append(selector_->to_string(level)); + break; + case jsonpath_token_kind::expression: + s.append(expression_->to_string(level)); + break; + case jsonpath_token_kind::literal: + { + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + auto sbuf = value_.to_string(); + unicode_traits::convert(sbuf.data(), sbuf.size(), s); + break; + } + case jsonpath_token_kind::binary_operator: + s.append(binary_operator_->to_string(level)); + break; + case jsonpath_token_kind::function: + s.append(function_->to_string(level)); + break; + default: + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("token kind: "); + s.append(jsoncons::jsonpath::detail::to_string(token_kind_)); + break; + } + //s.append("\n"); + return s; + } + }; + + template <class Callback, class Json,class JsonReference> + class callback_receiver : public node_receiver<Json,JsonReference> + { + Callback& callback_; + public: + using reference = JsonReference; + using char_type = typename Json::char_type; + using json_location_node_type = json_location_node<char_type>; + using json_location_type = json_location<char_type>; + + callback_receiver(Callback& callback) + : callback_(callback) + { + } + + void add(const json_location_node_type& path_tail, + reference value) override + { + callback_(json_location_type(path_tail), value); + } + }; + + template <class Json,class JsonReference> + class path_expression + { + public: + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type,std::char_traits<char_type>>; + using string_view_type = typename Json::string_view_type; + using path_value_pair_type = path_value_pair<Json,JsonReference>; + using path_value_pair_less_type = 
path_value_pair_less<Json,JsonReference>; + using path_value_pair_equal_type = path_value_pair_equal<Json,JsonReference>; + using value_type = Json; + using reference = typename path_value_pair_type::reference; + using pointer = typename path_value_pair_type::value_pointer; + using token_type = token<Json,JsonReference>; + using reference_arg_type = typename std::conditional<std::is_const<typename std::remove_reference<JsonReference>::type>::value, + const_reference_arg_t,reference_arg_t>::type; + using json_location_node_type = json_location_node<char_type>; + using json_location_type = json_location<char_type>; + using selector_type = jsonpath_selector<Json,JsonReference>; + private: + selector_type* selector_; + result_options required_options_; + public: + + path_expression() + : required_options_() + { + } + + path_expression(path_expression&& expr) = default; + + path_expression(selector_type* selector, bool paths_required) + : selector_(selector), required_options_() + { + if (paths_required) + { + required_options_ |= result_options::path; + } + } + + path_expression& operator=(path_expression&& expr) = default; + + Json evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& path, + reference instance, + result_options options) const + { + Json result(json_array_arg); + + if ((options & result_options::path) == result_options::path) + { + auto callback = [&result](const json_location_type& path, reference) + { + result.emplace_back(path.to_string()); + }; + evaluate(resources, root, path, instance, callback, options); + } + else + { + auto callback = [&result](const json_location_type&, reference val) + { + result.push_back(val); + }; + evaluate(resources, root, path, instance, callback, options); + } + + return result; + } + + template <class Callback> + typename std::enable_if<type_traits::is_binary_function_object<Callback,const json_location_type&,reference>::value,void>::type + 
evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& path, + reference current, + Callback callback, + result_options options) const + { + std::error_code ec; + + options |= required_options_; + + const result_options require_more = result_options::nodups | result_options::sort; + + if ((options & require_more) != result_options()) + { + path_value_receiver<Json,JsonReference> receiver; + selector_->select(resources, root, path, current, receiver, options); + + if (receiver.nodes.size() > 1 && (options & result_options::sort) == result_options::sort) + { + std::sort(receiver.nodes.begin(), receiver.nodes.end(), path_value_pair_less_type()); + } + + if (receiver.nodes.size() > 1 && (options & result_options::nodups) == result_options::nodups) + { + if ((options & result_options::sort) == result_options::sort) + { + auto last = std::unique(receiver.nodes.begin(),receiver.nodes.end(),path_value_pair_equal_type()); + receiver.nodes.erase(last,receiver.nodes.end()); + for (auto& node : receiver.nodes) + { + callback(node.path(), node.value()); + } + } + else + { + std::vector<path_value_pair_type> index(receiver.nodes); + std::sort(index.begin(), index.end(), path_value_pair_less_type()); + auto last = std::unique(index.begin(),index.end(),path_value_pair_equal_type()); + index.erase(last,index.end()); + + std::vector<path_value_pair_type> temp2; + temp2.reserve(index.size()); + for (auto&& node : receiver.nodes) + { + auto it = std::lower_bound(index.begin(),index.end(),node, path_value_pair_less_type()); + if (it != index.end() && it->path() == node.path()) + { + temp2.emplace_back(std::move(node)); + index.erase(it); + } + } + for (auto& node : temp2) + { + callback(node.path(), node.value()); + } + } + } + else + { + for (auto& node : receiver.nodes) + { + callback(node.path(), node.value()); + } + } + } + else + { + callback_receiver<Callback,Json,JsonReference> receiver(callback); + 
selector_->select(resources, root, path, current, receiver, options); + } + } + + std::string to_string(int level) const + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("expression "); + s.append(selector_->to_string(level+1)); + + return s; + + } + }; + + template <class Json,class JsonReference> + class expression : public expression_base<Json,JsonReference> + { + public: + using path_value_pair_type = path_value_pair<Json,JsonReference>; + using value_type = Json; + using reference = typename path_value_pair_type::reference; + using pointer = typename path_value_pair_type::value_pointer; + using const_pointer = const value_type*; + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type,std::char_traits<char_type>>; + using string_view_type = typename Json::string_view_type; + using path_value_pair_less_type = path_value_pair_less<Json,reference>; + using path_value_pair_equal_type = path_value_pair_equal<Json,reference>; + using parameter_type = parameter<Json>; + using token_type = token<Json,reference>; + using reference_arg_type = typename std::conditional<std::is_const<typename std::remove_reference<reference>::type>::value, + const_reference_arg_t,reference_arg_t>::type; + using json_location_node_type = json_location_node<char_type>; + using stack_item_type = value_or_pointer<Json,JsonReference>; + private: + std::vector<token_type> token_list_; + public: + + expression() + { + } + + expression(expression&& expr) + : token_list_(std::move(expr.token_list_)) + { + } + + expression(std::vector<token_type>&& token_stack) + : token_list_(std::move(token_stack)) + { + } + + expression& operator=(expression&& expr) = default; + + value_type evaluate(dynamic_resources<Json,reference>& resources, + reference root, + reference current, + result_options options, + std::error_code& ec) const override + { + std::vector<stack_item_type> stack; + std::vector<parameter_type> 
arg_stack; + + //std::cout << "EVALUATE TOKENS\n"; + //for (auto& tok : token_list_) + //{ + // std::cout << tok.to_string() << "\n"; + //} + //std::cout << "\n"; + + if (!token_list_.empty()) + { + for (auto& tok : token_list_) + { + //std::cout << "Token: " << tok.to_string() << "\n"; + switch (tok.token_kind()) + { + case jsonpath_token_kind::literal: + { + stack.emplace_back(std::addressof(tok.get_value(reference_arg_type(), resources))); + break; + } + case jsonpath_token_kind::unary_operator: + { + JSONCONS_ASSERT(stack.size() >= 1); + auto item = std::move(stack.back()); + stack.pop_back(); + + auto val = tok.unary_operator_->evaluate(item.value(), ec); + stack.emplace_back(std::move(val)); + break; + } + case jsonpath_token_kind::binary_operator: + { + //std::cout << "binary operator: " << stack.size() << "\n"; + JSONCONS_ASSERT(stack.size() >= 2); + auto rhs = std::move(stack.back()); + //std::cout << "rhs: " << *rhs << "\n"; + stack.pop_back(); + auto lhs = std::move(stack.back()); + //std::cout << "lhs: " << *lhs << "\n"; + stack.pop_back(); + + auto val = tok.binary_operator_->evaluate(lhs.value(), rhs.value(), ec); + //std::cout << "Evaluate binary expression: " << r << "\n"; + stack.emplace_back(std::move(val)); + break; + } + case jsonpath_token_kind::root_node: + //std::cout << "root: " << root << "\n"; + stack.emplace_back(std::addressof(root)); + break; + case jsonpath_token_kind::current_node: + //std::cout << "current: " << current << "\n"; + stack.emplace_back(std::addressof(current)); + break; + case jsonpath_token_kind::argument: + JSONCONS_ASSERT(!stack.empty()); + //std::cout << "argument stack items " << stack.size() << "\n"; + //for (auto& item : stack) + //{ + // std::cout << *item.to_pointer(resources) << "\n"; + //} + //std::cout << "\n"; + arg_stack.emplace_back(std::move(stack.back())); + //for (auto& item : arg_stack) + //{ + // std::cout << *item << "\n"; + //} + //std::cout << "\n"; + stack.pop_back(); + break; + case 
jsonpath_token_kind::function: + { + if (tok.function_->arity() && *(tok.function_->arity()) != arg_stack.size()) + { + ec = jsonpath_errc::invalid_arity; + return Json::null(); + } + //std::cout << "function arg stack:\n"; + //for (auto& item : arg_stack) + //{ + // std::cout << *item << "\n"; + //} + //std::cout << "\n"; + + value_type val = tok.function_->evaluate(arg_stack, ec); + if (ec) + { + return Json::null(); + } + //std::cout << "function result: " << val << "\n"; + arg_stack.clear(); + stack.emplace_back(std::move(val)); + break; + } + case jsonpath_token_kind::expression: + { + value_type val = tok.expression_->evaluate(resources, root, current, options, ec); + stack.emplace_back(std::move(val)); + break; + } + case jsonpath_token_kind::selector: + { + JSONCONS_ASSERT(!stack.empty()); + auto item = std::move(stack.back()); + //for (auto& item : stack) + //{ + //std::cout << "selector stack input:\n"; + //switch (item.tag) + //{ + // case node_set_tag::single: + // std::cout << "single: " << *(item.node.ptr) << "\n"; + // break; + // case node_set_tag::multi: + // for (auto& node : stack.back().ptr().nodes) + // { + // std::cout << "multi: " << *node.ptr << "\n"; + // } + // break; + // default: + // break; + //} + //std::cout << "\n"; + //} + //std::cout << "selector item: " << *ptr << "\n"; + + reference val = tok.selector_->evaluate(resources, root, resources.current_path_node(), item.value(), options, ec); + + stack.pop_back(); + stack.emplace_back(stack_item_type(std::addressof(val))); + break; + } + default: + break; + } + } + } + + //if (stack.size() != 1) + //{ + // std::cout << "Stack size: " << stack.size() << "\n"; + //} + return stack.empty() ? 
// Appends the `length` characters starting at `s` to `sink`, writing a
// two-character backslash sequence for backslash, single quote, backspace,
// form feed, newline, carriage return and tab. Every other character is
// appended unchanged.
//
// Returns the number of characters appended to `sink`.
template <class CharT, class Sink>
std::size_t escape_string(const CharT* s, std::size_t length,
                          Sink& sink)
{
    std::size_t written = 0;

    for (std::size_t pos = 0; pos < length; ++pos)
    {
        const CharT ch = s[pos];

        // Second character of the escape sequence, or 0 when `ch` is copied as is.
        char letter = 0;
        switch (ch)
        {
            case '\\': letter = '\\'; break;
            case '\'': letter = '\''; break;
            case '\b': letter = 'b'; break;
            case '\f': letter = 'f'; break;
            case '\n': letter = 'n'; break;
            case '\r': letter = 'r'; break;
            case '\t': letter = 't'; break;
            default: break;
        }

        if (letter != 0)
        {
            sink.push_back('\\');
            sink.push_back(letter);
            written += 2;
        }
        else
        {
            sink.push_back(ch);
            ++written;
        }
    }

    return written;
}
char_type = typename Json::char_type; + using string_type = std::basic_string<char_type>; + + if (JSONCONS_UNLIKELY(!value.is_object())) + { + JSONCONS_THROW(jsonpath_error(jsonpath_errc::argument_to_unflatten_invalid)); + } + + Json result; + + for (const auto& item : value.object_range()) + { + Json* part = &result; + string_type buffer; + unflatten_state state = unflatten_state::start; + + auto it = item.key().begin(); + auto last = item.key().end(); + + for (; it != last; ++it) + { + switch (state) + { + case unflatten_state::start: + { + switch (*it) + { + case '$': + state = unflatten_state::expect_lbracket; + break; + default: + break; + } + break; + } + case unflatten_state::expect_lbracket: + { + switch (*it) + { + case '[': + state = unflatten_state::lbracket; + break; + default: + JSONCONS_THROW(jsonpath_error(jsonpath_errc::invalid_flattened_key)); + break; + } + break; + } + case unflatten_state::lbracket: + { + switch (*it) + { + case '\'': + state = unflatten_state::single_quoted_name_state; + break; + case '\"': + state = unflatten_state::double_quoted_name_state; + break; + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + buffer.push_back(*it); + state = unflatten_state::index_state; + break; + default: + JSONCONS_THROW(jsonpath_error(jsonpath_errc::invalid_flattened_key)); + break; + } + break; + } + case unflatten_state::single_quoted_name_state: + { + switch (*it) + { + case '\'': + if (it != last-2) + { + auto res = part->try_emplace(buffer,Json()); + part = &(res.first->value()); + } + else + { + auto res = part->try_emplace(buffer,item.value()); + part = &(res.first->value()); + } + buffer.clear(); + state = unflatten_state::expect_rbracket; + break; + case '\\': + state = unflatten_state::single_quoted_string_escape_char; + break; + default: + buffer.push_back(*it); + break; + } + break; + } + case unflatten_state::double_quoted_name_state: + { + switch (*it) + { + case '\"': + if (it != last-2) + 
{ + auto res = part->try_emplace(buffer,Json()); + part = &(res.first->value()); + } + else + { + auto res = part->try_emplace(buffer,item.value()); + part = &(res.first->value()); + } + buffer.clear(); + state = unflatten_state::expect_rbracket; + break; + case '\\': + state = unflatten_state::double_quoted_string_escape_char; + break; + default: + buffer.push_back(*it); + break; + } + break; + } + case unflatten_state::double_quoted_string_escape_char: + switch (*it) + { + case '\"': + buffer.push_back('\"'); + state = unflatten_state::double_quoted_name_state; + break; + case '\\': + buffer.push_back('\\'); + state = unflatten_state::double_quoted_name_state; + break; + case '/': + buffer.push_back('/'); + state = unflatten_state::double_quoted_name_state; + break; + case 'b': + buffer.push_back('\b'); + state = unflatten_state::double_quoted_name_state; + break; + case 'f': + buffer.push_back('\f'); + state = unflatten_state::double_quoted_name_state; + break; + case 'n': + buffer.push_back('\n'); + state = unflatten_state::double_quoted_name_state; + break; + case 'r': + buffer.push_back('\r'); + state = unflatten_state::double_quoted_name_state; + break; + case 't': + buffer.push_back('\t'); + state = unflatten_state::double_quoted_name_state; + break; + default: + break; + } + break; + case unflatten_state::single_quoted_string_escape_char: + switch (*it) + { + case '\'': + buffer.push_back('\''); + state = unflatten_state::single_quoted_name_state; + break; + case '\\': + buffer.push_back('\\'); + state = unflatten_state::double_quoted_name_state; + break; + case '/': + buffer.push_back('/'); + state = unflatten_state::double_quoted_name_state; + break; + case 'b': + buffer.push_back('\b'); + state = unflatten_state::double_quoted_name_state; + break; + case 'f': + buffer.push_back('\f'); + state = unflatten_state::double_quoted_name_state; + break; + case 'n': + buffer.push_back('\n'); + state = unflatten_state::double_quoted_name_state; + break; + case 
'r': + buffer.push_back('\r'); + state = unflatten_state::double_quoted_name_state; + break; + case 't': + buffer.push_back('\t'); + state = unflatten_state::double_quoted_name_state; + break; + default: + break; + } + break; + case unflatten_state::index_state: + { + switch (*it) + { + case ']': + { + std::size_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (r) + { + if (!part->is_array()) + { + *part = Json(json_array_arg); + } + if (it != last-1) + { + if (n+1 > part->size()) + { + Json& ref = part->emplace_back(); + part = std::addressof(ref); + } + else + { + part = &part->at(n); + } + } + else + { + Json& ref = part->emplace_back(item.value()); + part = std::addressof(ref); + } + } + buffer.clear(); + state = unflatten_state::expect_lbracket; + break; + } + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + buffer.push_back(*it); + break; + default: + JSONCONS_THROW(jsonpath_error(jsonpath_errc::invalid_flattened_key)); + break; + } + break; + } + case unflatten_state::expect_rbracket: + { + switch (*it) + { + case ']': + state = unflatten_state::expect_lbracket; + break; + default: + JSONCONS_THROW(jsonpath_error(jsonpath_errc::invalid_flattened_key)); + break; + } + break; + } + default: + JSONCONS_UNREACHABLE(); + break; + } + } + } + + return result; + } +}} + +#endif diff --git a/include/jsoncons_ext/jsonpath/json_location.hpp b/include/jsoncons_ext/jsonpath/json_location.hpp new file mode 100644 index 0000000..9105608 --- /dev/null +++ b/include/jsoncons_ext/jsonpath/json_location.hpp @@ -0,0 +1,445 @@ +// Copyright 2021 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
template <class CharT>
class json_location;

// Kind of step a json_location_node represents.
enum class json_location_node_kind { root, index, name };

// One step of a location path: the root, an array index, or an object member
// name, linked to the step before it through parent().
template <class CharT>
class json_location_node
{
    friend class json_location<CharT>;
public:
    using char_type = CharT;
    using string_type = std::basic_string<CharT>;
private:

    const json_location_node* parent_;
    json_location_node_kind node_kind_;
    string_type name_;
    std::size_t index_;
public:
    // Root node; its name is the single character `c`.
    json_location_node(char_type c)
        : parent_(nullptr),
          node_kind_(json_location_node_kind::root),
          name_(1, c),
          index_(0)
    {
    }

    // Member-name node below `parent`.
    json_location_node(const json_location_node* parent, const string_type& name)
        : parent_(parent),
          node_kind_(json_location_node_kind::name),
          name_(name),
          index_(0)
    {
    }

    // Array-index node below `parent`.
    json_location_node(const json_location_node* parent, std::size_t index)
        : parent_(parent),
          node_kind_(json_location_node_kind::index),
          index_(index)
    {
    }

    const json_location_node* parent() const
    {
        return parent_;
    }

    json_location_node_kind node_kind() const
    {
        return node_kind_;
    }

    const string_type& name() const
    {
        return name_;
    }

    std::size_t index() const
    {
        return index_;
    }

    // Exchanges all four fields with `node`.
    void swap(json_location_node& node)
    {
        std::swap(index_, node.index_);
        std::swap(name_, node.name_);
        std::swap(node_kind_, node.node_kind_);
        std::swap(parent_, node.parent_);
    }

private:

    // Hash of the index for index nodes, of the name otherwise.
    std::size_t node_hash() const
    {
        if (node_kind_ == json_location_node_kind::index)
        {
            return std::hash<std::size_t>{}(index_);
        }
        return std::hash<string_type>{}(name_);
    }

    // Three-way comparison: nodes of different kinds order by kind, nodes of
    // the same kind by index (index nodes) or by name (root and name nodes).
    int compare_node(const json_location_node& other) const
    {
        if (node_kind_ != other.node_kind_)
        {
            return static_cast<int>(node_kind_) - static_cast<int>(other.node_kind_);
        }

        if (node_kind_ == json_location_node_kind::index)
        {
            if (index_ < other.index_)
            {
                return -1;
            }
            if (index_ > other.index_)
            {
                return 1;
            }
            return 0;
        }

        if (node_kind_ == json_location_node_kind::root ||
            node_kind_ == json_location_node_kind::name)
        {
            return name_.compare(other.name_);
        }

        return 0;
    }
};
// Random-access iterator adaptor over a sequence of pointers. Dereferencing
// yields a const reference to the pointed-to object rather than the pointer
// stored in the underlying sequence.
template <class Iterator>
class json_location_iterator
{
    // Lets the converting constructor read it_ of another specialization.
    template <class Iter>
    friend class json_location_iterator;

    Iterator it_;

public:
    using iterator_category = std::random_access_iterator_tag;

    using value_type = typename std::remove_pointer<typename std::iterator_traits<Iterator>::value_type>::type;
    using difference_type = typename std::iterator_traits<Iterator>::difference_type;
    using pointer = const value_type*;
    using reference = const value_type&;

    json_location_iterator() : it_()
    {
    }

    explicit json_location_iterator(Iterator ptr) : it_(ptr)
    {
    }

    json_location_iterator(const json_location_iterator&) = default;
    json_location_iterator(json_location_iterator&&) = default;
    json_location_iterator& operator=(const json_location_iterator&) = default;
    json_location_iterator& operator=(json_location_iterator&&) = default;

    // Converts from an adaptor over a different but convertible underlying
    // iterator (for example iterator -> const_iterator).
    template <class Iter,
              class=typename std::enable_if<!std::is_same<Iter,Iterator>::value && std::is_convertible<Iter,Iterator>::value>::type>
    json_location_iterator(const json_location_iterator<Iter>& other)
        : it_(other.it_)
    {
    }

    operator Iterator() const
    {
        return it_;
    }

    reference operator*() const
    {
        return *(*it_);
    }

    pointer operator->() const
    {
        return (*it_);
    }

    json_location_iterator& operator++()
    {
        ++it_;
        return *this;
    }

    json_location_iterator operator++(int)
    {
        json_location_iterator temp = *this;
        ++*this;
        return temp;
    }

    json_location_iterator& operator--()
    {
        --it_;
        return *this;
    }

    json_location_iterator operator--(int)
    {
        json_location_iterator temp = *this;
        --*this;
        return temp;
    }

    json_location_iterator& operator+=(const difference_type offset)
    {
        it_ += offset;
        return *this;
    }

    json_location_iterator operator+(const difference_type offset) const
    {
        json_location_iterator temp = *this;
        return temp += offset;
    }

    json_location_iterator& operator-=(const difference_type offset)
    {
        return *this += -offset;
    }

    json_location_iterator operator-(const difference_type offset) const
    {
        json_location_iterator temp = *this;
        return temp -= offset;
    }

    difference_type operator-(const json_location_iterator& rhs) const noexcept
    {
        return it_ - rhs.it_;
    }

    // Element at `offset` from this position. operator* of the adaptor already
    // follows the stored pointer, so a single dereference is required here.
    reference operator[](const difference_type offset) const noexcept
    {
        return *(*this + offset);
    }

    bool operator==(const json_location_iterator& rhs) const noexcept
    {
        return it_ == rhs.it_;
    }

    bool operator!=(const json_location_iterator& rhs) const noexcept
    {
        return !(*this == rhs);
    }

    bool operator<(const json_location_iterator& rhs) const noexcept
    {
        return it_ < rhs.it_;
    }

    bool operator>(const json_location_iterator& rhs) const noexcept
    {
        return rhs < *this;
    }

    bool operator<=(const json_location_iterator& rhs) const noexcept
    {
        return !(rhs < *this);
    }

    bool operator>=(const json_location_iterator& rhs) const noexcept
    {
        return !(*this < rhs);
    }

    inline
    friend json_location_iterator<Iterator> operator+(
        difference_type offset, json_location_iterator<Iterator> next)
    {
        return next += offset;
    }
};
std::basic_string<CharT>; + using json_location_node_type = json_location_node<CharT>; + private: + std::vector<const json_location_node_type*> nodes_; + public: + using iterator = typename detail::json_location_iterator<typename std::vector<const json_location_node_type*>::iterator>; + using const_iterator = typename detail::json_location_iterator<typename std::vector<const json_location_node_type*>::const_iterator>; + + json_location(const json_location_node_type& node) + { + const json_location_node_type* p = std::addressof(node); + do + { + nodes_.push_back(p); + p = p->parent_; + } + while (p != nullptr); + + std::reverse(nodes_.begin(), nodes_.end()); + } + + iterator begin() + { + return iterator(nodes_.begin()); + } + + iterator end() + { + return iterator(nodes_.end()); + } + + const_iterator begin() const + { + return const_iterator(nodes_.begin()); + } + + const_iterator end() const + { + return const_iterator(nodes_.end()); + } + + const json_location_node_type& last() const + { + return *nodes_.back(); + } + + string_type to_string() const + { + string_type buffer; + + for (const auto& node : nodes_) + { + switch (node->node_kind()) + { + case json_location_node_kind::root: + buffer.append(node->name()); + break; + case json_location_node_kind::name: + buffer.push_back('['); + buffer.push_back('\''); + for (auto c : node->name()) + { + if (c == '\'') + { + buffer.push_back('\\'); + buffer.push_back('\''); + } + else + { + buffer.push_back(c); + } + } + buffer.push_back('\''); + buffer.push_back(']'); + break; + case json_location_node_kind::index: + buffer.push_back('['); + jsoncons::detail::from_integer(node->index(), buffer); + buffer.push_back(']'); + break; + } + } + + return buffer; + } + + int compare(const json_location& other) const + { + if (this == &other) + { + return 0; + } + + auto it1 = nodes_.begin(); + auto it2 = other.nodes_.begin(); + while (it1 != nodes_.end() && it2 != other.nodes_.end()) + { + int diff = 
(*it1)->compare_node(*(*it2)); + if (diff != 0) + { + return diff; + } + ++it1; + ++it2; + } + return (nodes_.size() < other.nodes_.size()) ? -1 : (nodes_.size() == other.nodes_.size()) ? 0 : 1; + } + + std::size_t hash() const + { + + auto it = nodes_.begin(); + std::size_t hash = (*it).hash(); + ++it; + + while (it != nodes_.end()) + { + hash += 17*(*it)->node_hash(); + ++it; + } + + return hash; + } + + friend bool operator==(const json_location& lhs, const json_location& rhs) + { + return lhs.compare(rhs) == 0; + } + + friend bool operator!=(const json_location& lhs, const json_location& rhs) + { + return !(lhs == rhs); + } + + friend bool operator<(const json_location& lhs, const json_location& rhs) + { + return lhs.compare(rhs) < 0; + } + }; + + template <class Json> + Json* select(Json& root, const json_location<typename Json::char_type>& path) + { + Json* current = std::addressof(root); + for (const auto& json_location_node : path) + { + if (json_location_node.node_kind() == json_location_node_kind::index) + { + if (current->type() != json_type::array_value || json_location_node.index() >= current->size()) + { + return nullptr; + } + current = std::addressof(current->at(json_location_node.index())); + } + else if (json_location_node.node_kind() == json_location_node_kind::name) + { + if (current->type() != json_type::object_value) + { + return nullptr; + } + auto it = current->find(json_location_node.name()); + if (it == current->object_range().end()) + { + return nullptr; + } + current = std::addressof(it->value()); + } + } + return current; + } + +} // namespace jsonpath +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/jsonpath/json_query.hpp b/include/jsoncons_ext/jsonpath/json_query.hpp new file mode 100644 index 0000000..8facfa9 --- /dev/null +++ b/include/jsoncons_ext/jsonpath/json_query.hpp @@ -0,0 +1,115 @@ +// Copyright 2021 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPATH_JSON_QUERY_HPP +#define JSONCONS_JSONPATH_JSON_QUERY_HPP + +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpath/jsonpath_expression.hpp> + +namespace jsoncons { +namespace jsonpath { + + template<class Json> + Json json_query(const Json& instance, + const typename Json::string_view_type& path, + result_options options = result_options(), + const custom_functions<Json>& functions = custom_functions<Json>()) + { + auto expr = make_expression<Json>(path, functions); + return expr.evaluate(instance, options); + } + + template<class Json,class Callback> + typename std::enable_if<type_traits::is_binary_function_object<Callback,const std::basic_string<typename Json::char_type>&,const Json&>::value,void>::type + json_query(const Json& instance, + const typename Json::string_view_type& path, + Callback callback, + result_options options = result_options(), + const custom_functions<Json>& functions = custom_functions<Json>()) + { + auto expr = make_expression<Json>(path, functions); + expr.evaluate(instance, callback, options); + } + + template<class Json, class T> + typename std::enable_if<is_json_type_traits_specialized<Json,T>::value,void>::type + json_replace(Json& instance, const typename Json::string_view_type& path, T&& new_value, + result_options options = result_options::nodups, + const custom_functions<Json>& funcs = custom_functions<Json>()) + { + using evaluator_t = typename jsoncons::jsonpath::detail::jsonpath_evaluator<Json, Json&>; + //using string_type = typename evaluator_t::string_type; + using value_type = typename evaluator_t::value_type; + using reference = typename evaluator_t::reference; + using json_selector_t = typename evaluator_t::path_expression_type; + using json_location_type = typename evaluator_t::json_location_type; + + 
jsoncons::jsonpath::detail::static_resources<value_type,reference> static_resources(funcs); + evaluator_t e; + json_selector_t expr = e.compile(static_resources, path); + + jsoncons::jsonpath::detail::dynamic_resources<Json,reference> resources; + auto callback = [&new_value](const json_location_type&, reference v) + { + v = std::forward<T>(new_value); + }; + expr.evaluate(resources, instance, resources.root_path_node(), instance, callback, options); + } + + template<class Json, class UnaryCallback> + typename std::enable_if<type_traits::is_unary_function_object<UnaryCallback,Json>::value,void>::type + json_replace(Json& instance, const typename Json::string_view_type& path , UnaryCallback callback) + { + using evaluator_t = typename jsoncons::jsonpath::detail::jsonpath_evaluator<Json, Json&>; + //using string_type = typename evaluator_t::string_type; + using value_type = typename evaluator_t::value_type; + using reference = typename evaluator_t::reference; + using json_selector_t = typename evaluator_t::path_expression_type; + using json_location_type = typename evaluator_t::json_location_type; + + jsoncons::jsonpath::detail::static_resources<value_type,reference> static_resources; + evaluator_t e; + json_selector_t expr = e.compile(static_resources, path); + + jsoncons::jsonpath::detail::dynamic_resources<Json,reference> resources; + auto f = [callback](const json_location_type&, reference v) + { + v = callback(v); + }; + expr.evaluate(resources, instance, resources.root_path_node(), instance, f, result_options::nodups); + } + + template<class Json, class BinaryCallback> + typename std::enable_if<type_traits::is_binary_function_object<BinaryCallback,const std::basic_string<typename Json::char_type>&,Json&>::value,void>::type + json_replace(Json& instance, const typename Json::string_view_type& path , BinaryCallback callback, + result_options options = result_options::nodups, + const custom_functions<Json>& funcs = custom_functions<Json>()) + { + using evaluator_t 
= typename jsoncons::jsonpath::detail::jsonpath_evaluator<Json, Json&>; + //using string_type = typename evaluator_t::string_type; + using value_type = typename evaluator_t::value_type; + using reference = typename evaluator_t::reference; + using json_selector_t = typename evaluator_t::path_expression_type; + using json_location_type = typename evaluator_t::json_location_type; + + jsoncons::jsonpath::detail::static_resources<value_type,reference> static_resources(funcs); + evaluator_t e; + json_selector_t expr = e.compile(static_resources, path); + + jsoncons::jsonpath::detail::dynamic_resources<Json,reference> resources; + + auto f = [&callback](const json_location_type& path, reference val) + { + callback(path.to_string(), val); + }; + expr.evaluate(resources, instance, resources.root_path_node(), instance, f, options); + } + +} // namespace jsonpath +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/jsonpath/jsonpath.hpp b/include/jsoncons_ext/jsonpath/jsonpath.hpp new file mode 100644 index 0000000..18500c5 --- /dev/null +++ b/include/jsoncons_ext/jsonpath/jsonpath.hpp @@ -0,0 +1,13 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPATH_JSONPATH_HPP +#define JSONCONS_JSONPATH_JSONPATH_HPP + +#include <jsoncons_ext/jsonpath/json_query.hpp> +#include <jsoncons_ext/jsonpath/flatten.hpp> + +#endif diff --git a/include/jsoncons_ext/jsonpath/jsonpath_error.hpp b/include/jsoncons_ext/jsonpath/jsonpath_error.hpp new file mode 100644 index 0000000..8157bba --- /dev/null +++ b/include/jsoncons_ext/jsonpath/jsonpath_error.hpp @@ -0,0 +1,240 @@ +/// Copyright 2021 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
// Error conditions reported by the JSONPath parser and evaluator.
enum class jsonpath_errc
{
    success = 0,
    expected_root_or_function,
    expected_current_node,
    expected_rparen,
    expected_rbracket,
    expected_separator,
    expected_forward_slash,
    expected_slice_start,
    expected_slice_end,
    expected_slice_step,
    expected_bracket_specifier_or_union,
    unexpected_operator,
    invalid_function_name,
    invalid_argument,
    invalid_arity,
    function_name_not_found,
    parse_error_in_filter,
    argument_parse_error,
    unidentified_error,
    unexpected_eof,
    expected_colon_dot_left_bracket_comma_or_rbracket,
    argument_to_unflatten_invalid,
    invalid_flattened_key,
    step_cannot_be_zero,
    invalid_number,
    illegal_escaped_character,
    invalid_codepoint,
    unknown_function,
    invalid_type,
    unbalanced_parentheses,
    syntax_error,
    expected_comparator,
    expected_or,
    expected_and,
    expected_comma_or_rparen,
    expected_comma_or_rbracket,
    expected_relative_path
};

// std::error_category for jsonpath_errc values.
class jsonpath_error_category_impl
   : public std::error_category
{
public:
    // Name of this category.
    const char* name() const noexcept override
    {
        return "jsoncons/jsonpath";
    }

    // Human-readable message for the jsonpath_errc value `ev`. Values without
    // a dedicated message yield "Unknown jsonpath parser error".
    std::string message(int ev) const override
    {
        switch (static_cast<jsonpath_errc>(ev))
        {
            case jsonpath_errc::expected_root_or_function:
                return "Expected '$' or function expression";
            case jsonpath_errc::expected_current_node:
                return "Expected @";
            case jsonpath_errc::expected_rbracket:
                return "Expected ]";
            case jsonpath_errc::expected_rparen:
                return "Expected )";
            case jsonpath_errc::expected_slice_start:
                return "Expected slice start";
            case jsonpath_errc::expected_slice_end:
                return "Expected slice end";
            case jsonpath_errc::expected_slice_step:
                return "Expected slice step";
            case jsonpath_errc::expected_separator:
                return "Expected dot or left bracket separator";
            case jsonpath_errc::expected_forward_slash:
                return "Invalid path filter, expected '/'";
            case jsonpath_errc::expected_bracket_specifier_or_union:
                return "Expected index, single or double quoted name, expression, filter, absolute ('$') path or relative ('@') path";
            case jsonpath_errc::unexpected_operator:
                // This enumerator previously had no case and fell through to
                // the generic "Unknown jsonpath parser error" text.
                return "Unexpected operator";
            case jsonpath_errc::invalid_function_name:
                return "Invalid function name";
            case jsonpath_errc::invalid_argument:
                return "Invalid argument type";
            case jsonpath_errc::invalid_arity:
                return "Incorrect number of arguments";
            case jsonpath_errc::function_name_not_found:
                return "Function name not found";
            case jsonpath_errc::parse_error_in_filter:
                return "Could not parse JSON expression in a JSONPath filter";
            case jsonpath_errc::argument_parse_error:
                return "Could not parse JSON expression passed to JSONPath function";
            case jsonpath_errc::unidentified_error:
                return "Unidentified error";
            case jsonpath_errc::unexpected_eof:
                return "Unexpected EOF while parsing jsonpath expression";
            case jsonpath_errc::expected_colon_dot_left_bracket_comma_or_rbracket:
                return "Expected ':', '.', '[', ',', or ']'";
            case jsonpath_errc::argument_to_unflatten_invalid:
                return "Argument to unflatten must be an object";
            case jsonpath_errc::invalid_flattened_key:
                return "Flattened key is invalid";
            case jsonpath_errc::step_cannot_be_zero:
                return "Slice step cannot be zero";
            case jsonpath_errc::invalid_number:
                return "Invalid number";
            case jsonpath_errc::illegal_escaped_character:
                return "Illegal escaped character";
            case jsonpath_errc::invalid_codepoint:
                return "Invalid codepoint";
            case jsonpath_errc::unknown_function:
                return "Unknown function";
            case jsonpath_errc::invalid_type:
                return "Invalid type";
            case jsonpath_errc::unbalanced_parentheses:
                return "Unbalanced parentheses";
            case jsonpath_errc::syntax_error:
                return "Syntax error";
            case jsonpath_errc::expected_comparator:
                return "Expected comparator";
            case jsonpath_errc::expected_or:
                return "Expected operator '||'";
            case jsonpath_errc::expected_and:
                return "Expected operator '&&'";
            case jsonpath_errc::expected_comma_or_rparen:
                return "Expected comma or right parenthesis";
            case jsonpath_errc::expected_comma_or_rbracket:
                return "Expected comma or right bracket";
            case jsonpath_errc::expected_relative_path:
                return "Expected unquoted string, or single or double quoted string, or index or '*'";
            default:
                return "Unknown jsonpath parser error";
        }
    }
};
const char* what() const noexcept override + { + if (what_.empty()) + { + JSONCONS_TRY + { + what_.append(std::system_error::what()); + if (line_number_ != 0 && column_number_ != 0) + { + what_.append(" at line "); + what_.append(std::to_string(line_number_)); + what_.append(" and column "); + what_.append(std::to_string(column_number_)); + } + else if (column_number_ != 0) + { + what_.append(" at position "); + what_.append(std::to_string(column_number_)); + } + return what_.c_str(); + } + JSONCONS_CATCH(...) + { + return std::system_error::what(); + } + } + else + { + return what_.c_str(); + } + } + + std::size_t line() const noexcept + { + return line_number_; + } + + std::size_t column() const noexcept + { + return column_number_; + } + }; + +}} + +#endif diff --git a/include/jsoncons_ext/jsonpath/jsonpath_expression.hpp b/include/jsoncons_ext/jsonpath/jsonpath_expression.hpp new file mode 100644 index 0000000..a3537b3 --- /dev/null +++ b/include/jsoncons_ext/jsonpath/jsonpath_expression.hpp @@ -0,0 +1,2612 @@ +// Copyright 2021 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPATH_JSONPATH_EXPRESSION_HPP +#define JSONCONS_JSONPATH_JSONPATH_EXPRESSION_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::is_const +#include <limits> // std::numeric_limits +#include <utility> // std::move +#include <regex> +#include <algorithm> // std::reverse +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpath/jsonpath_error.hpp> +#include <jsoncons_ext/jsonpath/expression.hpp> +#include <jsoncons_ext/jsonpath/jsonpath_selector.hpp> + +namespace jsoncons { +namespace jsonpath { +namespace detail { + + enum class path_state + { + start, + root_or_current_node, + expect_function_expr, + relative_path, + relative_location, + parent_operator, + ancestor_depth, + filter_expression, + expression_rhs, + recursive_descent_or_expression_lhs, + path_or_literal_or_function, + json_text_or_function, + json_text_or_function_name, + json_text_string, + json_value, + json_string, + identifier_or_function_expr, + name_or_lbracket, + unquoted_string, + anything, + number, + function_expression, + argument, + zero_or_one_arguments, + one_or_more_arguments, + identifier, + single_quoted_string, + double_quoted_string, + bracketed_unquoted_name_or_union, + union_expression, + identifier_or_union, + bracket_specifier_or_union, + bracketed_wildcard, + index_or_slice, + wildcard_or_union, + union_element, + index_or_slice_or_union, + integer, + digit, + slice_expression_stop, + slice_expression_step, + comma_or_rbracket, + expect_rparen, + expect_rbracket, + quoted_string_escape_char, + escape_u1, + escape_u2, + escape_u3, + escape_u4, + escape_expect_surrogate_pair1, + escape_expect_surrogate_pair2, + escape_u5, + escape_u6, + escape_u7, + escape_u8, + expression, + comparator_expression, + eq_or_regex, + expect_regex, + regex, + 
regex_options, + regex_pattern, + cmp_lt_or_lte, + cmp_gt_or_gte, + cmp_ne, + expect_or, + expect_and + }; + + template<class Json, + class JsonReference> + class jsonpath_evaluator : public ser_context + { + public: + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type,std::char_traits<char_type>>; + using string_view_type = typename Json::string_view_type; + using path_value_pair_type = path_value_pair<Json,JsonReference>; + using value_type = Json; + using reference = JsonReference; + using pointer = typename path_value_pair_type::value_pointer; + using token_type = token<Json,JsonReference>; + using path_expression_type = path_expression<Json,JsonReference>; + using expression_type = expression<Json,JsonReference>; + using json_location_type = json_location<char_type>; + using json_location_node_type = json_location_node<char_type>; + using selector_type = jsonpath_selector<Json,JsonReference>; + + private: + + std::size_t line_; + std::size_t column_; + const char_type* begin_input_; + const char_type* end_input_; + const char_type* p_; + + using argument_type = std::vector<pointer>; + std::vector<argument_type> function_stack_; + std::vector<path_state> state_stack_; + std::vector<token_type> output_stack_; + std::vector<token_type> operator_stack_; + + public: + jsonpath_evaluator() + : line_(1), column_(1), + begin_input_(nullptr), end_input_(nullptr), + p_(nullptr) + { + } + + jsonpath_evaluator(std::size_t line, std::size_t column) + : line_(line), column_(column), + begin_input_(nullptr), end_input_(nullptr), + p_(nullptr) + { + } + + std::size_t line() const + { + return line_; + } + + std::size_t column() const + { + return column_; + } + + path_expression_type compile(static_resources<value_type,reference>& resources, const string_view_type& path) + { + std::error_code ec; + auto result = compile(resources, path, ec); + if (ec) + { + JSONCONS_THROW(jsonpath_error(ec, line_, column_)); + } + return result; + } 
+ + path_expression_type compile(static_resources<value_type,reference>& resources, + const string_view_type& path, + std::error_code& ec) + { + std::size_t selector_id = 0; + + string_type buffer; + string_type buffer2; + uint32_t cp = 0; + uint32_t cp2 = 0; + + begin_input_ = path.data(); + end_input_ = path.data() + path.length(); + p_ = begin_input_; + + slice slic; + bool paths_required = false; + int ancestor_depth = 0; + + state_stack_.emplace_back(path_state::start); + while (p_ < end_input_ && !state_stack_.empty()) + { + switch (state_stack_.back()) + { + case path_state::start: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '$': + case '@': + { + push_token(resources, token_type(resources.new_selector(current_node_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + state_stack_.emplace_back(path_state::relative_location); + ++p_; + ++column_; + break; + } + default: + { + state_stack_.emplace_back(path_state::relative_location); + state_stack_.emplace_back(path_state::expect_function_expr); + state_stack_.emplace_back(path_state::unquoted_string); + break; + } + } + break; + } + case path_state::root_or_current_node: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '$': + push_token(resources, token_type(root_node_arg), ec); + push_token(resources, token_type(resources.new_selector(root_selector<Json,JsonReference>(selector_id++))), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case '@': + push_token(resources, token_type(current_node_arg), ec); // ISSUE + push_token(resources, token_type(resources.new_selector(current_node_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::syntax_error; + return 
path_expression_type(); + } + break; + case path_state::recursive_descent_or_expression_lhs: + switch (*p_) + { + case '.': + push_token(resources, token_type(resources.new_selector(recursive_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + ++p_; + ++column_; + state_stack_.back() = path_state::name_or_lbracket; + break; + default: + state_stack_.back() = path_state::relative_path; + break; + } + break; + case path_state::name_or_lbracket: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '[': // [ can follow .. + state_stack_.back() = path_state::bracket_specifier_or_union; + ++p_; + ++column_; + break; + default: + buffer.clear(); + state_stack_.back() = path_state::relative_path; + break; + } + break; + case path_state::json_string: + { + //std::cout << "literal: " << buffer << "\n"; + push_token(resources, token_type(literal_arg, Json(buffer)), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); // json_value + break; + } + case path_state::path_or_literal_or_function: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '$': + case '@': + state_stack_.back() = path_state::relative_location; + state_stack_.push_back(path_state::root_or_current_node); + break; + case '(': + { + ++p_; + ++column_; + push_token(resources, lparen_arg, ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::expect_rparen; + state_stack_.emplace_back(path_state::expression_rhs); + state_stack_.emplace_back(path_state::path_or_literal_or_function); + break; + } + case '\'': + state_stack_.back() = path_state::json_string; + state_stack_.emplace_back(path_state::single_quoted_string); + ++p_; + ++column_; + break; + case '\"': + state_stack_.back() = path_state::json_string; + state_stack_.emplace_back(path_state::double_quoted_string); + ++p_; + ++column_; + 
break; + case '!': + { + ++p_; + ++column_; + push_token(resources, token_type(resources.get_unary_not()), ec); + if (ec) {return path_expression_type();} + break; + } + case '-': + { + ++p_; + ++column_; + push_token(resources, token_type(resources.get_unary_minus()), ec); + if (ec) {return path_expression_type();} + break; + } + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + { + state_stack_.back() = path_state::json_value; + state_stack_.emplace_back(path_state::number); + break; + } + default: + { + state_stack_.back() = path_state::json_text_or_function_name; + break; + } + } + break; + } + case path_state::json_text_or_function: + { + switch(*p_) + { + case '(': + { + auto f = resources.get_function(buffer, ec); + if (ec) + { + return path_expression_type(); + } + buffer.clear(); + push_token(resources, current_node_arg, ec); + if (ec) {return path_expression_type();} + push_token(resources, token_type(f), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::function_expression; + state_stack_.emplace_back(path_state::zero_or_one_arguments); + ++p_; + ++column_; + break; + } + default: + { + json_decoder<Json> decoder; + basic_json_parser<char_type> parser; + parser.update(buffer.data(),buffer.size()); + parser.parse_some(decoder, ec); + if (ec) + { + return path_expression_type(); + } + parser.finish_parse(decoder, ec); + if (ec) + { + return path_expression_type(); + } + push_token(resources, token_type(literal_arg, decoder.get_result()), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); + break; + } + } + break; + } + case path_state::json_value: + { + json_decoder<Json> decoder; + basic_json_parser<char_type> parser; + parser.update(buffer.data(),buffer.size()); + parser.parse_some(decoder, ec); + if (ec) + { + return path_expression_type(); + } + parser.finish_parse(decoder, ec); + if (ec) + { + return path_expression_type(); + } + 
push_token(resources, token_type(literal_arg, decoder.get_result()), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); + break; + } + case path_state::json_text_or_function_name: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '{': + case '[': + { + json_decoder<Json> decoder; + basic_json_parser<char_type> parser; + parser.update(p_,end_input_ - p_); + parser.parse_some(decoder, ec); + if (ec) + { + return path_expression_type(); + } + parser.finish_parse(decoder, ec); + if (ec) + { + return path_expression_type(); + } + push_token(resources, token_type(literal_arg, decoder.get_result()), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); + p_ = parser.current(); + column_ = column_ + parser.column() - 1; + break; + } + case '-':case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + state_stack_.back() = path_state::json_text_or_function; + state_stack_.emplace_back(path_state::number); + buffer.push_back(*p_); + ++p_; + ++column_; + break; + case '\"': + state_stack_.back() = path_state::json_text_or_function; + state_stack_.emplace_back(path_state::json_text_string); + buffer.push_back(*p_); + ++p_; + ++column_; + break; + default: + state_stack_.back() = path_state::json_text_or_function; + state_stack_.emplace_back(path_state::unquoted_string); + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case path_state::number: + switch (*p_) + { + case '-':case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + case 'e':case 'E':case '.': + buffer.push_back(*p_); + ++p_; + ++column_; + break; + default: + state_stack_.pop_back(); // number + break; + }; + break; + case path_state::json_text_string: + switch (*p_) + { + case '\\': + buffer.push_back(*p_); + ++p_; + ++column_; + if (p_ == end_input_) + { + ec = 
jsonpath_errc::unexpected_eof; + return path_expression_type(); + } + buffer.push_back(*p_); + ++p_; + ++column_; + break; + case '\"': + buffer.push_back(*p_); + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case path_state::relative_path: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '*': + push_token(resources, token_type(resources.new_selector(wildcard_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case '\'': + state_stack_.back() = path_state::identifier; + state_stack_.emplace_back(path_state::single_quoted_string); + ++p_; + ++column_; + break; + case '\"': + state_stack_.back() = path_state::identifier; + state_stack_.emplace_back(path_state::double_quoted_string); + ++p_; + ++column_; + break; + case '[': + case '.': + ec = jsonpath_errc::expected_relative_path; + return path_expression_type(); + default: + buffer.clear(); + state_stack_.back() = path_state::identifier_or_function_expr; + state_stack_.emplace_back(path_state::unquoted_string); + break; + } + break; + case path_state::identifier_or_function_expr: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '(': + { + auto f = resources.get_function(buffer, ec); + if (ec) + { + return path_expression_type(); + } + buffer.clear(); + push_token(resources, current_node_arg, ec); + push_token(resources, token_type(f), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::function_expression; + state_stack_.emplace_back(path_state::zero_or_one_arguments); + ++p_; + ++column_; + break; + } + default: + { + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + if (ec) {return path_expression_type();} + 
buffer.clear(); + state_stack_.pop_back(); + break; + } + } + break; + } + case path_state::expect_function_expr: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '(': + { + auto f = resources.get_function(buffer, ec); + if (ec) + { + return path_expression_type(); + } + buffer.clear(); + push_token(resources, current_node_arg, ec); + push_token(resources, token_type(f), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::function_expression; + state_stack_.emplace_back(path_state::zero_or_one_arguments); + ++p_; + ++column_; + break; + } + default: + { + ec = jsonpath_errc::expected_root_or_function; + return path_expression_type(); + } + } + break; + } + case path_state::function_expression: + { + + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ',': + push_token(resources, token_type(current_node_arg), ec); + if (ec) {return path_expression_type();} + push_token(resources, token_type(begin_expression_arg), ec); + if (ec) {return path_expression_type();} + if (ec) {return path_expression_type();} + state_stack_.emplace_back(path_state::argument); + state_stack_.emplace_back(path_state::expression_rhs); + state_stack_.emplace_back(path_state::path_or_literal_or_function); + ++p_; + ++column_; + break; + case ')': + { + push_token(resources, token_type(end_function_arg), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + } + default: + ec = jsonpath_errc::syntax_error; + return path_expression_type(); + } + break; + } + case path_state::zero_or_one_arguments: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ')': + state_stack_.pop_back(); + break; + default: + push_token(resources, token_type(begin_expression_arg), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = 
path_state::one_or_more_arguments; + state_stack_.emplace_back(path_state::argument); + state_stack_.emplace_back(path_state::expression_rhs); + state_stack_.emplace_back(path_state::path_or_literal_or_function); + break; + } + break; + } + case path_state::one_or_more_arguments: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ')': + state_stack_.pop_back(); + break; + case ',': + push_token(resources, token_type(begin_expression_arg), ec); + if (ec) {return path_expression_type();} + state_stack_.emplace_back(path_state::argument); + state_stack_.emplace_back(path_state::expression_rhs); + state_stack_.emplace_back(path_state::path_or_literal_or_function); + ++p_; + ++column_; + break; + } + break; + } + case path_state::argument: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ',': + case ')': + { + push_token(resources, token_type(end_argument_expression_arg), ec); + push_token(resources, argument_arg, ec); + //push_token(resources, argument_arg, ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + break; + } + default: + ec = jsonpath_errc::expected_comma_or_rparen; + return path_expression_type(); + } + break; + } + case path_state::unquoted_string: + switch (*p_) + { + case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'g':case 'h':case 'i':case 'j':case 'k':case 'l':case 'm':case 'n':case 'o':case 'p':case 'q':case 'r':case 's':case 't':case 'u':case 'v':case 'w':case 'x':case 'y':case 'z': + case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case 'G':case 'H':case 'I':case 'J':case 'K':case 'L':case 'M':case 'N':case 'O':case 'P':case 'Q':case 'R':case 'S':case 'T':case 'U':case 'V':case 'W':case 'X':case 'Y':case 'Z': + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + case '_': + buffer.push_back(*p_); + ++p_; + ++column_; + break; + default: + if 
(typename std::make_unsigned<char_type>::type(*p_) > 127) + { + buffer.push_back(*p_); + ++p_; + ++column_; + } + else + { + state_stack_.pop_back(); // unquoted_string + } + break; + }; + break; + case path_state::relative_location: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '.': + state_stack_.emplace_back(path_state::recursive_descent_or_expression_lhs); + ++p_; + ++column_; + break; + case '[': + state_stack_.emplace_back(path_state::bracket_specifier_or_union); + ++p_; + ++column_; + break; + case '^': + ancestor_depth = 0; + state_stack_.emplace_back(path_state::parent_operator); + state_stack_.emplace_back(path_state::ancestor_depth); + break; + default: + state_stack_.pop_back(); + break; + }; + break; + case path_state::parent_operator: + { + push_token(resources, token_type(resources.new_selector(parent_node_selector<Json,JsonReference>(ancestor_depth))), ec); + paths_required = true; + ancestor_depth = 0; + ++p_; + ++column_; + state_stack_.pop_back(); + break; + } + case path_state::ancestor_depth: + { + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '^': + { + ++ancestor_depth; + ++p_; + ++column_; + break; + } + default: + { + state_stack_.pop_back(); + break; + } + } + break; + } + case path_state::expression_rhs: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '.': + state_stack_.emplace_back(path_state::recursive_descent_or_expression_lhs); + ++p_; + ++column_; + break; + case '[': + state_stack_.emplace_back(path_state::bracket_specifier_or_union); + ++p_; + ++column_; + break; + case ')': + { + state_stack_.pop_back(); + break; + } + case '|': + ++p_; + ++column_; + state_stack_.emplace_back(path_state::path_or_literal_or_function); + state_stack_.emplace_back(path_state::expect_or); + break; + case '&': + ++p_; + ++column_; + 
state_stack_.emplace_back(path_state::path_or_literal_or_function); + state_stack_.emplace_back(path_state::expect_and); + break; + case '<': + case '>': + { + state_stack_.emplace_back(path_state::comparator_expression); + break; + } + case '=': + { + state_stack_.emplace_back(path_state::eq_or_regex); + ++p_; + ++column_; + break; + } + case '!': + { + ++p_; + ++column_; + state_stack_.emplace_back(path_state::path_or_literal_or_function); + state_stack_.emplace_back(path_state::cmp_ne); + break; + } + case '+': + state_stack_.emplace_back(path_state::path_or_literal_or_function); + push_token(resources, token_type(resources.get_plus_operator()), ec); + if (ec) {return path_expression_type();} + ++p_; + ++column_; + break; + case '-': + state_stack_.emplace_back(path_state::path_or_literal_or_function); + push_token(resources, token_type(resources.get_minus_operator()), ec); + if (ec) {return path_expression_type();} + ++p_; + ++column_; + break; + case '*': + state_stack_.emplace_back(path_state::path_or_literal_or_function); + push_token(resources, token_type(resources.get_mult_operator()), ec); + if (ec) {return path_expression_type();} + ++p_; + ++column_; + break; + case '/': + state_stack_.emplace_back(path_state::path_or_literal_or_function); + push_token(resources, token_type(resources.get_div_operator()), ec); + if (ec) {return path_expression_type();} + ++p_; + ++column_; + break; + case '%': + state_stack_.emplace_back(path_state::path_or_literal_or_function); + push_token(resources, token_type(resources.get_modulus_operator()), ec); + if (ec) {return path_expression_type();} + ++p_; + ++column_; + break; + case ']': + case ',': + state_stack_.pop_back(); + break; + default: + ec = jsonpath_errc::expected_separator; + return path_expression_type(); + }; + break; + case path_state::expect_or: + { + switch (*p_) + { + case '|': + push_token(resources, token_type(resources.get_or_operator()), ec); + if (ec) {return path_expression_type();} + 
state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_or; + return path_expression_type(); + } + break; + } + case path_state::expect_and: + { + switch(*p_) + { + case '&': + push_token(resources, token_type(resources.get_and_operator()), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); // expect_and + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_and; + return path_expression_type(); + } + break; + } + case path_state::comparator_expression: + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '<': + ++p_; + ++column_; + state_stack_.back() = path_state::path_or_literal_or_function; + state_stack_.emplace_back(path_state::cmp_lt_or_lte); + break; + case '>': + ++p_; + ++column_; + state_stack_.back() = path_state::path_or_literal_or_function; + state_stack_.emplace_back(path_state::cmp_gt_or_gte); + break; + default: + if (state_stack_.size() > 1) + { + state_stack_.pop_back(); + } + else + { + ec = jsonpath_errc::syntax_error; + return path_expression_type(); + } + break; + } + break; + case path_state::eq_or_regex: + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '=': + { + push_token(resources, token_type(resources.get_eq_operator()), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::path_or_literal_or_function; + ++p_; + ++column_; + break; + } + case '~': + { + ++p_; + ++column_; + state_stack_.emplace_back(path_state::expect_regex); + break; + } + default: + if (state_stack_.size() > 1) + { + state_stack_.pop_back(); + } + else + { + ec = jsonpath_errc::syntax_error; + return path_expression_type(); + } + break; + } + break; + case path_state::expect_regex: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '/': + state_stack_.back() = path_state::regex; + 
state_stack_.emplace_back(path_state::regex_options); + state_stack_.emplace_back(path_state::regex_pattern); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_forward_slash; + return path_expression_type(); + }; + break; + case path_state::regex: + { + std::regex::flag_type options = std::regex_constants::ECMAScript; + if (buffer2.find('i') != string_type::npos) + { + options |= std::regex_constants::icase; + } + std::basic_regex<char_type> pattern(buffer, options); + push_token(resources, resources.get_regex_operator(std::move(pattern)), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + buffer2.clear(); + state_stack_.pop_back(); + break; + } + case path_state::regex_pattern: + { + switch (*p_) + { + case '/': + { + state_stack_.pop_back(); + ++p_; + ++column_; + } + break; + + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + } + break; + } + case path_state::regex_options: + { + if (*p_ == 'i') + { + buffer2.push_back(*p_); + ++p_; + ++column_; + } + else + { + state_stack_.pop_back(); + } + break; + } + case path_state::cmp_lt_or_lte: + { + switch(*p_) + { + case '=': + push_token(resources, token_type(resources.get_lte_operator()), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + push_token(resources, token_type(resources.get_lt_operator()), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + break; + } + break; + } + case path_state::cmp_gt_or_gte: + { + switch(*p_) + { + case '=': + push_token(resources, token_type(resources.get_gte_operator()), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + //std::cout << "Parse: gt_operator\n"; + push_token(resources, token_type(resources.get_gt_operator()), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + break; + } + break; + } + case path_state::cmp_ne: + { + switch(*p_) + { 
+ case '=': + push_token(resources, token_type(resources.get_ne_operator()), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_comparator; + return path_expression_type(); + } + break; + } + case path_state::identifier: + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); + break; + case path_state::single_quoted_string: + switch (*p_) + { + case '\'': + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case '\\': + state_stack_.emplace_back(path_state::quoted_string_escape_char); + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case path_state::double_quoted_string: + switch (*p_) + { + case '\"': + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case '\\': + state_stack_.emplace_back(path_state::quoted_string_escape_char); + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + }; + break; + case path_state::comma_or_rbracket: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ',': + state_stack_.back() = path_state::bracket_specifier_or_union; + ++p_; + ++column_; + break; + case ']': + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_comma_or_rbracket; + return path_expression_type(); + } + break; + case path_state::expect_rbracket: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ']': + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + case path_state::expect_rparen: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + 
advance_past_space_character(); + break; + case ')': + ++p_; + ++column_; + push_token(resources, rparen_arg, ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::expression_rhs; + break; + default: + ec = jsonpath_errc::expected_rparen; + return path_expression_type(); + } + break; + case path_state::bracket_specifier_or_union: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '(': + { + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, token_type(begin_expression_arg), ec); + push_token(resources, lparen_arg, ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::expression); + state_stack_.emplace_back(path_state::expect_rparen); + state_stack_.emplace_back(path_state::expression_rhs); + state_stack_.emplace_back(path_state::path_or_literal_or_function); + ++p_; + ++column_; + break; + } + case '?': + { + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, token_type(begin_filter_arg), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::filter_expression); + state_stack_.emplace_back(path_state::expression_rhs); + state_stack_.emplace_back(path_state::path_or_literal_or_function); + ++p_; + ++column_; + break; + } + case '*': + state_stack_.back() = path_state::wildcard_or_union; + ++p_; + ++column_; + break; + case '\'': + state_stack_.back() = path_state::identifier_or_union; + state_stack_.push_back(path_state::single_quoted_string); + ++p_; + ++column_; + break; + case '\"': + state_stack_.back() = path_state::identifier_or_union; + state_stack_.push_back(path_state::double_quoted_string); + ++p_; + ++column_; + break; + case ':': // slice_expression + state_stack_.back() = path_state::index_or_slice_or_union; + break; 
+ case '-':case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + state_stack_.back() = path_state::index_or_slice_or_union; + state_stack_.emplace_back(path_state::integer); + break; + case '$': + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, root_node_arg, ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::relative_location); + ++p_; + ++column_; + break; + case '@': + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, token_type(current_node_arg), ec); // ISSUE + push_token(resources, token_type(resources.new_selector(current_node_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::relative_location); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_bracket_specifier_or_union; + return path_expression_type(); + } + break; + case path_state::union_element: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ':': // slice_expression + state_stack_.back() = path_state::index_or_slice; + break; + case '-':case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + state_stack_.back() = path_state::index_or_slice; + state_stack_.emplace_back(path_state::integer); + break; + case '(': + { + push_token(resources, token_type(begin_expression_arg), ec); + push_token(resources, lparen_arg, ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::expression; + state_stack_.emplace_back(path_state::expect_rparen); + state_stack_.emplace_back(path_state::expression_rhs); + state_stack_.emplace_back(path_state::path_or_literal_or_function); + ++p_; + ++column_; + break; + } + case '?': + { + 
push_token(resources, token_type(begin_filter_arg), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::filter_expression; + state_stack_.emplace_back(path_state::expression_rhs); + state_stack_.emplace_back(path_state::path_or_literal_or_function); + ++p_; + ++column_; + break; + } + case '*': + push_token(resources, token_type(resources.new_selector(wildcard_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::relative_location; + ++p_; + ++column_; + break; + case '$': + push_token(resources, token_type(root_node_arg), ec); + push_token(resources, token_type(resources.new_selector(root_selector<Json,JsonReference>(selector_id++))), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::relative_location; + ++p_; + ++column_; + break; + case '@': + push_token(resources, token_type(current_node_arg), ec); // ISSUE + push_token(resources, token_type(resources.new_selector(current_node_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::relative_location; + ++p_; + ++column_; + break; + case '\'': + state_stack_.back() = path_state::identifier; + state_stack_.push_back(path_state::single_quoted_string); + ++p_; + ++column_; + break; + case '\"': + state_stack_.back() = path_state::identifier; + state_stack_.push_back(path_state::double_quoted_string); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_bracket_specifier_or_union; + return path_expression_type(); + } + break; + + case path_state::integer: + switch(*p_) + { + case '-': + buffer.push_back(*p_); + state_stack_.back() = path_state::digit; + ++p_; + ++column_; + break; + default: + state_stack_.back() = path_state::digit; + break; + } + break; + case path_state::digit: + switch(*p_) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + 
buffer.push_back(*p_); + ++p_; + ++column_; + break; + default: + state_stack_.pop_back(); // digit + break; + } + break; + case path_state::index_or_slice_or_union: + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ']': + { + if (buffer.empty()) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + int64_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + push_token(resources, token_type(resources.new_selector(index_selector<Json,JsonReference>(n))), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); // index_or_slice_or_union + ++p_; + ++column_; + break; + } + case ',': + { + push_token(resources, token_type(begin_union_arg), ec); + if (ec) {return path_expression_type();} + if (buffer.empty()) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + else + { + int64_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + push_token(resources, token_type(resources.new_selector(index_selector<Json,JsonReference>(n))), ec); + if (ec) {return path_expression_type();} + + buffer.clear(); + } + push_token(resources, token_type(separator_arg), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::union_element); + ++p_; + ++column_; + break; + } + case ':': + { + if (!buffer.empty()) + { + int64_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + slic.start_ = n; + buffer.clear(); + } + push_token(resources, token_type(begin_union_arg), ec); + if (ec) 
{return path_expression_type();} + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::slice_expression_stop); + state_stack_.emplace_back(path_state::integer); + ++p_; + ++column_; + break; + } + default: + ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + case path_state::slice_expression_stop: + { + if (!buffer.empty()) + { + int64_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + slic.stop_ = jsoncons::optional<int64_t>(n); + buffer.clear(); + } + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ']': + case ',': + push_token(resources, token_type(resources.new_selector(slice_selector<Json,JsonReference>(slic))), ec); + if (ec) {return path_expression_type();} + slic = slice{}; + state_stack_.pop_back(); // bracket_specifier2 + break; + case ':': + state_stack_.back() = path_state::slice_expression_step; + state_stack_.emplace_back(path_state::integer); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + } + case path_state::slice_expression_step: + { + if (!buffer.empty()) + { + int64_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + if (n == 0) + { + ec = jsonpath_errc::step_cannot_be_zero; + return path_expression_type(); + } + slic.step_ = n; + buffer.clear(); + } + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ']': + case ',': + push_token(resources, token_type(resources.new_selector(slice_selector<Json,JsonReference>(slic))), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + slic = slice{}; + state_stack_.pop_back(); 
// slice_expression_step + break; + default: + ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + } + + case path_state::bracketed_unquoted_name_or_union: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ']': + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case '.': + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::relative_path); + ++p_; + ++column_; + break; + case '[': + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + if (ec) {return path_expression_type();} + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::relative_path); + ++p_; + ++column_; + break; + case ',': + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + push_token(resources, token_type(separator_arg), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::relative_path); + ++p_; + ++column_; + break; + default: + buffer.push_back(*p_); + ++p_; + ++column_; + break; + } + break; + case path_state::union_expression: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '.': + 
state_stack_.emplace_back(path_state::relative_path); + ++p_; + ++column_; + break; + case '[': + state_stack_.emplace_back(path_state::bracket_specifier_or_union); + ++p_; + ++column_; + break; + case ',': + push_token(resources, token_type(separator_arg), ec); + if (ec) {return path_expression_type();} + state_stack_.emplace_back(path_state::union_element); + ++p_; + ++column_; + break; + case ']': + push_token(resources, token_type(end_union_arg), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + case path_state::identifier_or_union: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ']': + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case ',': + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + push_token(resources, token_type(separator_arg), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.back() = path_state::union_expression; // union + state_stack_.emplace_back(path_state::union_element); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + case path_state::bracketed_wildcard: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case '[': + case ']': + case ',': + case '.': + push_token(resources, token_type(resources.new_selector(wildcard_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); + break; + default: 
+ ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + case path_state::index_or_slice: + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ',': + case ']': + { + if (buffer.empty()) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + else + { + int64_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + push_token(resources, token_type(resources.new_selector(index_selector<Json,JsonReference>(n))), ec); + if (ec) {return path_expression_type();} + + buffer.clear(); + } + state_stack_.pop_back(); // bracket_specifier + break; + } + case ':': + { + if (!buffer.empty()) + { + int64_t n{0}; + auto r = jsoncons::detail::to_integer(buffer.data(), buffer.size(), n); + if (!r) + { + ec = jsonpath_errc::invalid_number; + return path_expression_type(); + } + slic.start_ = n; + buffer.clear(); + } + state_stack_.back() = path_state::slice_expression_stop; + state_stack_.emplace_back(path_state::integer); + ++p_; + ++column_; + break; + } + default: + ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + case path_state::wildcard_or_union: + switch (*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ']': + push_token(resources, token_type(resources.new_selector(wildcard_selector<Json,JsonReference>())), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.pop_back(); + ++p_; + ++column_; + break; + case ',': + push_token(resources, token_type(begin_union_arg), ec); + push_token(resources, token_type(resources.new_selector(wildcard_selector<Json,JsonReference>())), ec); + push_token(resources, token_type(separator_arg), ec); + if (ec) {return path_expression_type();} + buffer.clear(); + state_stack_.back() = 
path_state::union_expression; // union + state_stack_.emplace_back(path_state::union_element); + ++p_; + ++column_; + break; + default: + ec = jsonpath_errc::expected_rbracket; + return path_expression_type(); + } + break; + case path_state::quoted_string_escape_char: + switch (*p_) + { + case '\"': + buffer.push_back('\"'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case '\'': + buffer.push_back('\''); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case '\\': + buffer.push_back('\\'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case '/': + buffer.push_back('/'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'b': + buffer.push_back('\b'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'f': + buffer.push_back('\f'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'n': + buffer.push_back('\n'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'r': + buffer.push_back('\r'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 't': + buffer.push_back('\t'); + ++p_; + ++column_; + state_stack_.pop_back(); + break; + case 'u': + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u1; + break; + default: + ec = jsonpath_errc::illegal_escaped_character; + return path_expression_type(); + } + break; + case path_state::escape_u1: + cp = append_to_codepoint(0, *p_, ec); + if (ec) + { + return path_expression_type(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u2; + break; + case path_state::escape_u2: + cp = append_to_codepoint(cp, *p_, ec); + if (ec) + { + return path_expression_type(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u3; + break; + case path_state::escape_u3: + cp = append_to_codepoint(cp, *p_, ec); + if (ec) + { + return path_expression_type(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u4; + break; + case path_state::escape_u4: + cp = append_to_codepoint(cp, 
*p_, ec); + if (ec) + { + return path_expression_type(); + } + if (unicode_traits::is_high_surrogate(cp)) + { + ++p_; + ++column_; + state_stack_.back() = path_state::escape_expect_surrogate_pair1; + } + else + { + unicode_traits::convert(&cp, 1, buffer); + ++p_; + ++column_; + state_stack_.pop_back(); + } + break; + case path_state::escape_expect_surrogate_pair1: + switch (*p_) + { + case '\\': + ++p_; + ++column_; + state_stack_.back() = path_state::escape_expect_surrogate_pair2; + break; + default: + ec = jsonpath_errc::invalid_codepoint; + return path_expression_type(); + } + break; + case path_state::escape_expect_surrogate_pair2: + switch (*p_) + { + case 'u': + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u5; + break; + default: + ec = jsonpath_errc::invalid_codepoint; + return path_expression_type(); + } + break; + case path_state::escape_u5: + cp2 = append_to_codepoint(0, *p_, ec); + if (ec) + { + return path_expression_type(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u6; + break; + case path_state::escape_u6: + cp2 = append_to_codepoint(cp2, *p_, ec); + if (ec) + { + return path_expression_type(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u7; + break; + case path_state::escape_u7: + cp2 = append_to_codepoint(cp2, *p_, ec); + if (ec) + { + return path_expression_type(); + } + ++p_; + ++column_; + state_stack_.back() = path_state::escape_u8; + break; + case path_state::escape_u8: + { + cp2 = append_to_codepoint(cp2, *p_, ec); + if (ec) + { + return path_expression_type(); + } + uint32_t codepoint = 0x10000 + ((cp & 0x3FF) << 10) + (cp2 & 0x3FF); + unicode_traits::convert(&codepoint, 1, buffer); + state_stack_.pop_back(); + ++p_; + ++column_; + break; + } + case path_state::filter_expression: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ',': + case ']': + { + push_token(resources, token_type(end_filter_arg), ec); + if 
(ec) {return path_expression_type();} + state_stack_.pop_back(); + break; + } + default: + ec = jsonpath_errc::expected_comma_or_rbracket; + return path_expression_type(); + } + break; + } + case path_state::expression: + { + switch(*p_) + { + case ' ':case '\t':case '\r':case '\n': + advance_past_space_character(); + break; + case ',': + case ']': + { + push_token(resources, token_type(end_index_expression_arg), ec); + if (ec) {return path_expression_type();} + state_stack_.pop_back(); + break; + } + default: + ec = jsonpath_errc::expected_comma_or_rbracket; + return path_expression_type(); + } + break; + } + default: + ++p_; + ++column_; + break; + } + } + + if (state_stack_.empty()) + { + ec = jsonpath_errc::syntax_error; + return path_expression_type(); + } + + while (state_stack_.size() > 1) + { + switch (state_stack_.back()) + { + case path_state::name_or_lbracket: + state_stack_.back() = path_state::relative_path; + break; + case path_state::relative_path: + state_stack_.back() = path_state::identifier_or_function_expr; + state_stack_.emplace_back(path_state::unquoted_string); + break; + case path_state::identifier_or_function_expr: + if (!buffer.empty()) // Can't be quoted string + { + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + if (ec) {return path_expression_type();} + } + state_stack_.pop_back(); + break; + case path_state::unquoted_string: + state_stack_.pop_back(); // unquoted_string + break; + case path_state::relative_location: + state_stack_.pop_back(); + break; + case path_state::identifier: + if (!buffer.empty()) // Can't be quoted string + { + push_token(resources, token_type(resources.new_selector(identifier_selector<Json,JsonReference>(buffer))), ec); + if (ec) {return path_expression_type();} + } + state_stack_.pop_back(); + break; + case path_state::parent_operator: + { + push_token(resources, 
token_type(resources.new_selector(parent_node_selector<Json,JsonReference>(ancestor_depth))), ec); + if (ec) { return path_expression_type(); } + paths_required = true; + state_stack_.pop_back(); + break; + } + case path_state::ancestor_depth: + state_stack_.pop_back(); + break; + default: + ec = jsonpath_errc::syntax_error; + return path_expression_type(); + } + } + + if (state_stack_.size() > 2) + { + ec = jsonpath_errc::unexpected_eof; + return path_expression_type(); + } + + //std::cout << "\nTokens\n\n"; + //for (const auto& tok : output_stack_) + //{ + // std::cout << tok.to_string() << "\n"; + //} + //std::cout << "\n"; + + if (output_stack_.empty() || !operator_stack_.empty()) + { + ec = jsonpath_errc::unexpected_eof; + return path_expression_type(); + } + + return path_expression_type(output_stack_.back().selector_, paths_required); + } + + void advance_past_space_character() + { + switch (*p_) + { + case ' ':case '\t': + ++p_; + ++column_; + break; + case '\r': + if (p_+1 < end_input_ && *(p_+1) == '\n') + ++p_; + ++line_; + column_ = 1; + ++p_; + break; + case '\n': + ++line_; + column_ = 1; + ++p_; + break; + default: + break; + } + } + + void unwind_rparen(std::error_code& ec) + { + auto it = operator_stack_.rbegin(); + while (it != operator_stack_.rend() && !it->is_lparen()) + { + output_stack_.emplace_back(std::move(*it)); + ++it; + } + if (it == operator_stack_.rend()) + { + ec = jsonpath_errc::unbalanced_parentheses; + return; + } + ++it; + operator_stack_.erase(it.base(),operator_stack_.end()); + } + + void push_token(jsoncons::jsonpath::detail::static_resources<value_type,reference>& resources, token_type&& tok, std::error_code& ec) + { + //std::cout << tok.to_string() << "\n"; + switch (tok.token_kind()) + { + case jsonpath_token_kind::begin_filter: + output_stack_.emplace_back(std::move(tok)); + operator_stack_.emplace_back(token_type(lparen_arg)); + break; + case jsonpath_token_kind::end_filter: + { + //std::cout << "push_token end_filter 
1\n"; + //for (const auto& tok2 : output_stack_) + //{ + // std::cout << tok2.to_string() << "\n"; + //} + //std::cout << "\n\n"; + unwind_rparen(ec); + if (ec) + { + return; + } + std::vector<token_type> toks; + auto it = output_stack_.rbegin(); + while (it != output_stack_.rend() && it->token_kind() != jsonpath_token_kind::begin_filter) + { + toks.emplace_back(std::move(*it)); + ++it; + } + if (it == output_stack_.rend()) + { + ec = jsonpath_errc::unbalanced_parentheses; + return; + } + std::reverse(toks.begin(), toks.end()); + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + + if (!output_stack_.empty() && output_stack_.back().is_path()) + { + output_stack_.back().selector_->append_selector(resources.new_selector(filter_selector<Json,JsonReference>(expression_type(std::move(toks))))); + } + else + { + output_stack_.emplace_back(token_type(resources.new_selector(filter_selector<Json,JsonReference>(expression_type(std::move(toks)))))); + } + //std::cout << "push_token end_filter 2\n"; + //for (const auto& tok2 : output_stack_) + //{ + // std::cout << tok2.to_string() << "\n"; + //} + //std::cout << "\n\n"; + break; + } + case jsonpath_token_kind::begin_expression: + //std::cout << "begin_expression\n"; + output_stack_.emplace_back(std::move(tok)); + operator_stack_.emplace_back(token_type(lparen_arg)); + break; + case jsonpath_token_kind::end_index_expression: + { + //std::cout << "jsonpath_token_kind::end_index_expression\n"; + //for (const auto& t : output_stack_) + //{ + // std::cout << t.to_string() << "\n"; + //} + //std::cout << "/jsonpath_token_kind::end_index_expression\n"; + unwind_rparen(ec); + if (ec) + { + return; + } + std::vector<token_type> toks; + auto it = output_stack_.rbegin(); + while (it != output_stack_.rend() && it->token_kind() != jsonpath_token_kind::begin_expression) + { + toks.emplace_back(std::move(*it)); + ++it; + } + if (it == output_stack_.rend()) + { + ec = jsonpath_errc::unbalanced_parentheses; + return; + } + 
std::reverse(toks.begin(), toks.end()); + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + + if (!output_stack_.empty() && output_stack_.back().is_path()) + { + output_stack_.back().selector_->append_selector(resources.new_selector(index_expression_selector<Json,JsonReference>(expression_type(std::move(toks))))); + } + else + { + output_stack_.emplace_back(token_type(resources.new_selector(index_expression_selector<Json,JsonReference>(expression_type(std::move(toks)))))); + } + break; + } + case jsonpath_token_kind::end_argument_expression: + { + //std::cout << "jsonpath_token_kind::end_index_expression\n"; + //for (const auto& t : output_stack_) + //{ + // std::cout << t.to_string() << "\n"; + //} + //std::cout << "/jsonpath_token_kind::end_index_expression\n"; + unwind_rparen(ec); + if (ec) + { + return; + } + std::vector<token_type> toks; + auto it = output_stack_.rbegin(); + while (it != output_stack_.rend() && it->token_kind() != jsonpath_token_kind::begin_expression) + { + toks.emplace_back(std::move(*it)); + ++it; + } + if (it == output_stack_.rend()) + { + ec = jsonpath_errc::unbalanced_parentheses; + return; + } + std::reverse(toks.begin(), toks.end()); + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + output_stack_.emplace_back(token_type(jsoncons::make_unique<expression_type>(std::move(toks)))); + break; + } + case jsonpath_token_kind::selector: + { + if (!output_stack_.empty() && output_stack_.back().is_path()) + { + output_stack_.back().selector_->append_selector(std::move(tok.selector_)); + } + else + { + output_stack_.emplace_back(std::move(tok)); + } + break; + } + case jsonpath_token_kind::separator: + output_stack_.emplace_back(std::move(tok)); + break; + case jsonpath_token_kind::begin_union: + output_stack_.emplace_back(std::move(tok)); + break; + case jsonpath_token_kind::end_union: + { + std::vector<selector_type*> expressions; + auto it = output_stack_.rbegin(); + while (it != output_stack_.rend() && 
it->token_kind() != jsonpath_token_kind::begin_union) + { + if (it->token_kind() == jsonpath_token_kind::selector) + { + expressions.emplace_back(std::move(it->selector_)); + } + do + { + ++it; + } + while (it != output_stack_.rend() && it->token_kind() != jsonpath_token_kind::begin_union && it->token_kind() != jsonpath_token_kind::separator); + if (it->token_kind() == jsonpath_token_kind::separator) + { + ++it; + } + } + if (it == output_stack_.rend()) + { + ec = jsonpath_errc::unbalanced_parentheses; + return; + } + std::reverse(expressions.begin(), expressions.end()); + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + + if (!output_stack_.empty() && output_stack_.back().is_path()) + { + output_stack_.back().selector_->append_selector(resources.new_selector(union_selector<Json,JsonReference>(std::move(expressions)))); + } + else + { + output_stack_.emplace_back(token_type(resources.new_selector(union_selector<Json,JsonReference>(std::move(expressions))))); + } + break; + } + case jsonpath_token_kind::lparen: + operator_stack_.emplace_back(std::move(tok)); + break; + case jsonpath_token_kind::rparen: + { + unwind_rparen(ec); + break; + } + case jsonpath_token_kind::end_function: + { + //std::cout << "jsonpath_token_kind::end_function\n"; + unwind_rparen(ec); + if (ec) + { + return; + } + std::vector<token_type> toks; + auto it = output_stack_.rbegin(); + std::size_t arg_count = 0; + while (it != output_stack_.rend() && it->token_kind() != jsonpath_token_kind::function) + { + if (it->token_kind() == jsonpath_token_kind::argument) + { + ++arg_count; + } + toks.emplace_back(std::move(*it)); + ++it; + } + if (it == output_stack_.rend()) + { + ec = jsonpath_errc::unbalanced_parentheses; + return; + } + std::reverse(toks.begin(), toks.end()); + if (it->arity() && arg_count != *(it->arity())) + { + ec = jsonpath_errc::invalid_arity; + return; + } + toks.push_back(std::move(*it)); + ++it; + output_stack_.erase(it.base(),output_stack_.end()); + + if 
(!output_stack_.empty() && output_stack_.back().is_path()) + { + output_stack_.back().selector_->append_selector(resources.new_selector(function_selector<Json,JsonReference>(expression_type(std::move(toks))))); + } + else + { + output_stack_.emplace_back(token_type(resources.new_selector(function_selector<Json,JsonReference>(std::move(toks))))); + } + break; + } + case jsonpath_token_kind::literal: + if (!output_stack_.empty() && (output_stack_.back().token_kind() == jsonpath_token_kind::current_node || output_stack_.back().token_kind() == jsonpath_token_kind::root_node)) + { + output_stack_.back() = std::move(tok); + } + else + { + output_stack_.emplace_back(std::move(tok)); + } + break; + case jsonpath_token_kind::function: + output_stack_.emplace_back(std::move(tok)); + operator_stack_.emplace_back(token_type(lparen_arg)); + break; + case jsonpath_token_kind::argument: + output_stack_.emplace_back(std::move(tok)); + break; + case jsonpath_token_kind::root_node: + case jsonpath_token_kind::current_node: + output_stack_.emplace_back(std::move(tok)); + break; + case jsonpath_token_kind::unary_operator: + case jsonpath_token_kind::binary_operator: + { + if (operator_stack_.empty() || operator_stack_.back().is_lparen()) + { + operator_stack_.emplace_back(std::move(tok)); + } + else if (tok.precedence_level() < operator_stack_.back().precedence_level() + || (tok.precedence_level() == operator_stack_.back().precedence_level() && tok.is_right_associative())) + { + operator_stack_.emplace_back(std::move(tok)); + } + else + { + auto it = operator_stack_.rbegin(); + while (it != operator_stack_.rend() && it->is_operator() + && (tok.precedence_level() > it->precedence_level() + || (tok.precedence_level() == it->precedence_level() && tok.is_right_associative()))) + { + output_stack_.emplace_back(std::move(*it)); + ++it; + } + + operator_stack_.erase(it.base(),operator_stack_.end()); + operator_stack_.emplace_back(std::move(tok)); + } + break; + } + default: + break; + } + 
//std::cout << " " << "Output Stack\n"; + //for (auto&& t : output_stack_) + //{ + // std::cout << t.to_string(2) << "\n"; + //} + //if (!operator_stack_.empty()) + //{ + // std::cout << " " << "Operator Stack\n"; + // for (auto&& t : operator_stack_) + // { + // std::cout << t.to_string(2) << "\n"; + // } + //} + } + + uint32_t append_to_codepoint(uint32_t cp, int c, std::error_code& ec) + { + cp *= 16; + if (c >= '0' && c <= '9') + { + cp += c - '0'; + } + else if (c >= 'a' && c <= 'f') + { + cp += c - 'a' + 10; + } + else if (c >= 'A' && c <= 'F') + { + cp += c - 'A' + 10; + } + else + { + ec = jsonpath_errc::invalid_codepoint; + } + return cp; + } + }; + + } // namespace detail + + template <class Json,class JsonReference = const Json&> + class jsonpath_expression + { + public: + using evaluator_t = typename jsoncons::jsonpath::detail::jsonpath_evaluator<Json, JsonReference>; + using char_type = typename evaluator_t::char_type; + using string_type = typename evaluator_t::string_type; + using string_view_type = typename evaluator_t::string_view_type; + using value_type = typename evaluator_t::value_type; + using reference = typename evaluator_t::reference; + using parameter_type = parameter<Json>; + using json_selector_t = typename evaluator_t::path_expression_type; + using path_value_pair_type = typename evaluator_t::path_value_pair_type; + using json_location_type = typename evaluator_t::json_location_type; + using function_type = std::function<value_type(jsoncons::span<const parameter_type>, std::error_code& ec)>; + private: + jsoncons::jsonpath::detail::static_resources<value_type,reference> static_resources_; + json_selector_t expr_; + public: + jsonpath_expression(jsoncons::jsonpath::detail::static_resources<value_type,reference>&& resources, + json_selector_t&& expr) + : static_resources_(std::move(resources)), + expr_(std::move(expr)) + { + } + + jsonpath_expression(jsoncons::jsonpath::detail::static_resources<value_type,reference>&& resources, + 
json_selector_t&& expr, std::vector<function_type>&& custom_functions) + : static_resources_(std::move(resources)), + expr_(std::move(expr), std::move(custom_functions)) + { + } + + template <class BinaryCallback> + typename std::enable_if<type_traits::is_binary_function_object<BinaryCallback,const string_type&,reference>::value,void>::type + evaluate(reference instance, BinaryCallback callback, result_options options = result_options()) + { + jsoncons::jsonpath::detail::dynamic_resources<Json,reference> resources; + auto f = [&callback](const json_location_type& path, reference val) + { + callback(path.to_string(), val); + }; + expr_.evaluate(resources, instance, resources.root_path_node(), instance, f, options); + } + + Json evaluate(reference instance, result_options options = result_options()) + { + if ((options & result_options::path) == result_options::path) + { + jsoncons::jsonpath::detail::dynamic_resources<Json,reference> resources; + + Json result(json_array_arg); + auto callback = [&result](const json_location_type& p, reference) + { + result.emplace_back(p.to_string()); + }; + expr_.evaluate(resources, instance, resources.root_path_node(), instance, callback, options); + return result; + } + else + { + jsoncons::jsonpath::detail::dynamic_resources<Json,reference> resources; + return expr_.evaluate(resources, instance, resources.current_path_node(), instance, options); + } + } + + static jsonpath_expression compile(const string_view_type& path) + { + jsoncons::jsonpath::detail::static_resources<value_type,reference> resources; + + evaluator_t e; + json_selector_t expr = e.compile(resources, path); + return jsonpath_expression(std::move(resources), std::move(expr)); + } + + static jsonpath_expression compile(const string_view_type& path, std::error_code& ec) + { + jsoncons::jsonpath::detail::static_resources<value_type,reference> resources; + evaluator_t e; + json_selector_t expr = e.compile(resources, path, ec); + return 
jsonpath_expression(std::move(resources), std::move(expr)); + } + + static jsonpath_expression compile(const string_view_type& path, + const custom_functions<Json>& functions) + { + jsoncons::jsonpath::detail::static_resources<value_type,reference> resources(functions); + + evaluator_t e; + json_selector_t expr = e.compile(resources, path); + return jsonpath_expression(std::move(resources), std::move(expr)); + } + + static jsonpath_expression compile(const string_view_type& path, + const custom_functions<Json>& functions, + std::error_code& ec) + { + jsoncons::jsonpath::detail::static_resources<value_type,reference> resources(functions); + evaluator_t e; + json_selector_t expr = e.compile(resources, path, ec); + return jsonpath_expression(std::move(resources), std::move(expr)); + } + }; + + template <class Json> + jsonpath_expression<Json> make_expression(const typename Json::string_view_type& expr, + const custom_functions<Json>& functions = custom_functions<Json>()) + { + return jsonpath_expression<Json>::compile(expr, functions); + } + + template <class Json> + jsonpath_expression<Json> make_expression(const typename Json::string_view_type& expr, std::error_code& ec) + { + return jsonpath_expression<Json>::compile(expr, ec); + } + + template <class Json> + jsonpath_expression<Json> make_expression(const typename Json::string_view_type& expr, + const custom_functions<Json>& functions, + std::error_code& ec) + { + return jsonpath_expression<Json>::compile(expr, functions, ec); + } + +} // namespace jsonpath +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/jsonpath/jsonpath_selector.hpp b/include/jsoncons_ext/jsonpath/jsonpath_selector.hpp new file mode 100644 index 0000000..e06d37c --- /dev/null +++ b/include/jsoncons_ext/jsonpath/jsonpath_selector.hpp @@ -0,0 +1,1322 @@ +// Copyright 2021 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPATH_JSONPATH_SELECTOR_HPP +#define JSONCONS_JSONPATH_JSONPATH_SELECTOR_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::is_const +#include <limits> // std::numeric_limits +#include <utility> // std::move +#include <regex> +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpath/jsonpath_error.hpp> +#include <jsoncons_ext/jsonpath/expression.hpp> + +namespace jsoncons { +namespace jsonpath { +namespace detail { + + struct slice + { + jsoncons::optional<int64_t> start_; + jsoncons::optional<int64_t> stop_; + int64_t step_; + + slice() + : start_(), stop_(), step_(1) + { + } + + slice(const jsoncons::optional<int64_t>& start, const jsoncons::optional<int64_t>& end, int64_t step) + : start_(start), stop_(end), step_(step) + { + } + + slice(const slice& other) + : start_(other.start_), stop_(other.stop_), step_(other.step_) + { + } + + slice& operator=(const slice& rhs) + { + if (this != &rhs) + { + if (rhs.start_) + { + start_ = rhs.start_; + } + else + { + start_.reset(); + } + if (rhs.stop_) + { + stop_ = rhs.stop_; + } + else + { + stop_.reset(); + } + step_ = rhs.step_; + } + return *this; + } + + int64_t get_start(std::size_t size) const + { + if (start_) + { + auto len = *start_ >= 0 ? *start_ : (static_cast<int64_t>(size) + *start_); + return len <= static_cast<int64_t>(size) ? len : static_cast<int64_t>(size); + } + else + { + if (step_ >= 0) + { + return 0; + } + else + { + return static_cast<int64_t>(size); + } + } + } + + int64_t get_stop(std::size_t size) const + { + if (stop_) + { + auto len = *stop_ >= 0 ? *stop_ : (static_cast<int64_t>(size) + *stop_); + return len <= static_cast<int64_t>(size) ? len : static_cast<int64_t>(size); + } + else + { + return step_ >= 0 ? 
static_cast<int64_t>(size) : -1; + } + } + + int64_t step() const + { + return step_; // Allow negative + } + }; + + template <class Json,class JsonReference> + class json_array_receiver : public node_receiver<Json,JsonReference> + { + public: + using reference = JsonReference; + using char_type = typename Json::char_type; + using json_location_node_type = json_location_node<char_type>; + + Json* val; + + json_array_receiver(Json* ptr) + : val(ptr) + { + } + + void add(const json_location_node_type&, reference value) override + { + val->emplace_back(value); + } + }; + + template <class Json,class JsonReference> + struct path_generator + { + using char_type = typename Json::char_type; + using json_location_node_type = json_location_node<char_type>; + using string_type = std::basic_string<char_type>; + + static const json_location_node_type& generate(dynamic_resources<Json,JsonReference>& resources, + const json_location_node_type& last, + std::size_t index, + result_options options) + { + const result_options require_path = result_options::path | result_options::nodups | result_options::sort; + if ((options & require_path) != result_options()) + { + return *resources.create_path_node(&last, index); + } + else + { + return last; + } + } + + static const json_location_node_type& generate(dynamic_resources<Json,JsonReference>& resources, + const json_location_node_type& last, + const string_type& identifier, + result_options options) + { + const result_options require_path = result_options::path | result_options::nodups | result_options::sort; + if ((options & require_path) != result_options()) + { + return *resources.create_path_node(&last, identifier); + } + else + { + return last; + } + } + }; + + template <class Json,class JsonReference> + class base_selector : public jsonpath_selector<Json,JsonReference> + { + using supertype = jsonpath_selector<Json,JsonReference>; + + supertype* tail_; + public: + using value_type = typename supertype::value_type; + using 
reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using json_location_type = typename supertype::json_location_type; + using node_receiver_type = typename supertype::node_receiver_type; + using selector_type = typename supertype::selector_type; + + base_selector() + : supertype(true, 11), tail_(nullptr) + { + } + + base_selector(bool is_path, std::size_t precedence_level) + : supertype(is_path, precedence_level), tail_(nullptr) + { + } + + void append_selector(selector_type* expr) override + { + if (!tail_) + { + tail_ = expr; + } + else + { + tail_->append_selector(expr); + } + } + + void tail_select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const + { + if (!tail_) + { + receiver.add(last, current); + } + else + { + tail_->select(resources, root, last, current, receiver, options); + } + } + + reference evaluate_tail(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code& ec) const + { + if (!tail_) + { + return current; + } + else + { + return tail_->evaluate(resources, root, last, current, options, ec); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + if (tail_) + { + s.append(tail_->to_string(level)); + } + return s; + } + }; + + template <class Json,class JsonReference> + class identifier_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + using path_generator_type = path_generator<Json,JsonReference>; + public: + using value_type = typename 
supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type>; + using string_view_type = basic_string_view<char_type>; + using node_receiver_type = typename supertype::node_receiver_type; + private: + string_type identifier_; + public: + + identifier_selector(const string_view_type& identifier) + : base_selector<Json,JsonReference>(), identifier_(identifier) + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + //std::string buf; + //buf.append("identifier selector: "); + //unicode_traits::convert(identifier_.data(),identifier_.size(),buf); + + static const char_type length_name[] = {'l', 'e', 'n', 'g', 't', 'h', 0}; + + if (current.is_object()) + { + auto it = current.find(identifier_); + if (it != current.object_range().end()) + { + this->tail_select(resources, root, + path_generator_type::generate(resources, last, identifier_, options), + it->value(), receiver, options); + } + } + else if (current.is_array()) + { + int64_t n{0}; + auto r = jsoncons::detail::to_integer_decimal(identifier_.data(), identifier_.size(), n); + if (r) + { + std::size_t index = (n >= 0) ? 
static_cast<std::size_t>(n) : static_cast<std::size_t>(static_cast<int64_t>(current.size()) + n); + if (index < current.size()) + { + this->tail_select(resources, root, + path_generator_type::generate(resources, last, index, options), + current[index], receiver, options); + } + } + else if (identifier_ == length_name && current.size() > 0) + { + pointer ptr = resources.create_json(current.size()); + this->tail_select(resources, root, + path_generator_type::generate(resources, last, identifier_, options), + *ptr, + receiver, options); + } + } + else if (current.is_string() && identifier_ == length_name) + { + string_view_type sv = current.as_string_view(); + std::size_t count = unicode_traits::count_codepoints(sv.data(), sv.size()); + pointer ptr = resources.create_json(count); + this->tail_select(resources, root, + path_generator_type::generate(resources, last, identifier_, options), + *ptr, receiver, options); + } + //std::cout << "end identifier_selector\n"; + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code& ec) const override + { + static const char_type length_name[] = {'l', 'e', 'n', 'g', 't', 'h', 0}; + + if (current.is_object()) + { + auto it = current.find(identifier_); + if (it != current.object_range().end()) + { + return this->evaluate_tail(resources, root, + path_generator_type::generate(resources, last, identifier_, options), + it->value(), options, ec); + } + else + { + return resources.null_value(); + } + } + else if (current.is_array()) + { + int64_t n{0}; + auto r = jsoncons::detail::to_integer_decimal(identifier_.data(), identifier_.size(), n); + if (r) + { + std::size_t index = (n >= 0) ? 
static_cast<std::size_t>(n) : static_cast<std::size_t>(static_cast<int64_t>(current.size()) + n); + if (index < current.size()) + { + return this->evaluate_tail(resources, root, + path_generator_type::generate(resources, last, index, options), + current[index], options, ec); + } + else + { + return resources.null_value(); + } + } + else if (identifier_ == length_name && current.size() > 0) + { + pointer ptr = resources.create_json(current.size()); + return this->evaluate_tail(resources, root, + path_generator_type::generate(resources, last, identifier_, options), + *ptr, + options, ec); + } + else + { + return resources.null_value(); + } + } + else if (current.is_string() && identifier_ == length_name) + { + string_view_type sv = current.as_string_view(); + std::size_t count = unicode_traits::count_codepoints(sv.data(), sv.size()); + pointer ptr = resources.create_json(count); + return this->evaluate_tail(resources, root, + path_generator_type::generate(resources, last, identifier_, options), + *ptr, options, ec); + } + else + { + return resources.null_value(); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("identifier selector "); + unicode_traits::convert(identifier_.data(),identifier_.size(),s); + s.append(base_selector<Json,JsonReference>::to_string(level+1)); + //s.append("\n"); + + return s; + } + }; + + template <class Json,class JsonReference> + class root_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + using path_generator_type = path_generator<Json,JsonReference>; + + std::size_t id_; + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename 
supertype::json_location_node_type; + using node_receiver_type = typename supertype::node_receiver_type; + + root_selector(std::size_t id) + : base_selector<Json,JsonReference>(), id_(id) + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference, + node_receiver_type& receiver, + result_options options) const override + { + this->tail_select(resources, root, last, root, receiver, options); + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference, + result_options options, + std::error_code& ec) const override + { + if (resources.is_cached(id_)) + { + return resources.retrieve_from_cache(id_); + } + else + { + auto& ref = this->evaluate_tail(resources, root, last, root, options, ec); + if (!ec) + { + resources.add_to_cache(id_, ref); + } + + return ref; + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("root_selector "); + s.append(base_selector<Json,JsonReference>::to_string(level+1)); + + return s; + } + }; + + template <class Json,class JsonReference> + class current_node_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + + current_node_selector() + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& 
last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + this->tail_select(resources, + root, last, current, receiver, options); + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code& ec) const override + { + //std::cout << "current_node_selector: " << current << "\n"; + return this->evaluate_tail(resources, + root, last, current, options, ec); + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("current_node_selector"); + s.append(base_selector<Json,JsonReference>::to_string(level+1)); + + return s; + } + }; + + template <class Json,class JsonReference> + class parent_node_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + + int ancestor_depth_; + + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using json_location_type = typename supertype::json_location_type; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + + parent_node_selector(int ancestor_depth) + { + ancestor_depth_ = ancestor_depth; + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference, + node_receiver_type& receiver, + result_options options) const override + { + const json_location_node_type* ancestor = std::addressof(last); + int index = 0; + while (ancestor != nullptr && index < ancestor_depth_) + { + 
ancestor = ancestor->parent(); + ++index; + } + + if (ancestor != nullptr) + { + json_location_type path(*ancestor); + pointer ptr = jsoncons::jsonpath::select(root,path); + if (ptr != nullptr) + { + this->tail_select(resources, root, path.last(), *ptr, receiver, options); + } + } + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference, + result_options options, + std::error_code& ec) const override + { + const json_location_node_type* ancestor = std::addressof(last); + int index = 0; + while (ancestor != nullptr && index < ancestor_depth_) + { + ancestor = ancestor->parent(); + ++index; + } + + if (ancestor != nullptr) + { + json_location_type path(*ancestor); + pointer ptr = jsoncons::jsonpath::select(root,path); + if (ptr != nullptr) + { + return this->evaluate_tail(resources, root, path.last(), *ptr, options, ec); + } + else + { + return resources.null_value(); + } + } + else + { + return resources.null_value(); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("parent_node_selector"); + s.append(base_selector<Json,JsonReference>::to_string(level+1)); + + return s; + } + }; + + template <class Json,class JsonReference> + class index_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + + int64_t index_; + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + + index_selector(int64_t index) + : 
base_selector<Json,JsonReference>(), index_(index) + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + if (current.is_array()) + { + int64_t slen = static_cast<int64_t>(current.size()); + if (index_ >= 0 && index_ < slen) + { + std::size_t i = static_cast<std::size_t>(index_); + this->tail_select(resources, root, + path_generator_type::generate(resources, last, i, options), + current.at(i), receiver, options); + } + else + { + int64_t index = slen + index_; + if (index >= 0 && index < slen) + { + std::size_t i = static_cast<std::size_t>(index); + this->tail_select(resources, root, + path_generator_type::generate(resources, last, i, options), + current.at(i), receiver, options); + } + } + } + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code& ec) const override + { + if (current.is_array()) + { + int64_t slen = static_cast<int64_t>(current.size()); + if (index_ >= 0 && index_ < slen) + { + std::size_t i = static_cast<std::size_t>(index_); + return this->evaluate_tail(resources, root, + path_generator_type::generate(resources, last, i, options), + current.at(i), options, ec); + } + else + { + int64_t index = slen + index_; + if (index >= 0 && index < slen) + { + std::size_t i = static_cast<std::size_t>(index); + return this->evaluate_tail(resources, root, + path_generator_type::generate(resources, last, i, options), + current.at(i), options, ec); + } + else + { + return resources.null_value(); + } + } + } + else + { + return resources.null_value(); + } + } + }; + + template <class Json,class JsonReference> + class wildcard_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + + 
public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + + wildcard_selector() + : base_selector<Json,JsonReference>() + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + if (current.is_array()) + { + for (std::size_t i = 0; i < current.size(); ++i) + { + this->tail_select(resources, root, + path_generator_type::generate(resources, last, i, options), current[i], + receiver, options); + } + } + else if (current.is_object()) + { + for (auto& member : current.object_range()) + { + this->tail_select(resources, root, + path_generator_type::generate(resources, last, member.key(), options), + member.value(), receiver, options); + } + } + //std::cout << "end wildcard_selector\n"; + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code&) const override + { + auto jptr = resources.create_json(json_array_arg); + json_array_receiver<Json,JsonReference> receiver(jptr); + select(resources, root, last, current, receiver, options); + return *jptr; + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("wildcard selector"); + s.append(base_selector<Json,JsonReference>::to_string(level)); + + return s; + } + }; + + template <class Json,class JsonReference> + class 
recursive_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + + recursive_selector() + : base_selector<Json,JsonReference>() + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + if (current.is_array()) + { + this->tail_select(resources, root, last, current, receiver, options); + for (std::size_t i = 0; i < current.size(); ++i) + { + select(resources, root, + path_generator_type::generate(resources, last, i, options), current[i], receiver, options); + } + } + else if (current.is_object()) + { + this->tail_select(resources, root, last, current, receiver, options); + for (auto& item : current.object_range()) + { + select(resources, root, + path_generator_type::generate(resources, last, item.key(), options), item.value(), receiver, options); + } + } + //std::cout << "end wildcard_selector\n"; + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code&) const override + { + auto jptr = resources.create_json(json_array_arg); + json_array_receiver<Json,JsonReference> receiver(jptr); + select(resources, root, last, current, receiver, options); + return *jptr; + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 
0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("wildcard selector"); + s.append(base_selector<Json,JsonReference>::to_string(level)); + + return s; + } + }; + + template <class Json,class JsonReference> + class union_selector final : public jsonpath_selector<Json,JsonReference> + { + using supertype = jsonpath_selector<Json,JsonReference>; + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using json_location_type = typename supertype::json_location_type; + using path_expression_type = path_expression<Json, JsonReference>; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + using selector_type = typename supertype::selector_type; + private: + std::vector<selector_type*> selectors_; + selector_type* tail_; + public: + union_selector(std::vector<selector_type*>&& selectors) + : supertype(true, 11), selectors_(std::move(selectors)), tail_(nullptr) + { + } + + void append_selector(selector_type* tail) override + { + if (tail_ == nullptr) + { + tail_ = tail; + for (auto& selector : selectors_) + { + selector->append_selector(tail); + } + } + else + { + tail_->append_selector(tail); + } + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + for (auto& selector : selectors_) + { + selector->select(resources, root, last, current, receiver, options); + } + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + 
std::error_code&) const override + { + auto jptr = resources.create_json(json_array_arg); + json_array_receiver<Json,JsonReference> receiver(jptr); + select(resources,root,last,current,receiver,options); + return *jptr; + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("union selector "); + for (auto& selector : selectors_) + { + s.append(selector->to_string(level+1)); + //s.push_back('\n'); + } + + return s; + } + }; + + template <class Json,class JsonReference> + class filter_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + + expression<Json,JsonReference> expr_; + + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + + filter_selector(expression<Json,JsonReference>&& expr) + : base_selector<Json,JsonReference>(), expr_(std::move(expr)) + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + if (current.is_array()) + { + for (std::size_t i = 0; i < current.size(); ++i) + { + std::error_code ec; + value_type r = expr_.evaluate(resources, root, current[i], options, ec); + bool t = ec ? 
false : detail::is_true(r); + if (t) + { + this->tail_select(resources, root, + path_generator_type::generate(resources, last, i, options), + current[i], receiver, options); + } + } + } + else if (current.is_object()) + { + for (auto& member : current.object_range()) + { + std::error_code ec; + value_type r = expr_.evaluate(resources, root, member.value(), options, ec); + bool t = ec ? false : detail::is_true(r); + if (t) + { + this->tail_select(resources, root, + path_generator_type::generate(resources, last, member.key(), options), + member.value(), receiver, options); + } + } + } + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code&) const override + { + auto jptr = resources.create_json(json_array_arg); + json_array_receiver<Json,JsonReference> receiver(jptr); + select(resources, root, last, current, receiver, options); + return *jptr; + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("filter selector "); + s.append(expr_.to_string(level+1)); + + return s; + } + }; + + template <class Json,class JsonReference> + class index_expression_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + + expression<Json,JsonReference> expr_; + + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + + index_expression_selector(expression<Json,JsonReference>&& expr) + : 
base_selector<Json,JsonReference>(), expr_(std::move(expr)) + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + std::error_code ec; + value_type j = expr_.evaluate(resources, root, current, options, ec); + + if (!ec) + { + if (j.template is<std::size_t>() && current.is_array()) + { + std::size_t start = j.template as<std::size_t>(); + this->tail_select(resources, root, + path_generator_type::generate(resources, last, start, options), + current.at(start), receiver, options); + } + else if (j.is_string() && current.is_object()) + { + this->tail_select(resources, root, + path_generator_type::generate(resources, last, j.as_string(), options), + current.at(j.as_string_view()), receiver, options); + } + } + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code& ec) const override + { + //std::cout << "index_expression_selector current: " << current << "\n"; + + value_type j = expr_.evaluate(resources, root, current, options, ec); + + if (!ec) + { + if (j.template is<std::size_t>() && current.is_array()) + { + std::size_t start = j.template as<std::size_t>(); + return this->evaluate_tail(resources, root, last, current.at(start), options, ec); + } + else if (j.is_string() && current.is_object()) + { + return this->evaluate_tail(resources, root, last, current.at(j.as_string_view()), options, ec); + } + else + { + return resources.null_value(); + } + } + else + { + return resources.null_value(); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("bracket expression selector "); + s.append(expr_.to_string(level+1)); + 
s.append(base_selector<Json,JsonReference>::to_string(level+1)); + + return s; + } + }; + + template <class Json,class JsonReference> + class slice_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + using path_generator_type = path_generator<Json, JsonReference>; + + slice slice_; + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using node_receiver_type = typename supertype::node_receiver_type; + + slice_selector(const slice& slic) + : base_selector<Json,JsonReference>(), slice_(slic) + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + if (current.is_array()) + { + auto start = slice_.get_start(current.size()); + auto end = slice_.get_stop(current.size()); + auto step = slice_.step(); + + if (step > 0) + { + if (start < 0) + { + start = 0; + } + if (end > static_cast<int64_t>(current.size())) + { + end = current.size(); + } + for (int64_t i = start; i < end; i += step) + { + std::size_t j = static_cast<std::size_t>(i); + this->tail_select(resources, root, + path_generator_type::generate(resources, last, j, options), + current[j], receiver, options); + } + } + else if (step < 0) + { + if (start >= static_cast<int64_t>(current.size())) + { + start = static_cast<int64_t>(current.size()) - 1; + } + if (end < -1) + { + end = -1; + } + for (int64_t i = start; i > end; i += step) + { + std::size_t j = static_cast<std::size_t>(i); + if (j < current.size()) + { + this->tail_select(resources, root, + path_generator_type::generate(resources, last,j,options), current[j], 
receiver, options); + } + } + } + } + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code&) const override + { + auto jptr = resources.create_json(json_array_arg); + json_array_receiver<Json,JsonReference> accum(jptr); + select(resources, root, last, current, accum, options); + return *jptr; + } + }; + + template <class Json,class JsonReference> + class function_selector final : public base_selector<Json,JsonReference> + { + using supertype = base_selector<Json,JsonReference>; + + expression<Json,JsonReference> expr_; + + public: + using value_type = typename supertype::value_type; + using reference = typename supertype::reference; + using pointer = typename supertype::pointer; + using path_value_pair_type = typename supertype::path_value_pair_type; + using json_location_node_type = typename supertype::json_location_node_type; + using path_generator_type = path_generator<Json,JsonReference>; + using node_receiver_type = typename supertype::node_receiver_type; + + function_selector(expression<Json,JsonReference>&& expr) + : base_selector<Json,JsonReference>(), expr_(std::move(expr)) + { + } + + void select(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + node_receiver_type& receiver, + result_options options) const override + { + std::error_code ec; + value_type ref = expr_.evaluate(resources, root, current, options, ec); + if (!ec) + { + this->tail_select(resources, root, last, *resources.create_json(std::move(ref)), receiver, options); + } + } + + reference evaluate(dynamic_resources<Json,JsonReference>& resources, + reference root, + const json_location_node_type& last, + reference current, + result_options options, + std::error_code& ec) const override + { + value_type ref = expr_.evaluate(resources, root, current, options, ec); + if (!ec) 
+ { + return this->evaluate_tail(resources, root, last, *resources.create_json(std::move(ref)), + options, ec); + } + else + { + return resources.null_value(); + } + } + + std::string to_string(int level = 0) const override + { + std::string s; + if (level > 0) + { + s.append("\n"); + s.append(level*2, ' '); + } + s.append("function_selector "); + s.append(expr_.to_string(level+1)); + + return s; + } + }; + +} // namespace detail +} // namespace jsonpath +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/jsonpointer/jsonpointer.hpp b/include/jsoncons_ext/jsonpointer/jsonpointer.hpp new file mode 100644 index 0000000..41e41e2 --- /dev/null +++ b/include/jsoncons_ext/jsonpointer/jsonpointer.hpp @@ -0,0 +1,1577 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPOINTER_JSONPOINTER_HPP +#define JSONCONS_JSONPOINTER_JSONPOINTER_HPP + +#include <string> +#include <vector> +#include <memory> +#include <iostream> +#include <iterator> +#include <utility> // std::move +#include <system_error> // system_error +#include <type_traits> // std::enable_if, std::true_type +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpointer/jsonpointer_error.hpp> +#include <jsoncons/detail/write_number.hpp> + +namespace jsoncons { namespace jsonpointer { + + namespace detail { + + enum class pointer_state + { + start, + escaped, + delim + }; + + } // namespace detail + + template <class CharT> + std::basic_string<CharT> escape_string(const std::basic_string<CharT>& s) + { + std::basic_string<CharT> result; + for (auto c : s) + { + switch (c) + { + case '~': + result.push_back('~'); + result.push_back('0'); + break; + case '/': + result.push_back('~'); + result.push_back('1'); + break; + default: + result.push_back(c); + break; + } + } + return 
result; + } + + // basic_json_pointer + + template <class CharT> + class basic_json_pointer + { + public: + // Member types + using char_type = CharT; + using string_type = std::basic_string<char_type>; + using string_view_type = jsoncons::basic_string_view<char_type>; + using const_iterator = typename std::vector<string_type>::const_iterator; + using iterator = const_iterator; + using const_reverse_iterator = typename std::vector<string_type>::const_reverse_iterator; + using reverse_iterator = const_reverse_iterator; + private: + std::vector<string_type> tokens_; + public: + // Constructors + basic_json_pointer() + { + } + + basic_json_pointer(const std::vector<string_type>& tokens) + : tokens_(tokens) + { + } + + basic_json_pointer(std::vector<string_type>&& tokens) + : tokens_(std::move(tokens)) + { + } + + explicit basic_json_pointer(const string_view_type& s) + { + std::error_code ec; + auto jp = parse(s, ec); + if (ec) + { + throw jsonpointer_error(ec); + } + tokens_ = std::move(jp.tokens_); + } + + explicit basic_json_pointer(const string_view_type& s, std::error_code& ec) + { + auto jp = parse(s, ec); + if (!ec) + { + tokens_ = std::move(jp.tokens_); + } + } + + basic_json_pointer(const basic_json_pointer&) = default; + + basic_json_pointer(basic_json_pointer&&) = default; + + static basic_json_pointer parse(const string_view_type& input, std::error_code& ec) + { + std::vector<string_type> tokens; + if (input.empty() || (input[0] == '#' && input.size() == 1)) + { + return basic_json_pointer<CharT>(); + } + + const char_type* p; + const char_type* pend; + string_type unescaped; + if (input[0] == '#') + { + unescaped = unescape_uri_string(input, ec); + p = unescaped.data() + 1; + pend = unescaped.data() + unescaped.size(); + } + else + { + p = input.data(); + pend = input.data() + input.size(); + } + + auto state = jsonpointer::detail::pointer_state::start; + string_type buffer; + + while (p < pend) + { + bool done = false; + while (p < pend && !done) + { + 
switch (state) + { + case jsonpointer::detail::pointer_state::start: + switch (*p) + { + case '/': + state = jsonpointer::detail::pointer_state::delim; + break; + default: + ec = jsonpointer_errc::expected_slash; + return basic_json_pointer(); + }; + break; + case jsonpointer::detail::pointer_state::delim: + switch (*p) + { + case '/': + done = true; + break; + case '~': + state = jsonpointer::detail::pointer_state::escaped; + break; + default: + buffer.push_back(*p); + break; + }; + break; + case jsonpointer::detail::pointer_state::escaped: + switch (*p) + { + case '0': + buffer.push_back('~'); + state = jsonpointer::detail::pointer_state::delim; + break; + case '1': + buffer.push_back('/'); + state = jsonpointer::detail::pointer_state::delim; + break; + default: + ec = jsonpointer_errc::expected_0_or_1; + return basic_json_pointer(); + }; + break; + } + ++p; + } + tokens.push_back(buffer); + buffer.clear(); + } + if (!buffer.empty()) + { + tokens.push_back(buffer); + } + return basic_json_pointer(tokens); + } + + static string_type escape_uri_string(const string_type& s) + { + string_type escaped; + for (auto ch : s) + { + switch (ch) + { + case '%': + escaped.append(string_type{'%','2','5'}); + break; + case '^': + escaped.append(string_type{'%','5','E'}); + break; + case '|': + escaped.append(string_type{'%','7','C'}); + break; + case '\\': + escaped.append(string_type{'%','5','C'}); + break; + case '\"': + escaped.append(string_type{'%','2','2'}); + break; + case ' ': + escaped.append(string_type{'%','2','0'}); + break; + default: + escaped.push_back(ch); + break; + } + } + + return escaped; + } + + static string_type unescape_uri_string(const string_view_type& s, std::error_code& ec) + { + if (s.size() < 3) + { + return string_type(s); + } + string_type unescaped; + std::size_t last = s.size() - 2; + std::size_t pos = 0; + while (pos < last) + { + if (s[pos] == '%') + { + uint8_t ch; + auto result = jsoncons::detail::to_integer_base16(s.data() + (pos+1), 2, 
ch); + if (!result) + { + ec = jsonpointer_errc::invalid_uri_escaped_data; + return string_type(s); + } + unescaped.push_back(ch); + pos += 3; + } + else + { + unescaped.push_back(s[pos]); + ++pos; + } + } + while (pos < s.size()) + { + unescaped.push_back(s[pos]); + ++pos; + } + return unescaped; + } + + // operator= + basic_json_pointer& operator=(const basic_json_pointer&) = default; + + basic_json_pointer& operator=(basic_json_pointer&&) = default; + + // Modifiers + + void clear() + { + tokens_.clear(); + } + + basic_json_pointer& operator/=(const string_type& s) + { + tokens_.push_back(s); + return *this; + } + + template <class IntegerType> + typename std::enable_if<type_traits::is_integer<IntegerType>::value, basic_json_pointer&>::type + operator/=(IntegerType val) + { + string_type s; + jsoncons::detail::from_integer(val, s); + tokens_.push_back(s); + + return *this; + } + + basic_json_pointer& operator+=(const basic_json_pointer& p) + { + for (const auto& s : p.tokens_) + { + tokens_.push_back(s); + } + return *this; + } + + // Accessors + bool empty() const + { + return tokens_.empty(); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + JSONCONS_DEPRECATED_MSG("Instead, use to_string()") + string_type string() const + { + return to_string(); + } +#endif + string_type to_string() const + { + string_type buffer; + for (const auto& token : tokens_) + { + buffer.push_back('/'); + for (auto c : token) + { + switch (c) + { + case '~': + buffer.push_back('~'); + buffer.push_back('0'); + break; + case '/': + buffer.push_back('~'); + buffer.push_back('1'); + break; + default: + buffer.push_back(c); + break; + } + } + } + return buffer; + } + + string_type to_uri_fragment() const + { + string_type buffer{'#'}; + for (const auto& token : tokens_) + { + buffer.push_back('/'); + string_type s = escape_uri_string(token); + for (auto c : s) + { + switch (c) + { + case '~': + buffer.push_back('~'); + buffer.push_back('0'); + break; + case '/': + buffer.push_back('~'); + 
buffer.push_back('1'); + break; + default: + buffer.push_back(c); + break; + } + } + } + return buffer; + } + + // Iterators + iterator begin() const + { + return tokens_.begin(); + } + iterator end() const + { + return tokens_.end(); + } + + reverse_iterator rbegin() const + { + return tokens_.rbegin(); + } + reverse_iterator rend() const + { + return tokens_.rend(); + } + + // Non-member functions + friend basic_json_pointer<CharT> operator/(const basic_json_pointer<CharT>& lhs, const string_type& rhs) + { + basic_json_pointer<CharT> p(lhs); + p /= rhs; + return p; + } + + friend basic_json_pointer<CharT> operator+( const basic_json_pointer<CharT>& lhs, const basic_json_pointer<CharT>& rhs ) + { + basic_json_pointer<CharT> p(lhs); + p += rhs; + return p; + } + + friend bool operator==( const basic_json_pointer& lhs, const basic_json_pointer& rhs ) + { + return lhs.tokens_ == rhs.okens_; + } + + friend bool operator!=( const basic_json_pointer& lhs, const basic_json_pointer& rhs ) + { + return lhs.tokens_ != rhs.tokens_; + } + + friend std::basic_ostream<CharT>& + operator<<( std::basic_ostream<CharT>& os, const basic_json_pointer<CharT>& p ) + { + os << p.to_string(); + return os; + } + }; + + template <class CharT,class IntegerType> + typename std::enable_if<type_traits::is_integer<IntegerType>::value, basic_json_pointer<CharT>>::type + operator/(const basic_json_pointer<CharT>& lhs, IntegerType rhs) + { + basic_json_pointer<CharT> p(lhs); + p /= rhs; + return p; + } + + using json_pointer = basic_json_pointer<char>; + using wjson_pointer = basic_json_pointer<wchar_t>; + + #if !defined(JSONCONS_NO_DEPRECATED) + template<class CharT> + using basic_address = basic_json_pointer<CharT>; + template<class CharT> + using basic_json_ptr = basic_json_pointer<CharT>; + JSONCONS_DEPRECATED_MSG("Instead, use json_pointer") typedef json_pointer address; + JSONCONS_DEPRECATED_MSG("Instead, use json_pointer") typedef json_pointer json_ptr; + JSONCONS_DEPRECATED_MSG("Instead, 
use wjson_pointer") typedef json_pointer wjson_ptr; + #endif + + namespace detail { + + template <class Json> + const Json* resolve(const Json* current, const typename Json::string_view_type& buffer, std::error_code& ec) + { + if (current->is_array()) + { + if (buffer.size() == 1 && buffer[0] == '-') + { + ec = jsonpointer_errc::index_exceeds_array_size; + return current; + } + std::size_t index{0}; + auto result = jsoncons::detail::to_integer_decimal(buffer.data(), buffer.length(), index); + if (!result) + { + ec = jsonpointer_errc::invalid_index; + return current; + } + if (index >= current->size()) + { + ec = jsonpointer_errc::index_exceeds_array_size; + return current; + } + current = std::addressof(current->at(index)); + } + else if (current->is_object()) + { + if (!current->contains(buffer)) + { + ec = jsonpointer_errc::key_not_found; + return current; + } + current = std::addressof(current->at(buffer)); + } + else + { + ec = jsonpointer_errc::expected_object_or_array; + return current; + } + return current; + } + + template <class Json> + Json* resolve(Json* current, const typename Json::string_view_type& buffer, bool create_if_missing, std::error_code& ec) + { + if (current->is_array()) + { + if (buffer.size() == 1 && buffer[0] == '-') + { + ec = jsonpointer_errc::index_exceeds_array_size; + return current; + } + std::size_t index{0}; + auto result = jsoncons::detail::to_integer_decimal(buffer.data(), buffer.length(), index); + if (!result) + { + ec = jsonpointer_errc::invalid_index; + return current; + } + if (index >= current->size()) + { + ec = jsonpointer_errc::index_exceeds_array_size; + return current; + } + current = std::addressof(current->at(index)); + } + else if (current->is_object()) + { + if (!current->contains(buffer)) + { + if (create_if_missing) + { + auto r = current->try_emplace(buffer, Json()); + current = std::addressof(r.first->value()); + } + else + { + ec = jsonpointer_errc::key_not_found; + return current; + } + } + else + { + 
current = std::addressof(current->at(buffer)); + } + } + else + { + ec = jsonpointer_errc::expected_object_or_array; + return current; + } + return current; + } + + } // namespace detail + + // get + + template<class Json> + Json& get(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + bool create_if_missing, + std::error_code& ec) + { + if (location.empty()) + { + return root; + } + + Json* current = std::addressof(root); + auto it = location.begin(); + auto end = location.end(); + while (it != end) + { + current = jsoncons::jsonpointer::detail::resolve(current, *it, create_if_missing, ec); + if (ec) + return *current; + ++it; + } + return *current; + } + + template<class Json, class StringSource> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,Json&>::type + get(Json& root, + const StringSource& location_str, + bool create_if_missing, + std::error_code& ec) + { + auto jsonptr = basic_json_pointer<typename Json::char_type>::parse(location_str, ec); + if (ec) + { + return root; + } + return get(root, jsonptr, create_if_missing, ec); + } + + template<class Json> + const Json& get(const Json& root, + const basic_json_pointer<typename Json::char_type>& location, + std::error_code& ec) + { + if (location.empty()) + { + return root; + } + + const Json* current = std::addressof(root); + auto it = location.begin(); + auto end = location.end(); + while (it != end) + { + current = jsoncons::jsonpointer::detail::resolve(current, *it, ec); + if (ec) + return *current; + ++it; + } + return *current; + } + + template<class Json, class StringSource> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,const Json&>::type + get(const Json& root, + const StringSource& location_str, + std::error_code& ec) + { + auto jsonptr = basic_json_pointer<typename Json::char_type>::parse(location_str, ec); + if (ec) + { + return 
root; + } + return get(root, jsonptr, ec); + } + + template<class Json> + Json& get(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + std::error_code& ec) + { + return get(root, location, false, ec); + } + + template<class Json, class StringSource> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,Json&>::type + get(Json& root, + const StringSource& location_str, + std::error_code& ec) + { + return get(root, location_str, false, ec); + } + + template<class Json> + Json& get(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + bool create_if_missing = false) + { + std::error_code ec; + Json& j = get(root, location, create_if_missing, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + return j; + } + + template<class Json, class StringSource> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,Json&>::type + get(Json& root, + const StringSource& location_str, + bool create_if_missing = false) + { + std::error_code ec; + Json& result = get(root, location_str, create_if_missing, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + return result; + } + + template<class Json> + const Json& get(const Json& root, const basic_json_pointer<typename Json::char_type>& location) + { + std::error_code ec; + const Json& j = get(root, location, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + return j; + } + + template<class Json, class StringSource> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,const Json&>::type + get(const Json& root, const StringSource& location_str) + { + std::error_code ec; + const Json& j = get(root, location_str, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + return j; + } + + // contains + + template<class Json> + bool 
contains(const Json& root, const basic_json_pointer<typename Json::char_type>& location) + { + std::error_code ec; + get(root, location, ec); + return !ec ? true : false; + } + + template<class Json, class StringSource> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,bool>::type + contains(const Json& root, const StringSource& location_str) + { + std::error_code ec; + get(root, location_str, ec); + return !ec ? true : false; + } + + template<class Json,class T> + void add(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + T&& value, + bool create_if_missing, + std::error_code& ec) + { + Json* current = std::addressof(root); + + std::basic_string<typename Json::char_type> buffer; + auto it = location.begin(); + auto end = location.end(); + while (it != end) + { + buffer = *it; + ++it; + if (it != end) + { + current = jsoncons::jsonpointer::detail::resolve(current, buffer, create_if_missing, ec); + if (ec) + return; + } + } + if (current->is_array()) + { + if (buffer.size() == 1 && buffer[0] == '-') + { + current->emplace_back(std::forward<T>(value)); + current = std::addressof(current->at(current->size()-1)); + } + else + { + std::size_t index{0}; + auto result = jsoncons::detail::to_integer_decimal(buffer.data(), buffer.length(), index); + if (!result) + { + ec = jsonpointer_errc::invalid_index; + return; + } + if (index > current->size()) + { + ec = jsonpointer_errc::index_exceeds_array_size; + return; + } + if (index == current->size()) + { + current->emplace_back(std::forward<T>(value)); + current = std::addressof(current->at(current->size()-1)); + } + else + { + auto it2 = current->insert(current->array_range().begin()+index,std::forward<T>(value)); + current = std::addressof(*it2); + } + } + } + else if (current->is_object()) + { + auto r = current->insert_or_assign(buffer,std::forward<T>(value)); + current = std::addressof(r.first->value()); + } + else + { + ec 
= jsonpointer_errc::expected_object_or_array; + return; + } + } + + // add + template<class Json, class StringSource, class T> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + add(Json& root, + const StringSource& location_str, + T&& value, + bool create_if_missing, + std::error_code& ec) + { + auto jsonptr = basic_json_pointer<typename Json::char_type>::parse(location_str, ec); + if (ec) + { + return; + } + add(root, jsonptr, std::forward<T>(value), create_if_missing, ec); + } + + template<class Json,class T> + void add(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + T&& value, + std::error_code& ec) + { + add(root, location, std::forward<T>(value), false, ec); + } + + template<class Json, class StringSource, class T> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + add(Json& root, + const StringSource& location_str, + T&& value, + std::error_code& ec) + { + add(root, location_str, std::forward<T>(value), false, ec); + } + + template<class Json,class T> + void add(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + T&& value, + bool create_if_missing = false) + { + std::error_code ec; + add(root, location, std::forward<T>(value), create_if_missing, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + } + + template<class Json, class StringSource, class T> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + add(Json& root, + const StringSource& location_str, + T&& value, + bool create_if_missing = false) + { + std::error_code ec; + add(root, location_str, std::forward<T>(value), create_if_missing, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + } + + // add_if_absent + + template<class Json, class T> + void add_if_absent(Json& 
root, + const basic_json_pointer<typename Json::char_type>& location, + T&& value, + bool create_if_missing, + std::error_code& ec) + { + Json* current = std::addressof(root); + + std::basic_string<typename Json::char_type> buffer; + auto it = location.begin(); + auto end = location.end(); + + while (it != end) + { + buffer = *it; + ++it; + if (it != end) + { + current = jsoncons::jsonpointer::detail::resolve(current, buffer, create_if_missing, ec); + if (ec) + return; + } + } + if (current->is_array()) + { + if (buffer.size() == 1 && buffer[0] == '-') + { + current->emplace_back(std::forward<T>(value)); + current = std::addressof(current->at(current->size()-1)); + } + else + { + std::size_t index{0}; + auto result = jsoncons::detail::to_integer_decimal(buffer.data(), buffer.length(), index); + if (!result) + { + ec = jsonpointer_errc::invalid_index; + return; + } + if (index > current->size()) + { + ec = jsonpointer_errc::index_exceeds_array_size; + return; + } + if (index == current->size()) + { + current->emplace_back(std::forward<T>(value)); + current = std::addressof(current->at(current->size()-1)); + } + else + { + auto it2 = current->insert(current->array_range().begin()+index,std::forward<T>(value)); + current = std::addressof(*it2); + } + } + } + else if (current->is_object()) + { + if (current->contains(buffer)) + { + ec = jsonpointer_errc::key_already_exists; + return; + } + else + { + auto r = current->try_emplace(buffer,std::forward<T>(value)); + current = std::addressof(r.first->value()); + } + } + else + { + ec = jsonpointer_errc::expected_object_or_array; + return; + } + } + + template<class Json, class StringSource, class T> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + add_if_absent(Json& root, + const StringSource& location_str, + T&& value, + bool create_if_missing, + std::error_code& ec) + { + auto jsonptr = basic_json_pointer<typename 
Json::char_type>::parse(location_str, ec); + if (ec) + { + return; + } + add_if_absent(root, jsonptr, std::forward<T>(value), create_if_missing, ec); + } + + template<class Json, class StringSource, class T> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + add_if_absent(Json& root, + const StringSource& location, + T&& value, + std::error_code& ec) + { + add_if_absent(root, location, std::forward<T>(value), false, ec); + } + + template<class Json, class StringSource, class T> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + add_if_absent(Json& root, + const StringSource& location_str, + T&& value, + bool create_if_missing = false) + { + std::error_code ec; + add_if_absent(root, location_str, std::forward<T>(value), create_if_missing, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + } + + template<class Json, class T> + void add_if_absent(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + T&& value, + std::error_code& ec) + { + add_if_absent(root, location, std::forward<T>(value), false, ec); + } + + template<class Json, class T> + void add_if_absent(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + T&& value, + bool create_if_missing = false) + { + std::error_code ec; + add_if_absent(root, location, std::forward<T>(value), create_if_missing, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + } + + // remove + + template<class Json> + void remove(Json& root, const basic_json_pointer<typename Json::char_type>& location, std::error_code& ec) + { + Json* current = std::addressof(root); + + std::basic_string<typename Json::char_type> buffer; + auto it = location.begin(); + auto end = location.end(); + + while (it != end) + { + buffer = *it; + ++it; + if (it != end) + { + current = 
jsoncons::jsonpointer::detail::resolve(current, buffer, false, ec); + if (ec) + return; + } + } + if (current->is_array()) + { + if (buffer.size() == 1 && buffer[0] == '-') + { + ec = jsonpointer_errc::index_exceeds_array_size; + return; + } + else + { + std::size_t index{0}; + auto result = jsoncons::detail::to_integer_decimal(buffer.data(), buffer.length(), index); + if (!result) + { + ec = jsonpointer_errc::invalid_index; + return; + } + if (index >= current->size()) + { + ec = jsonpointer_errc::index_exceeds_array_size; + return; + } + current->erase(current->array_range().begin()+index); + } + } + else if (current->is_object()) + { + if (!current->contains(buffer)) + { + ec = jsonpointer_errc::key_not_found; + return; + } + else + { + current->erase(buffer); + } + } + else + { + ec = jsonpointer_errc::expected_object_or_array; + return; + } + } + + template<class Json, class StringSource> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + remove(Json& root, const StringSource& location_str, std::error_code& ec) + { + auto jsonptr = basic_json_pointer<typename Json::char_type>::parse(location_str, ec); + if (ec) + { + return; + } + remove(root, jsonptr, ec); + } + + template<class Json, class StringSource> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + remove(Json& root, const StringSource& location_str) + { + std::error_code ec; + remove(root, location_str, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + } + + template<class Json> + void remove(Json& root, const basic_json_pointer<typename Json::char_type>& location) + { + std::error_code ec; + remove(root, location, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + } + + // replace + + template<class Json, class T> + void replace(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + 
T&& value, + bool create_if_missing, + std::error_code& ec) + { + Json* current = std::addressof(root); + + std::basic_string<typename Json::char_type> buffer; + auto it = location.begin(); + auto end = location.end(); + + while (it != end) + { + buffer = *it; + ++it; + if (it != end) + { + current = jsoncons::jsonpointer::detail::resolve(current, buffer, create_if_missing, ec); + if (ec) + return; + } + } + if (current->is_array()) + { + if (buffer.size() == 1 && buffer[0] == '-') + { + ec = jsonpointer_errc::index_exceeds_array_size; + return; + } + else + { + std::size_t index{}; + auto result = jsoncons::detail::to_integer_decimal(buffer.data(), buffer.length(), index); + if (!result) + { + ec = jsonpointer_errc::invalid_index; + return; + } + if (index >= current->size()) + { + ec = jsonpointer_errc::index_exceeds_array_size; + return; + } + current->at(index) = std::forward<T>(value); + } + } + else if (current->is_object()) + { + if (!current->contains(buffer)) + { + if (create_if_missing) + { + current->try_emplace(buffer,std::forward<T>(value)); + } + else + { + ec = jsonpointer_errc::key_not_found; + return; + } + } + else + { + auto r = current->insert_or_assign(buffer,std::forward<T>(value)); + current = std::addressof(r.first->value()); + } + } + else + { + ec = jsonpointer_errc::expected_object_or_array; + return; + } + } + + template<class Json, class StringSource, class T> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + replace(Json& root, + const StringSource& location_str, + T&& value, + bool create_if_missing, + std::error_code& ec) + { + auto jsonptr = basic_json_pointer<typename Json::char_type>::parse(location_str, ec); + if (ec) + { + return; + } + replace(root, jsonptr, std::forward<T>(value), create_if_missing, ec); + } + + template<class Json, class StringSource, class T> + typename 
std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + replace(Json& root, + const StringSource& location_str, + T&& value, + std::error_code& ec) + { + replace(root, location_str, std::forward<T>(value), false, ec); + } + + template<class Json, class StringSource, class T> + typename std::enable_if<std::is_convertible<StringSource,jsoncons::basic_string_view<typename Json::char_type>>::value,void>::type + replace(Json& root, + const StringSource& location_str, + T&& value, + bool create_if_missing = false) + { + std::error_code ec; + replace(root, location_str, std::forward<T>(value), create_if_missing, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + } + + template<class Json, class T> + void replace(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + T&& value, + std::error_code& ec) + { + replace(root, location, std::forward<T>(value), false, ec); + } + + template<class Json, class T> + void replace(Json& root, + const basic_json_pointer<typename Json::char_type>& location, + T&& value, + bool create_if_missing = false) + { + std::error_code ec; + replace(root, location, std::forward<T>(value), create_if_missing, ec); + if (ec) + { + JSONCONS_THROW(jsonpointer_error(ec)); + } + } + + template <class String,class Result> + typename std::enable_if<std::is_convertible<typename String::value_type,typename Result::value_type>::value>::type + escape(const String& s, Result& result) + { + for (auto c : s) + { + if (c == '~') + { + result.push_back('~'); + result.push_back('0'); + } + else if (c == '/') + { + result.push_back('~'); + result.push_back('1'); + } + else + { + result.push_back(c); + } + } + } + + template <class CharT> + std::basic_string<CharT> escape(const jsoncons::basic_string_view<CharT>& s) + { + std::basic_string<CharT> result; + + for (auto c : s) + { + if (c == '~') + { + result.push_back('~'); + result.push_back('0'); + } + else if (c 
== '/') + { + result.push_back('~'); + result.push_back('1'); + } + else + { + result.push_back(c); + } + } + return result; + } + + // flatten + + template<class Json> + void flatten_(const std::basic_string<typename Json::char_type>& parent_key, + const Json& parent_value, + Json& result) + { + using char_type = typename Json::char_type; + using string_type = std::basic_string<char_type>; + + switch (parent_value.type()) + { + case json_type::array_value: + { + if (parent_value.empty()) + { + // Flatten empty array to null + //result.try_emplace(parent_key, null_type{}); + //result[parent_key] = parent_value; + result.try_emplace(parent_key, parent_value); + } + else + { + for (std::size_t i = 0; i < parent_value.size(); ++i) + { + string_type key(parent_key); + key.push_back('/'); + jsoncons::detail::from_integer(i,key); + flatten_(key, parent_value.at(i), result); + } + } + break; + } + + case json_type::object_value: + { + if (parent_value.empty()) + { + // Flatten empty object to null + //result.try_emplace(parent_key, null_type{}); + //result[parent_key] = parent_value; + result.try_emplace(parent_key, parent_value); + } + else + { + for (const auto& item : parent_value.object_range()) + { + string_type key(parent_key); + key.push_back('/'); + escape(jsoncons::basic_string_view<char_type>(item.key().data(),item.key().size()), key); + flatten_(key, item.value(), result); + } + } + break; + } + + default: + { + // add primitive parent_value with its reference string + //result[parent_key] = parent_value; + result.try_emplace(parent_key, parent_value); + break; + } + } + } + + template<class Json> + Json flatten(const Json& value) + { + Json result; + std::basic_string<typename Json::char_type> parent_key; + flatten_(parent_key, value, result); + return result; + } + + + // unflatten + + enum class unflatten_options {none,assume_object = 1 + #if !defined(JSONCONS_NO_DEPRECATED) +,object = assume_object +#endif +}; + + template<class Json> + Json safe_unflatten 
(Json& value) + { + if (!value.is_object() || value.empty()) + { + return value; + } + bool safe = true; + std::size_t index = 0; + for (const auto& item : value.object_range()) + { + std::size_t n; + auto r = jsoncons::detail::to_integer_decimal(item.key().data(),item.key().size(), n); + if (!r || (index++ != n)) + { + safe = false; + break; + } + } + + if (safe) + { + Json j(json_array_arg); + j.reserve(value.size()); + for (auto& item : value.object_range()) + { + j.emplace_back(std::move(item.value())); + } + Json a(json_array_arg); + for (auto& item : j.array_range()) + { + a.emplace_back(safe_unflatten (item)); + } + return a; + } + else + { + Json o(json_object_arg); + for (auto& item : value.object_range()) + { + o.try_emplace(item.key(), safe_unflatten (item.value())); + } + return o; + } + } + + template<class Json> + jsoncons::optional<Json> try_unflatten_array(const Json& value) + { + using char_type = typename Json::char_type; + + if (JSONCONS_UNLIKELY(!value.is_object())) + { + JSONCONS_THROW(jsonpointer_error(jsonpointer_errc::argument_to_unflatten_invalid)); + } + Json result; + + for (const auto& item: value.object_range()) + { + Json* part = &result; + basic_json_pointer<char_type> ptr(item.key()); + std::size_t index = 0; + for (auto it = ptr.begin(); it != ptr.end(); ) + { + auto s = *it; + size_t n{0}; + auto r = jsoncons::detail::to_integer_decimal(s.data(), s.size(), n); + if (r.ec == jsoncons::detail::to_integer_errc() && (index++ == n)) + { + if (!part->is_array()) + { + *part = Json(json_array_arg); + } + if (++it != ptr.end()) + { + if (n+1 > part->size()) + { + Json& ref = part->emplace_back(); + part = std::addressof(ref); + } + else + { + part = &part->at(n); + } + } + else + { + Json& ref = part->emplace_back(item.value()); + part = std::addressof(ref); + } + } + else if (part->is_object()) + { + if (++it != ptr.end()) + { + auto res = part->try_emplace(s,Json()); + part = &(res.first->value()); + } + else + { + auto res = 
part->try_emplace(s, item.value()); + part = &(res.first->value()); + } + } + else + { + return jsoncons::optional<Json>(); + } + } + } + + return result; + } + + template<class Json> + Json unflatten_to_object(const Json& value, unflatten_options options = unflatten_options::none) + { + using char_type = typename Json::char_type; + + if (JSONCONS_UNLIKELY(!value.is_object())) + { + JSONCONS_THROW(jsonpointer_error(jsonpointer_errc::argument_to_unflatten_invalid)); + } + Json result; + + for (const auto& item: value.object_range()) + { + Json* part = &result; + basic_json_pointer<char_type> ptr(item.key()); + for (auto it = ptr.begin(); it != ptr.end(); ) + { + auto s = *it; + if (++it != ptr.end()) + { + auto res = part->try_emplace(s,Json()); + part = &(res.first->value()); + } + else + { + auto res = part->try_emplace(s, item.value()); + part = &(res.first->value()); + } + } + } + + return options == unflatten_options::none ? safe_unflatten (result) : result; + } + + template<class Json> + Json unflatten(const Json& value, unflatten_options options = unflatten_options::none) + { + if (options == unflatten_options::none) + { + jsoncons::optional<Json> j = try_unflatten_array(value); + return j ? 
*j : unflatten_to_object(value,options); + } + else + { + return unflatten_to_object(value,options); + } + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + template<class Json> + JSONCONS_DEPRECATED_MSG("Instead, use add(Json&, const typename Json::string_view_type&, const Json&)") + void insert_or_assign(Json& root, const std::basic_string<typename Json::char_type>& location, const Json& value) + { + add(root, location, value); + } + + template<class Json> + JSONCONS_DEPRECATED_MSG("Instead, use add(Json&, const typename Json::string_view_type&, const Json&, std::error_code&)") + void insert_or_assign(Json& root, const std::basic_string<typename Json::char_type>& location, const Json& value, std::error_code& ec) + { + add(root, location, value, ec); + } + template<class Json, class T> + void insert(Json& root, + const std::basic_string<typename Json::char_type>& location, + T&& value, + bool create_if_missing, + std::error_code& ec) + { + add_if_absent(root,location,std::forward<T>(value),create_if_missing,ec); + } + + template<class Json, class T> + void insert(Json& root, + const std::basic_string<typename Json::char_type>& location, + T&& value, + std::error_code& ec) + { + add_if_absent(root, location, std::forward<T>(value), ec); + } + + template<class Json, class T> + void insert(Json& root, + const std::basic_string<typename Json::char_type>& location, + T&& value, + bool create_if_missing = false) + { + add_if_absent(root, location, std::forward<T>(value), create_if_missing); + } +#endif + +} // namespace jsonpointer +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/jsonpointer/jsonpointer_error.hpp b/include/jsoncons_ext/jsonpointer/jsonpointer_error.hpp new file mode 100644 index 0000000..a0cfeff --- /dev/null +++ b/include/jsoncons_ext/jsonpointer/jsonpointer_error.hpp @@ -0,0 +1,119 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONPOINTER_JSONPOINTER_ERROR_HPP +#define JSONCONS_JSONPOINTER_JSONPOINTER_ERROR_HPP + +#include <jsoncons/json_exception.hpp> +#include <system_error> + +namespace jsoncons { namespace jsonpointer { + +class jsonpointer_error : public std::system_error, public virtual json_exception +{ +public: + jsonpointer_error(const std::error_code& ec) + : std::system_error(ec) + { + } + jsonpointer_error(const std::error_code& ec, const std::string& what_arg) + : std::system_error(ec, what_arg) + { + } + jsonpointer_error(const std::error_code& ec, const char* what_arg) + : std::system_error(ec, what_arg) + { + } + jsonpointer_error(const jsonpointer_error& other) = default; + + jsonpointer_error(jsonpointer_error&& other) = default; + + const char* what() const noexcept override + { + return std::system_error::what(); + } +}; + +enum class jsonpointer_errc +{ + success = 0, + expected_slash = 1, + index_exceeds_array_size, + expected_0_or_1, + invalid_index, + key_not_found, + key_already_exists, + expected_object_or_array, + end_of_input, + unexpected_end_of_input, + argument_to_unflatten_invalid, + invalid_flattened_key, + invalid_uri_escaped_data +}; + +class jsonpointer_error_category_impl + : public std::error_category +{ +public: + const char* name() const noexcept override + { + return "jsoncons/jsonpointer"; + } + std::string message(int ev) const override + { + switch (static_cast<jsonpointer_errc>(ev)) + { + case jsonpointer_errc::expected_slash: + return "Expected /"; + case jsonpointer_errc::index_exceeds_array_size: + return "Index exceeds array size"; + case jsonpointer_errc::expected_0_or_1: + return "Expected '0' or '1' after escape character '~'"; + case jsonpointer_errc::key_not_found: + return "Key not found"; + case jsonpointer_errc::invalid_index: + return "Invalid array 
index"; + case jsonpointer_errc::key_already_exists: + return "Key already exists"; + case jsonpointer_errc::expected_object_or_array: + return "Expected object or array"; + case jsonpointer_errc::end_of_input: + return "Unexpected end of input"; + case jsonpointer_errc::unexpected_end_of_input: + return "Unexpected end of jsonpointer input"; + case jsonpointer_errc::argument_to_unflatten_invalid: + return "Argument to unflatten must be an object"; + case jsonpointer_errc::invalid_flattened_key: + return "Flattened key is invalid"; + default: + return "Unknown jsonpointer error"; + } + } +}; + +inline +const std::error_category& jsonpointer_error_category() +{ + static jsonpointer_error_category_impl instance; + return instance; +} + +inline +std::error_code make_error_code(jsonpointer_errc result) +{ + return std::error_code(static_cast<int>(result),jsonpointer_error_category()); +} + +}} + +namespace std { + template<> + struct is_error_code_enum<jsoncons::jsonpointer::jsonpointer_errc> : public true_type + { + }; +} + +#endif diff --git a/include/jsoncons_ext/jsonschema/format_validator.hpp b/include/jsoncons_ext/jsonschema/format_validator.hpp new file mode 100644 index 0000000..312bf41 --- /dev/null +++ b/include/jsoncons_ext/jsonschema/format_validator.hpp @@ -0,0 +1,968 @@ +// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_FORMAT_VALIDATOR_HPP +#define JSONCONS_JSONSCHEMA_FORMAT_VALIDATOR_HPP + +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/uri.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpointer/jsonpointer.hpp> +#include <jsoncons_ext/jsonschema/subschema.hpp> +#include <cassert> +#include <set> +#include <sstream> +#include <iostream> +#include <cassert> +#if defined(JSONCONS_HAS_STD_REGEX) +#include <regex> +#endif + +namespace jsoncons { +namespace jsonschema { + + inline + bool is_atext( char c) + { + switch (c) + { + case '!': + case '#': + case '$': + case '%': + case '&': + case '\'': + case '*': + case '+': + case '-': + case '/': + case '=': + case '?': + case '^': + case '_': + case '`': + case '{': + case '|': + case '}': + case '~': + return true; + default: + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + } + } + + inline + bool is_dtext( char c) + { + return (c >= 33 && c <= 90) || (c >= 94 && c <= 126); + } + + // RFC 5322, section 3.4.1 + inline + bool validate_email_rfc5322(const std::string& s) + { + enum class state_t {local_part,atom,dot_atom,quoted_string,amp,domain}; + + state_t state = state_t::local_part; + std::size_t part_length = 0; + + for (char c : s) + { + switch (state) + { + case state_t::local_part: + { + if (is_atext(c)) + { + state = state_t::atom; + } + else if (c == '"') + { + state = state_t::quoted_string; + } + else + { + return false; + } + break; + } + case state_t::dot_atom: + { + if (is_atext(c)) + { + ++part_length; + state = state_t::atom; + } + else + return false; + break; + } + case state_t::atom: + { + switch (c) + { + case '@': + state = state_t::domain; + part_length = 0; + break; + case '.': + state = state_t::dot_atom; + ++part_length; + break; + default: + if 
(is_atext(c)) + ++part_length; + else + return false; + break; + } + break; + } + case state_t::quoted_string: + { + if (c == '\"') + { + state = state_t::amp; + } + else + { + ++part_length; + } + break; + } + case state_t::amp: + { + if (c == '@') + { + state = state_t::domain; + part_length = 0; + } + else + { + return false; + } + break; + } + case state_t::domain: + { + if (is_dtext(c)) + { + ++part_length; + } + else + { + return false; + } + break; + } + } + } + + return state == state_t::domain && part_length > 0; + } + + // RFC 2673, Section 3.2 + + inline + bool validate_ipv6_rfc2373(const std::string& s) + { + enum class state_t{start,expect_hexdig_or_unspecified, + hexdig, decdig,expect_unspecified, unspecified}; + + state_t state = state_t::start; + + std::size_t digit_count = 0; + std::size_t piece_count = 0; + std::size_t piece_count2 = 0; + bool has_unspecified = false; + std::size_t dec_value = 0; + + for (std::size_t i = 0; i < s.length(); ++i) + { + char c = s[i]; + switch (state) + { + case state_t::start: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + case 'a':case 'b':case 'c':case 'd':case 'e':case 'f': + state = state_t::hexdig; + ++digit_count; + piece_count = 0; + break; + case ':': + if (!has_unspecified) + { + state = state_t::expect_unspecified; + } + else + { + return false; + } + break; + default: + return false; + } + break; + } + case state_t::expect_hexdig_or_unspecified: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + dec_value = dec_value*10 + static_cast<std::size_t>(c - '0'); // just in case this piece is followed by a dot + state = state_t::hexdig; + ++digit_count; + break; + case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + case 'a':case 'b':case 'c':case 'd':case 'e':case 'f': + state = state_t::hexdig; + ++digit_count; + 
break; + case ':': + if (!has_unspecified) + { + has_unspecified = true; + state = state_t::unspecified; + } + else + { + return false; + } + break; + default: + return false; + } + break; + } + case state_t::expect_unspecified: + { + if (c == ':') + { + has_unspecified = true; + state = state_t::unspecified; + } + else + { + return false; + } + break; + } + case state_t::hexdig: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + case 'a':case 'b':case 'c':case 'd':case 'e':case 'f': + ++digit_count; + break; + case ':': + if (digit_count <= 4) + { + ++piece_count; + digit_count = 0; + dec_value = 0; + state = state_t::expect_hexdig_or_unspecified; + } + else + { + return false; + } + break; + case '.': + if (piece_count == 6 || has_unspecified) + { + ++piece_count2; + state = state_t::decdig; + dec_value = 0; + } + else + { + return false; + } + break; + default: + return false; + } + break; + } + case state_t::decdig: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + dec_value = dec_value*10 + static_cast<std::size_t>(c - '0'); + ++digit_count; + break; + case '.': + if (dec_value > 0xff) + { + return false; + } + digit_count = 0; + dec_value = 0; + ++piece_count2; + break; + default: + return false; + } + break; + } + case state_t::unspecified: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + case 'a':case 'b':case 'c':case 'd':case 'e':case 'f': + state = state_t::hexdig; + ++digit_count; + break; + default: + return false; + } + break; + } + default: + return false; + } + } + + switch (state) + { + case state_t::unspecified: + return piece_count <= 8; + case state_t::hexdig: + if (digit_count <= 4) + { + ++piece_count; + return digit_count > 0 && 
(piece_count == 8 || (has_unspecified && piece_count <= 8)); + } + else + { + return false; + } + case state_t::decdig: + ++piece_count2; + if (dec_value > 0xff) + { + return false; + } + return digit_count > 0 && piece_count2 == 4; + default: + return false; + } + } + + // RFC 2673, Section 3.2 + + inline + bool validate_ipv4_rfc2673(const std::string& s) + { + enum class state_t {expect_indicator_or_dotted_quad,decbyte, + bindig, octdig, hexdig}; + + state_t state = state_t::expect_indicator_or_dotted_quad; + + std::size_t digit_count = 0; + std::size_t decbyte_count = 0; + std::size_t value = 0; + + for (std::size_t i = 0; i < s.length(); ++i) + { + char c = s[i]; + switch (state) + { + case state_t::expect_indicator_or_dotted_quad: + { + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + state = state_t::decbyte; + decbyte_count = 0; + digit_count = 1; + value = 0; + break; + case 'b': + state = state_t::bindig; + digit_count = 0; + break; + case 'o': + state = state_t::octdig; + digit_count = 0; + break; + case 'x': + state = state_t::hexdig; + digit_count = 0; + break; + default: + return false; + } + break; + } + case state_t::bindig: + { + if (digit_count >= 256) + { + return false; + } + switch (c) + { + case '0':case '1': + ++digit_count; + break; + default: + return false; + } + break; + } + case state_t::octdig: + { + if (digit_count >= 86) + { + return false; + } + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7': + ++digit_count; + break; + default: + return false; + } + break; + } + case state_t::hexdig: + { + if (digit_count >= 64) + { + return false; + } + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + case 'A':case 'B':case 'C':case 'D':case 'E':case 'F': + case 'a':case 'b':case 'c':case 'd':case 'e':case 'f': + ++digit_count; + break; + default: + return false; + } + break; + } + case 
state_t::decbyte: + { + if (decbyte_count >= 4) + { + return false; + } + switch (c) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9': + { + if (digit_count >= 3) + { + return false; + } + ++digit_count; + value = value*10 + static_cast<std::size_t>(c - '0'); + if (value > 255) + { + return false; + } + break; + } + case '.': + if (decbyte_count > 3) + { + return false; + } + ++decbyte_count; + digit_count = 0; + value = 0; + break; + default: + return false; + } + break; + } + default: + return false; + } + } + + switch (state) + { + case state_t::decbyte: + if (digit_count > 0) + { + ++decbyte_count; + } + else + { + return false; + } + return (decbyte_count == 4) ? true : false; + case state_t::bindig: + return digit_count > 0 ? true : false; + case state_t::octdig: + return digit_count > 0 ? true : false; + case state_t::hexdig: + return digit_count > 0 ? true : false; + default: + return false; + } + } + + // RFC 1034, Section 3.1 + inline + bool validate_hostname_rfc1034(const std::string& hostname) + { + enum class state_t {start_label,expect_letter_or_digit_or_hyphen_or_dot}; + + state_t state = state_t::start_label; + std::size_t length = hostname.length() - 1; + std::size_t label_length = 0; + + for (std::size_t i = 0; i < length; ++i) + { + char c = hostname[i]; + switch (state) + { + case state_t::start_label: + { + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + { + ++label_length; + state = state_t::expect_letter_or_digit_or_hyphen_or_dot; + } + else + { + return false; + } + break; + } + case state_t::expect_letter_or_digit_or_hyphen_or_dot: + { + if (c == '.') + { + label_length = 0; + state = state_t::start_label; + } + else if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c < '9') || c == '-')) + { + return false; + } + if (++label_length > 63) + { + return false; + } + break; + } + } + } + + char last = hostname.back(); + if (!((last >= 'a' && last <= 'z') || (last >= 
'A' && last <= 'Z') || (last >= '0' && last < '9'))) + { + return false; + } + return true; + } + + inline + bool is_leap_year(std::size_t year) + { + return (year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)); + } + + inline + std::size_t days_in_month(std::size_t year, std::size_t month) + { + switch (month) + { + case 1: return 31; + case 2: return is_leap_year(year) ? 29 : 28; + case 3: return 31; + case 4: return 30; + case 5: return 31; + case 6: return 30; + case 7: return 31; + case 8: return 31; + case 9: return 30; + case 10: return 31; + case 11: return 30; + case 12: return 31; + default: + JSONCONS_UNREACHABLE(); + break; + } + } + + enum class date_time_type {date_time,date,time}; + // RFC 3339, Section 5.6 + inline + bool validate_date_time_rfc3339(const std::string& s, date_time_type type) + { + enum class state_t {fullyear,month,mday,hour,minute,second,secfrac,z,offset_hour,offset_minute}; + + std::size_t piece_length = 0; + std::size_t year = 0; + std::size_t month = 0; + std::size_t mday = 0; + std::size_t value = 0; + state_t state = (type == date_time_type::time) ? 
state_t::hour : state_t::fullyear; + + for (char c : s) + { + switch (state) + { + case state_t::fullyear: + { + if (piece_length < 4 && (c >= '0' && c <= '9')) + { + piece_length++; + year = year*10 + static_cast<std::size_t>(c - '0'); + } + else if (c == '-' && piece_length == 4) + { + state = state_t::month; + piece_length = 0; + } + else + { + return false; + } + break; + } + case state_t::month: + { + if (piece_length < 2 && (c >= '0' && c <= '9')) + { + piece_length++; + month = month*10 + static_cast<std::size_t>(c - '0'); + } + else if (c == '-' && piece_length == 2 && (month >=1 && month <= 12)) + { + state = state_t::mday; + piece_length = 0; + } + else + { + return false; + } + break; + } + case state_t::mday: + { + if (piece_length < 2 && (c >= '0' && c <= '9')) + { + piece_length++; + mday = mday *10 + static_cast<std::size_t>(c - '0'); + } + else if ((c == 'T' || c == 't') && piece_length == 2 && (mday <= days_in_month(year, month))) + { + piece_length = 0; + state = state_t::hour; + } + else + { + return false; + } + break; + } + case state_t::hour: + { + if (piece_length < 2 && (c >= '0' && c <= '9')) + { + piece_length++; + value = value*10 + static_cast<std::size_t>(c - '0'); + } + else if (c == ':' && piece_length == 2 && (/*value >=0 && */ value <= 23)) + { + state = state_t::minute; + value = 0; + piece_length = 0; + } + else + { + return false; + } + break; + } + case state_t::minute: + { + if (piece_length < 2 && (c >= '0' && c <= '9')) + { + piece_length++; + value = value*10 + static_cast<std::size_t>(c - '0'); + } + else if (c == ':' && piece_length == 2 && (/*value >=0 && */value <= 59)) + { + state = state_t::second; + value = 0; + piece_length = 0; + } + else + { + return false; + } + break; + } + case state_t::second: + { + if (piece_length < 2 && (c >= '0' && c <= '9')) + { + piece_length++; + value = value*10 + static_cast<std::size_t>(c - '0'); + } + else if (piece_length == 2 && (/*value >=0 && */value <= 60)) // 00-58, 00-59, 
00-60 based on leap second rules + { + switch (c) + { + case '.': + value = 0; + state = state_t::secfrac; + break; + case '+': + case '-': + value = 0; + piece_length = 0; + state = state_t::offset_hour; + break; + case 'Z': + case 'z': + state = state_t::z; + break; + default: + return false; + } + } + else + { + return false; + } + break; + } + case state_t::secfrac: + { + if (c >= '0' && c <= '9') + { + value = value*10 + static_cast<std::size_t>(c - '0'); + } + else + { + switch (c) + { + case '+': + case '-': + value = 0; + piece_length = 0; + state = state_t::offset_hour; + break; + case 'Z': + case 'z': + state = state_t::z; + break; + default: + return false; + } + } + break; + } + case state_t::offset_hour: + { + if (piece_length < 2 && (c >= '0' && c <= '9')) + { + piece_length++; + value = value*10 + static_cast<std::size_t>(c - '0'); + } + else if (c == ':' && piece_length == 2 && (/*value >=0 && */value <= 23)) + { + value = 0; + piece_length = 0; + state = state_t::offset_minute; + } + else + { + return false; + } + break; + } + case state_t::offset_minute: + { + if (piece_length < 2 && (c >= '0' && c <= '9')) + { + piece_length++; + value = value*10 + static_cast<std::size_t>(c - '0'); + } + else if (c == ':' && piece_length == 2 && (/*value >=0 && */value <= 59)) + { + value = 0; + piece_length = 0; + } + else + { + return false; + } + break; + } + case state_t::z: + return false; + } + } + + if (type == date_time_type::date) + { + return state == state_t::mday && piece_length == 2 && (mday >= 1 && mday <= days_in_month(year, month)); + } + else + { + return state == state_t::offset_minute || state == state_t::z || state == state_t::secfrac; + } + } + + // format checkers + using format_checker = std::function<void(const std::string& absolute_keyword_location, + const jsonpointer::json_pointer& instance_location, + const std::string&, + error_reporter& reporter)>; + + inline + void rfc3339_date_check(const std::string& absolute_keyword_location, + 
const jsonpointer::json_pointer& instance_location, + const std::string& value, + error_reporter& reporter) + { + if (!validate_date_time_rfc3339(value,date_time_type::date)) + { + reporter.error(validation_output("date", + absolute_keyword_location, + instance_location.to_uri_fragment(), + "\"" + value + "\" is not a RFC 3339 date string")); + } + } + + inline + void rfc3339_time_check(const std::string& absolute_keyword_location, + const jsonpointer::json_pointer& instance_location, + const std::string &value, + error_reporter& reporter) + { + if (!validate_date_time_rfc3339(value, date_time_type::time)) + { + reporter.error(validation_output("time", + absolute_keyword_location, + instance_location.to_uri_fragment(), + "\"" + value + "\" is not a RFC 3339 time string")); + } + } + + inline + void rfc3339_date_time_check(const std::string& absolute_keyword_location, + const jsonpointer::json_pointer& instance_location, + const std::string &value, + error_reporter& reporter) + { + if (!validate_date_time_rfc3339(value, date_time_type::date_time)) + { + reporter.error(validation_output("date-time", + absolute_keyword_location, + instance_location.to_uri_fragment(), + "\"" + value + "\" is not a RFC 3339 date-time string")); + } + } + + inline + void email_check(const std::string& absolute_keyword_location, + const jsonpointer::json_pointer& instance_location, + const std::string& value, + error_reporter& reporter) + { + if (!validate_email_rfc5322(value)) + { + reporter.error(validation_output("email", + absolute_keyword_location, + instance_location.to_uri_fragment(), + "\"" + value + "\" is not a valid email address as defined by RFC 5322")); + } + } + + inline + void hostname_check(const std::string& absolute_keyword_location, + const jsonpointer::json_pointer& instance_location, + const std::string& value, + error_reporter& reporter) + { + if (!validate_hostname_rfc1034(value)) + { + reporter.error(validation_output("hostname", + absolute_keyword_location, + 
instance_location.to_uri_fragment(), + "\"" + value + "\" is not a valid hostname as defined by RFC 3986 Appendix A")); + } + } + + inline + void ipv4_check(const std::string& absolute_keyword_location, + const jsonpointer::json_pointer& instance_location, + const std::string& value, + error_reporter& reporter) + { + if (!validate_ipv4_rfc2673(value)) + { + reporter.error(validation_output("ipv4", + absolute_keyword_location, + instance_location.to_uri_fragment(), + "\"" + value + "\" is not a valid IPv4 address as defined by RFC 2673")); + } + } + + inline + void ipv6_check(const std::string& absolute_keyword_location, + const jsonpointer::json_pointer& instance_location, + const std::string& value, + error_reporter& reporter) + { + if (!validate_ipv6_rfc2373(value)) + { + reporter.error(validation_output("ipv6", + absolute_keyword_location, + instance_location.to_uri_fragment(), + "\"" + value + "\" is not a valid IPv6 address as defined by RFC 2373")); + } + } + + inline + void regex_check(const std::string& absolute_keyword_location, + const jsonpointer::json_pointer& instance_location, + const std::string& value, + error_reporter& reporter) + { +#if defined(JSONCONS_HAS_STD_REGEX) + try + { + std::regex re(value, std::regex::ECMAScript); + } + catch (const std::exception& e) + { + reporter.error(validation_output("pattern", + absolute_keyword_location, + instance_location.to_uri_fragment(), + "\"" + value + "\" is not a valid ECMAScript regular expression. " + e.what())); + } +#endif + } + +} // namespace jsonschema +} // namespace jsoncons + +#endif // JSONCONS_JSONSCHEMA_FORMAT_CHECKERS_HPP diff --git a/include/jsoncons_ext/jsonschema/json_validator.hpp b/include/jsoncons_ext/jsonschema/json_validator.hpp new file mode 100644 index 0000000..87bec58 --- /dev/null +++ b/include/jsoncons_ext/jsonschema/json_validator.hpp @@ -0,0 +1,120 @@ +// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_JSON_VALIDATOR_HPP +#define JSONCONS_JSONSCHEMA_JSON_VALIDATOR_HPP + +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/uri.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpointer/jsonpointer.hpp> +#include <jsoncons_ext/jsonschema/keyword_validator_factory.hpp> +#include <cassert> +#include <set> +#include <sstream> +#include <iostream> +#include <cassert> +#include <functional> + +namespace jsoncons { +namespace jsonschema { + + class throwing_error_reporter : public error_reporter + { + void do_error(const validation_output& o) override + { + JSONCONS_THROW(validation_error(o.message())); + } + }; + + class fail_early_reporter : public error_reporter + { + void do_error(const validation_output&) override + { + } + public: + fail_early_reporter() + : error_reporter(true) + { + } + }; + + using error_reporter_t = std::function<void(const validation_output& o)>; + + struct error_reporter_adaptor : public error_reporter + { + error_reporter_t reporter_; + + error_reporter_adaptor(const error_reporter_t& reporter) + : reporter_(reporter) + { + } + private: + void do_error(const validation_output& e) override + { + reporter_(e); + } + }; + + template <class Json> + class json_validator + { + std::shared_ptr<json_schema<Json>> root_; + + public: + json_validator(std::shared_ptr<json_schema<Json>> root) + : root_(root) + { + } + + json_validator(json_validator &&) = default; + json_validator &operator=(json_validator &&) = default; + + json_validator(json_validator const &) = delete; + json_validator &operator=(json_validator const &) = delete; + + ~json_validator() = default; + + // Validate input JSON against a JSON Schema with a default throwing error reporter + Json validate(const Json& instance) const + { + throwing_error_reporter reporter; + 
jsonpointer::json_pointer instance_location("#"); + Json patch(json_array_arg); + + root_->validate(instance, instance_location, reporter, patch); + return patch; + } + + // Validate input JSON against a JSON Schema + bool is_valid(const Json& instance) const + { + fail_early_reporter reporter; + jsonpointer::json_pointer instance_location("#"); + Json patch(json_array_arg); + + root_->validate(instance, instance_location, reporter, patch); + return reporter.error_count() == 0; + } + + // Validate input JSON against a JSON Schema with a provided error reporter + template <class Reporter> + typename std::enable_if<type_traits::is_unary_function_object_exact<Reporter,void,validation_output>::value,Json>::type + validate(const Json& instance, const Reporter& reporter) const + { + jsonpointer::json_pointer instance_location("#"); + Json patch(json_array_arg); + + error_reporter_adaptor adaptor(reporter); + root_->validate(instance, instance_location, adaptor, patch); + return patch; + } + }; + +} // namespace jsonschema +} // namespace jsoncons + +#endif // JSONCONS_JSONSCHEMA_JSON_VALIDATOR_HPP diff --git a/include/jsoncons_ext/jsonschema/jsonschema.hpp b/include/jsoncons_ext/jsonschema/jsonschema.hpp new file mode 100644 index 0000000..e2c4210 --- /dev/null +++ b/include/jsoncons_ext/jsonschema/jsonschema.hpp @@ -0,0 +1,13 @@ +// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_JSONSCHEMA_HPP +#define JSONCONS_JSONSCHEMA_JSONSCHEMA_HPP + +#include <jsoncons_ext/jsonschema/keyword_validator.hpp> +#include <jsoncons_ext/jsonschema/json_validator.hpp> + +#endif // JSONCONS_JSONSCHEMA_JSONSCHEMA_HPP diff --git a/include/jsoncons_ext/jsonschema/jsonschema_error.hpp b/include/jsoncons_ext/jsonschema/jsonschema_error.hpp new file mode 100644 index 0000000..7cb1061 --- /dev/null +++ b/include/jsoncons_ext/jsonschema/jsonschema_error.hpp @@ -0,0 +1,105 @@ +/// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_JSONSCHEMA_ERROR_HPP +#define JSONCONS_JSONSCHEMA_JSONSCHEMA_ERROR_HPP + +#include <jsoncons/json_exception.hpp> +#include <system_error> + +namespace jsoncons { +namespace jsonschema { + + class schema_error : public std::runtime_error, public virtual json_exception + { + public: + schema_error(const std::string& message) + : std::runtime_error(message) + { + } + + const char* what() const noexcept override + { + return std::runtime_error::what(); + } + }; + + class validation_error : public std::runtime_error, public virtual json_exception + { + public: + validation_error(const std::string& message) + : std::runtime_error(message) + { + } + + const char* what() const noexcept override + { + return std::runtime_error::what(); + } + }; + + class validation_output + { + std::string keyword_; + std::string absolute_keyword_location_; + std::string instance_location_; + std::string message_; + std::vector<validation_output> nested_errors_; + public: + validation_output(std::string keyword, + std::string absolute_keyword_location, + 
std::string instance_location, + std::string message) + : keyword_(std::move(keyword)), + absolute_keyword_location_(std::move(absolute_keyword_location)), + instance_location_(std::move(instance_location)), + message_(std::move(message)) + { + } + + validation_output(const std::string& keyword, + const std::string& absolute_keyword_location, + const std::string& instance_location, + const std::string& message, + const std::vector<validation_output>& nested_errors) + : keyword_(keyword), + absolute_keyword_location_(absolute_keyword_location), + instance_location_(instance_location), + message_(message), + nested_errors_(nested_errors) + { + } + + const std::string& instance_location() const + { + return instance_location_; + } + + const std::string& message() const + { + return message_; + } + + const std::string& absolute_keyword_location() const + { + return absolute_keyword_location_; + } + + const std::string& keyword() const + { + return keyword_; + } + + const std::vector<validation_output>& nested_errors() const + { + return nested_errors_; + } + }; + +} // namespace jsonschema +} // namespace jsoncons + +#endif // JSONCONS_JSONSCHEMA_JSONSCHEMA_ERROR_HPP diff --git a/include/jsoncons_ext/jsonschema/jsonschema_version.hpp b/include/jsoncons_ext/jsonschema/jsonschema_version.hpp new file mode 100644 index 0000000..bf0afff --- /dev/null +++ b/include/jsoncons_ext/jsonschema/jsonschema_version.hpp @@ -0,0 +1,18 @@ +// Copyright 2021 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_JSONSCHEMA_VERSION_HPP +#define JSONCONS_JSONSCHEMA_JSONSCHEMA_VERSION_HPP + +#include <jsoncons/json.hpp> + +namespace jsoncons { +namespace jsonschema { + +} // namespace jsonschema +} // namespace jsoncons + +#endif // JSONCONS_JSONSCHEMA_JSONSCHEMA_VERSION_HPP diff --git a/include/jsoncons_ext/jsonschema/keyword_validator.hpp b/include/jsoncons_ext/jsonschema/keyword_validator.hpp new file mode 100644 index 0000000..249c7d0 --- /dev/null +++ b/include/jsoncons_ext/jsonschema/keyword_validator.hpp @@ -0,0 +1,1745 @@ +// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_KEYWORD_VALIDATOR_HPP +#define JSONCONS_JSONSCHEMA_KEYWORD_VALIDATOR_HPP + +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/uri.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpointer/jsonpointer.hpp> +#include <jsoncons_ext/jsonschema/subschema.hpp> +#include <jsoncons_ext/jsonschema/format_validator.hpp> +#include <cassert> +#include <set> +#include <sstream> +#include <iostream> +#include <cassert> +#if defined(JSONCONS_HAS_STD_REGEX) +#include <regex> +#endif + +namespace jsoncons { +namespace jsonschema { + + template <class Json> + class abstract_keyword_validator_factory + { + public: + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + virtual ~abstract_keyword_validator_factory() = default; + + virtual validator_pointer make_keyword_validator(const Json& schema, + const std::vector<schema_location>& uris, + const std::vector<std::string>& keys) = 0; + virtual validator_pointer make_required_validator(const 
std::vector<schema_location>& uris, + const std::vector<std::string>& items) = 0; + + virtual validator_pointer make_null_validator(const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_true_validator(const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_false_validator(const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_object_validator(const Json& sch, + const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_array_validator(const Json& sch, + const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_string_validator(const Json& sch, + const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_boolean_validator(const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_integer_validator(const Json& sch, + const std::vector<schema_location>& uris, + std::set<std::string>& keywords) = 0; + + virtual validator_pointer make_number_validator(const Json& sch, + const std::vector<schema_location>& uris, + std::set<std::string>& keywords) = 0; + + virtual validator_pointer make_not_validator(const Json& schema, + const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_all_of_validator(const Json& schema, + const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_any_of_validator(const Json& schema, + const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_one_of_validator(const Json& schema, + const std::vector<schema_location>& uris) = 0; + + virtual validator_pointer make_type_validator(const Json& schema, + const std::vector<schema_location>& uris) = 0; + }; + + struct collecting_error_reporter : public error_reporter + { + std::vector<validation_output> errors; + + private: + void do_error(const validation_output& o) override + { + errors.push_back(o); + } + }; + + // string keyword_validator + + inline + 
std::string make_absolute_keyword_location(const std::vector<schema_location>& uris, + const std::string& keyword) + { + for (auto it = uris.rbegin(); it != uris.rend(); ++it) + { + if (!it->has_identifier() && it->is_absolute()) + { + return it->append(keyword).string(); + } + } + return ""; + } + + template <class Json> + class string_validator : public keyword_validator<Json> + { + jsoncons::optional<std::size_t> max_length_; + std::string max_length_location_; + jsoncons::optional<std::size_t> min_length_; + std::string min_length_location_; + + #if defined(JSONCONS_HAS_STD_REGEX) + jsoncons::optional<std::regex> pattern_; + std::string pattern_string_; + std::string pattern_location_; + #endif + + format_checker format_check_; + std::string format_location_; + + jsoncons::optional<std::string> content_encoding_; + std::string content_encoding_location_; + jsoncons::optional<std::string> content_media_type_; + std::string content_media_type_location_; + + public: + string_validator(const Json& sch, const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? 
uris.back().string() : ""), max_length_(), min_length_(), + #if defined(JSONCONS_HAS_STD_REGEX) + pattern_(), + #endif + content_encoding_(), content_media_type_() + { + auto it = sch.find("maxLength"); + if (it != sch.object_range().end()) + { + max_length_ = it->value().template as<std::size_t>(); + max_length_location_ = make_absolute_keyword_location(uris, "maxLength"); + } + + it = sch.find("minLength"); + if (it != sch.object_range().end()) + { + min_length_ = it->value().template as<std::size_t>(); + min_length_location_ = make_absolute_keyword_location(uris, "minLength"); + } + + it = sch.find("contentEncoding"); + if (it != sch.object_range().end()) + { + content_encoding_ = it->value().template as<std::string>(); + content_encoding_location_ = make_absolute_keyword_location(uris, "contentEncoding"); + // If "contentEncoding" is set to "binary", a Json value + // of type json_type::byte_string_value is accepted. + } + + it = sch.find("contentMediaType"); + if (it != sch.object_range().end()) + { + content_media_type_ = it->value().template as<std::string>(); + content_media_type_location_ = make_absolute_keyword_location(uris, "contentMediaType"); + } + + #if defined(JSONCONS_HAS_STD_REGEX) + it = sch.find("pattern"); + if (it != sch.object_range().end()) + { + pattern_string_ = it->value().template as<std::string>(); + pattern_ = std::regex(it->value().template as<std::string>(),std::regex::ECMAScript); + pattern_location_ = make_absolute_keyword_location(uris, "pattern"); + } + #endif + + it = sch.find("format"); + if (it != sch.object_range().end()) + { + format_location_ = make_absolute_keyword_location(uris, "format"); + std::string format = it->value().template as<std::string>(); + if (format == "date-time") + { + format_check_ = rfc3339_date_time_check; + } + else if (format == "date") + { + format_check_ = rfc3339_date_check; + } + else if (format == "time") + { + format_check_ = rfc3339_time_check; + } + else if (format == "email") + { + 
format_check_ = email_check; + } + else if (format == "hostname") + { + format_check_ = hostname_check; + } + else if (format == "ipv4") + { + format_check_ = ipv4_check; + } + else if (format == "ipv6") + { + format_check_ = ipv6_check; + } + else if (format == "regex") + { + format_check_ = regex_check; + } + else + { + // Not supported - ignore + } + } + } + + private: + + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json&) const override + { + std::string content; + if (content_encoding_) + { + if (*content_encoding_ == "base64") + { + auto s = instance.template as<jsoncons::string_view>(); + auto retval = jsoncons::decode_base64(s.begin(), s.end(), content); + if (retval.ec != jsoncons::conv_errc::success) + { + reporter.error(validation_output("contentEncoding", + content_encoding_location_, + instance_location.to_uri_fragment(), + "Content is not a base64 string")); + if (reporter.fail_early()) + { + return; + } + } + } + else if (!content_encoding_->empty()) + { + reporter.error(validation_output("contentEncoding", + content_encoding_location_, + instance_location.to_uri_fragment(), + "unable to check for contentEncoding '" + *content_encoding_ + "'")); + if (reporter.fail_early()) + { + return; + } + } + } + else + { + content = instance.template as<std::string>(); + } + + if (content_media_type_) + { + if (content_media_type_ == "application/Json") + { + json_string_reader reader(content); + std::error_code ec; + reader.read(ec); + + if (ec) + { + reporter.error(validation_output("contentMediaType", + content_media_type_location_, + instance_location.to_uri_fragment(), + std::string("Content is not JSON: ") + ec.message())); + } + } + } + else if (instance.type() == json_type::byte_string_value) + { + reporter.error(validation_output("contentMediaType", + content_media_type_location_, + instance_location.to_uri_fragment(), + "Expected string, but is byte string")); + if 
(reporter.fail_early()) + { + return; + } + } + + if (instance.type() != json_type::string_value) + { + return; + } + + if (min_length_) + { + std::size_t length = unicode_traits::count_codepoints(content.data(), content.size()); + if (length < *min_length_) + { + reporter.error(validation_output("minLength", + min_length_location_, + instance_location.to_uri_fragment(), + std::string("Expected minLength: ") + std::to_string(*min_length_) + + ", actual: " + std::to_string(length))); + if (reporter.fail_early()) + { + return; + } + } + } + + if (max_length_) + { + std::size_t length = unicode_traits::count_codepoints(content.data(), content.size()); + if (length > *max_length_) + { + reporter.error(validation_output("maxLength", + max_length_location_, + instance_location.to_uri_fragment(), + std::string("Expected maxLength: ") + std::to_string(*max_length_) + + ", actual: " + std::to_string(length))); + if (reporter.fail_early()) + { + return; + } + } + } + + #if defined(JSONCONS_HAS_STD_REGEX) + if (pattern_) + { + if (!std::regex_search(content, *pattern_)) + { + std::string message("String \""); + message.append(instance.template as<std::string>()); + message.append("\" does not match pattern \""); + message.append(pattern_string_); + message.append("\""); + reporter.error(validation_output("pattern", + pattern_location_, + instance_location.to_uri_fragment(), + std::move(message))); + if (reporter.fail_early()) + { + return; + } + } + } + + #endif + + if (format_check_ != nullptr) + { + format_check_(format_location_, instance_location, content, reporter); + if (reporter.error_count() > 0 && reporter.fail_early()) + { + return; + } + } + } + }; + + // not_validator + + template <class Json> + class not_validator : public keyword_validator<Json> + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + validator_pointer rule_; + + public: + not_validator(abstract_keyword_validator_factory<Json>* builder, + const Json& sch, + const 
std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? uris.back().string() : "") + { + rule_ = builder->make_keyword_validator(sch, uris, {"not"}); + } + + private: + + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const final + { + collecting_error_reporter local_reporter; + rule_->validate(instance, instance_location, local_reporter, patch); + + if (local_reporter.errors.empty()) + { + reporter.error(validation_output("not", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Instance must not be valid against schema")); + } + } + + jsoncons::optional<Json> get_default_value(const jsonpointer::json_pointer& instance_location, + const Json& instance, + error_reporter& reporter) const override + { + return rule_->get_default_value(instance_location, instance, reporter); + } + }; + + template <class Json> + struct all_of_criterion + { + static const std::string& key() + { + static const std::string k("allOf"); + return k; + } + + static bool is_complete(const Json&, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + const collecting_error_reporter& local_reporter, + std::size_t) + { + if (!local_reporter.errors.empty()) + reporter.error(validation_output("allOf", + "", + instance_location.to_uri_fragment(), + "At least one schema failed to match, but all are required to match. 
", + local_reporter.errors)); + return !local_reporter.errors.empty(); + } + }; + + template <class Json> + struct any_of_criterion + { + static const std::string& key() + { + static const std::string k("anyOf"); + return k; + } + + static bool is_complete(const Json&, + const jsonpointer::json_pointer&, + error_reporter&, + const collecting_error_reporter&, + std::size_t count) + { + return count == 1; + } + }; + + template <class Json> + struct one_of_criterion + { + static const std::string& key() + { + static const std::string k("oneOf"); + return k; + } + + static bool is_complete(const Json&, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + const collecting_error_reporter&, + std::size_t count) + { + if (count > 1) + { + std::string message(std::to_string(count)); + message.append(" subschemas matched, but exactly one is required to match"); + reporter.error(validation_output("oneOf", + "", + instance_location.to_uri_fragment(), + std::move(message))); + } + return count > 1; + } + }; + + template <class Json,class Criterion> + class combining_validator : public keyword_validator<Json> + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + std::vector<validator_pointer> subschemas_; + + public: + combining_validator(abstract_keyword_validator_factory<Json>* builder, + const Json& sch, + const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? 
uris.back().string() : "") + { + size_t c = 0; + for (const auto& subsch : sch.array_range()) + { + subschemas_.push_back(builder->make_keyword_validator(subsch, uris, {Criterion::key(), std::to_string(c++)})); + } + + // Validate value of allOf, anyOf, and oneOf "MUST be a non-empty array" + } + + private: + + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const final + { + size_t count = 0; + + collecting_error_reporter local_reporter; + for (auto& s : subschemas_) + { + std::size_t mark = local_reporter.errors.size(); + s->validate(instance, instance_location, local_reporter, patch); + if (mark == local_reporter.errors.size()) + count++; + + if (Criterion::is_complete(instance, instance_location, reporter, local_reporter, count)) + return; + } + + if (count == 0) + { + reporter.error(validation_output("combined", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "No schema matched, but one of them is required to match", + local_reporter.errors)); + } + } + }; + + template <class T, class Json> + T get_number(const Json& val, const string_view& keyword) + { + if (!val.is_number()) + { + std::string message(keyword); + message.append(" must be a number value"); + JSONCONS_THROW(schema_error(message)); + } + return val.template as<T>(); + } + + template <class Json,class T> + class numeric_validator_base : public keyword_validator<Json> + { + jsoncons::optional<T> maximum_; + std::string absolute_maximum_location_; + jsoncons::optional<T> minimum_; + std::string absolute_minimum_location_; + jsoncons::optional<T> exclusive_maximum_; + std::string absolute_exclusive_maximum_location_; + jsoncons::optional<T> exclusive_minimum_; + std::string absolute_exclusive_minimum_location_; + jsoncons::optional<double> multiple_of_; + std::string absolute_multiple_of_location_; + + public: + numeric_validator_base(const Json& sch, + const 
std::vector<schema_location>& uris, + std::set<std::string>& keywords) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? uris.back().string() : ""), + maximum_(), minimum_(),exclusive_maximum_(), exclusive_minimum_(), multiple_of_() + { + auto it = sch.find("maximum"); + if (it != sch.object_range().end()) + { + maximum_ = get_number<T>(it->value(), "maximum"); + absolute_maximum_location_ = make_absolute_keyword_location(uris,"maximum"); + keywords.insert("maximum"); + } + + it = sch.find("minimum"); + if (it != sch.object_range().end()) + { + minimum_ = get_number<T>(it->value(), "minimum"); + absolute_minimum_location_ = make_absolute_keyword_location(uris,"minimum"); + keywords.insert("minimum"); + } + + it = sch.find("exclusiveMaximum"); + if (it != sch.object_range().end()) + { + exclusive_maximum_ = get_number<T>(it->value(), "exclusiveMaximum"); + absolute_exclusive_maximum_location_ = make_absolute_keyword_location(uris,"exclusiveMaximum"); + keywords.insert("exclusiveMaximum"); + } + + it = sch.find("exclusiveMinimum"); + if (it != sch.object_range().end()) + { + exclusive_minimum_ = get_number<T>(it->value(), "exclusiveMinimum"); + absolute_exclusive_minimum_location_ = make_absolute_keyword_location(uris,"exclusiveMinimum"); + keywords.insert("exclusiveMinimum"); + } + + it = sch.find("multipleOf"); + if (it != sch.object_range().end()) + { + multiple_of_ = get_number<double>(it->value(), "multipleOf"); + absolute_multiple_of_location_ = make_absolute_keyword_location(uris,"multipleOf"); + keywords.insert("multipleOf"); + } + } + + protected: + + void apply_kewords(T value, + const jsonpointer::json_pointer& instance_location, + const Json& instance, + error_reporter& reporter) const + { + if (multiple_of_ && value != 0) // exclude zero + { + if (!is_multiple_of(value, *multiple_of_)) + { + reporter.error(validation_output("multipleOf", + absolute_multiple_of_location_, + instance_location.to_uri_fragment(), + instance.template 
as<std::string>() + " is not a multiple of " + std::to_string(*multiple_of_))); + if (reporter.fail_early()) + { + return; + } + } + } + + if (maximum_) + { + if (value > *maximum_) + { + reporter.error(validation_output("maximum", + absolute_maximum_location_, + instance_location.to_uri_fragment(), + instance.template as<std::string>() + " exceeds maximum of " + std::to_string(*maximum_))); + if (reporter.fail_early()) + { + return; + } + } + } + + if (minimum_) + { + if (value < *minimum_) + { + reporter.error(validation_output("minimum", + absolute_minimum_location_, + instance_location.to_uri_fragment(), + instance.template as<std::string>() + " is below minimum of " + std::to_string(*minimum_))); + if (reporter.fail_early()) + { + return; + } + } + } + + if (exclusive_maximum_) + { + if (value >= *exclusive_maximum_) + { + reporter.error(validation_output("exclusiveMaximum", + absolute_exclusive_maximum_location_, + instance_location.to_uri_fragment(), + instance.template as<std::string>() + " exceeds maximum of " + std::to_string(*exclusive_maximum_))); + if (reporter.fail_early()) + { + return; + } + } + } + + if (exclusive_minimum_) + { + if (value <= *exclusive_minimum_) + { + reporter.error(validation_output("exclusiveMinimum", + absolute_exclusive_minimum_location_, + instance_location.to_uri_fragment(), + instance.template as<std::string>() + " is below minimum of " + std::to_string(*exclusive_minimum_))); + if (reporter.fail_early()) + { + return; + } + } + } + } + private: + static bool is_multiple_of(T x, double multiple_of) + { + double rem = std::remainder(x, multiple_of); + double eps = std::nextafter(x, 0) - x; + return std::fabs(rem) < std::fabs(eps); + } + }; + + template <class Json> + class integer_validator : public numeric_validator_base<Json,int64_t> + { + public: + integer_validator(const Json& sch, + const std::vector<schema_location>& uris, + std::set<std::string>& keywords) + : numeric_validator_base<Json, int64_t>(sch, uris, keywords) 
+ { + } + private: + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json&) const + { + if (!(instance.template is_integer<int64_t>() || (instance.is_double() && static_cast<double>(instance.template as<int64_t>()) == instance.template as<double>()))) + { + reporter.error(validation_output("integer", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Instance is not an integer")); + if (reporter.fail_early()) + { + return; + } + } + int64_t value = instance.template as<int64_t>(); + this->apply_kewords(value, instance_location, instance, reporter); + } + }; + + template <class Json> + class number_validator : public numeric_validator_base<Json,double> + { + public: + number_validator(const Json& sch, + const std::vector<schema_location>& uris, + std::set<std::string>& keywords) + : numeric_validator_base<Json, double>(sch, uris, keywords) + { + } + private: + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json&) const + { + if (!(instance.template is_integer<int64_t>() || instance.is_double())) + { + reporter.error(validation_output("number", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Instance is not a number")); + if (reporter.fail_early()) + { + return; + } + } + double value = instance.template as<double>(); + this->apply_kewords(value, instance_location, instance, reporter); + } + }; + + // null_validator + + template <class Json> + class null_validator : public keyword_validator<Json> + { + public: + null_validator(const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? 
uris.back().string() : "") + { + } + private: + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json&) const override + { + if (!instance.is_null()) + { + reporter.error(validation_output("null", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Expected to be null")); + } + } + }; + + template <class Json> + class boolean_validator : public keyword_validator<Json> + { + public: + boolean_validator(const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? uris.back().string() : "") + { + } + private: + void do_validate(const Json&, + const jsonpointer::json_pointer&, + error_reporter&, + Json&) const override + { + } + + }; + + template <class Json> + class true_validator : public keyword_validator<Json> + { + public: + true_validator(const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? uris.back().string() : "") + { + } + private: + void do_validate(const Json&, + const jsonpointer::json_pointer&, + error_reporter&, + Json&) const override + { + } + }; + + template <class Json> + class false_validator : public keyword_validator<Json> + { + public: + false_validator(const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? 
uris.back().string() : "") + { + } + private: + void do_validate(const Json&, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json&) const override + { + reporter.error(validation_output("false", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "False schema always fails")); + } + }; + + template <class Json> + class required_validator : public keyword_validator<Json> + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + std::vector<std::string> items_; + + public: + required_validator(const std::vector<schema_location>& uris, + const std::vector<std::string>& items) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? uris.back().string() : ""), items_(items) {} + required_validator(const std::string& absolute_keyword_location, const std::vector<std::string>& items) + : keyword_validator<Json>(absolute_keyword_location), items_(items) {} + + required_validator(const required_validator&) = delete; + required_validator(required_validator&&) = default; + required_validator& operator=(const required_validator&) = delete; + required_validator& operator=(required_validator&&) = default; + private: + + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json&) const override final + { + for (const auto& key : items_) + { + if (instance.find(key) == instance.object_range().end()) + { + reporter.error(validation_output("required", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Required property \"" + key + "\" not found")); + if (reporter.fail_early()) + { + return; + } + } + } + } + }; + + template <class Json> + class object_validator : public keyword_validator<Json> + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + jsoncons::optional<std::size_t> max_properties_; + std::string absolute_max_properties_location_; + 
jsoncons::optional<std::size_t> min_properties_; + std::string absolute_min_properties_location_; + jsoncons::optional<required_validator<Json>> required_; + + std::map<std::string, validator_pointer> properties_; + #if defined(JSONCONS_HAS_STD_REGEX) + std::vector<std::pair<std::regex, validator_pointer>> pattern_properties_; + #endif + validator_pointer additional_properties_; + + std::map<std::string, validator_pointer> dependencies_; + + validator_pointer property_name_validator_; + + public: + object_validator(abstract_keyword_validator_factory<Json>* builder, + const Json& sch, + const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? uris.back().string() : ""), + max_properties_(), min_properties_(), + additional_properties_(nullptr), + property_name_validator_(nullptr) + { + auto it = sch.find("maxProperties"); + if (it != sch.object_range().end()) + { + max_properties_ = it->value().template as<std::size_t>(); + absolute_max_properties_location_ = make_absolute_keyword_location(uris, "maxProperties"); + } + + it = sch.find("minProperties"); + if (it != sch.object_range().end()) + { + min_properties_ = it->value().template as<std::size_t>(); + absolute_min_properties_location_ = make_absolute_keyword_location(uris, "minProperties"); + } + + it = sch.find("required"); + if (it != sch.object_range().end()) + { + auto location = make_absolute_keyword_location(uris, "required"); + required_ = required_validator<Json>(location, + it->value().template as<std::vector<std::string>>()); + } + + it = sch.find("properties"); + if (it != sch.object_range().end()) + { + for (const auto& prop : it->value().object_range()) + properties_.emplace( + std::make_pair( + prop.key(), + builder->make_keyword_validator(prop.value(), uris, {"properties", prop.key()}))); + } + + #if defined(JSONCONS_HAS_STD_REGEX) + it = sch.find("patternProperties"); + if (it != sch.object_range().end()) + { + for (const auto& prop : 
it->value().object_range()) + pattern_properties_.emplace_back( + std::make_pair( + std::regex(prop.key(), std::regex::ECMAScript), + builder->make_keyword_validator(prop.value(), uris, {prop.key()}))); + } + #endif + + it = sch.find("additionalProperties"); + if (it != sch.object_range().end()) + { + additional_properties_ = builder->make_keyword_validator(it->value(), uris, {"additionalProperties"}); + } + + it = sch.find("dependencies"); + if (it != sch.object_range().end()) + { + for (const auto& dep : it->value().object_range()) + { + switch (dep.value().type()) + { + case json_type::array_value: + { + auto location = make_absolute_keyword_location(uris, "dependencies"); + dependencies_.emplace(dep.key(), + builder->make_required_validator({location}, + dep.value().template as<std::vector<std::string>>())); + break; + } + default: + { + dependencies_.emplace(dep.key(), + builder->make_keyword_validator(dep.value(), uris, {"dependencies", dep.key()})); + break; + } + } + } + } + + auto property_names_it = sch.find("propertyNames"); + if (property_names_it != sch.object_range().end()) + { + property_name_validator_ = builder->make_keyword_validator(property_names_it->value(), uris, {"propertyNames"}); + } + } + private: + + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const override + { + if (max_properties_ && instance.size() > *max_properties_) + { + std::string message("Maximum properties: " + std::to_string(*max_properties_)); + message.append(", found: " + std::to_string(instance.size())); + reporter.error(validation_output("maxProperties", + absolute_max_properties_location_, + instance_location.to_uri_fragment(), + std::move(message))); + if (reporter.fail_early()) + { + return; + } + } + + if (min_properties_ && instance.size() < *min_properties_) + { + std::string message("Minimum properties: " + std::to_string(*min_properties_)); + message.append(", found: " + 
std::to_string(instance.size())); + reporter.error(validation_output("minProperties", + absolute_min_properties_location_, + instance_location.to_uri_fragment(), + std::move(message))); + if (reporter.fail_early()) + { + return; + } + } + + if (required_) + required_->validate(instance, instance_location, reporter, patch); + + for (const auto& property : instance.object_range()) + { + if (property_name_validator_) + property_name_validator_->validate(property.key(), instance_location, reporter, patch); + + bool a_prop_or_pattern_matched = false; + auto properties_it = properties_.find(property.key()); + + // check if it is in "properties" + if (properties_it != properties_.end()) + { + a_prop_or_pattern_matched = true; + jsonpointer::json_pointer pointer(instance_location); + pointer /= property.key(); + properties_it->second->validate(property.value(), pointer, reporter, patch); + } + + #if defined(JSONCONS_HAS_STD_REGEX) + + // check all matching "patternProperties" + for (auto& schema_pp : pattern_properties_) + if (std::regex_search(property.key(), schema_pp.first)) + { + a_prop_or_pattern_matched = true; + jsonpointer::json_pointer pointer(instance_location); + pointer /= property.key(); + schema_pp.second->validate(property.value(), pointer, reporter, patch); + } + #endif + + // finally, check "additionalProperties" + if (!a_prop_or_pattern_matched && additional_properties_) + { + collecting_error_reporter local_reporter; + + jsonpointer::json_pointer pointer(instance_location); + pointer /= property.key(); + additional_properties_->validate(property.value(), pointer, local_reporter, patch); + if (!local_reporter.errors.empty()) + { + reporter.error(validation_output("additionalProperties", + additional_properties_->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Additional property \"" + property.key() + "\" found but was invalid.")); + if (reporter.fail_early()) + { + return; + } + } + } + } + + // reverse search + for (auto const& 
prop : properties_) + { + const auto finding = instance.find(prop.first); + if (finding == instance.object_range().end()) + { + // If property is not in instance + auto default_value = prop.second->get_default_value(instance_location, instance, reporter); + if (default_value) + { + // If default value is available, update patch + jsonpointer::json_pointer pointer(instance_location); + pointer /= prop.first; + + update_patch(patch, pointer, std::move(*default_value)); + } + } + } + + for (const auto& dep : dependencies_) + { + auto prop = instance.find(dep.first); + if (prop != instance.object_range().end()) + { + // if dependency-property is present in instance + jsonpointer::json_pointer pointer(instance_location); + pointer /= dep.first; + dep.second->validate(instance, pointer, reporter, patch); // validate + } + } + } + + void update_patch(Json& patch, const jsonpointer::json_pointer& instance_location, Json&& default_value) const + { + Json j; + j.try_emplace("op", "add"); + j.try_emplace("path", instance_location.to_uri_fragment()); + j.try_emplace("value", std::forward<Json>(default_value)); + patch.push_back(std::move(j)); + } + }; + + // array_validator + + template <class Json> + class array_validator : public keyword_validator<Json> + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + jsoncons::optional<std::size_t> max_items_; + std::string absolute_max_items_location_; + jsoncons::optional<std::size_t> min_items_; + std::string absolute_min_items_location_; + bool unique_items_ = false; + validator_pointer items_validator_; + std::vector<validator_pointer> item_validators_; + validator_pointer additional_items_validator_; + validator_pointer contains_validator_; + + public: + array_validator(abstract_keyword_validator_factory<Json>* builder, + const Json& sch, + const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? 
uris.back().string() : ""), + max_items_(), min_items_(), items_validator_(nullptr), additional_items_validator_(nullptr), contains_validator_(nullptr) + { + { + auto it = sch.find("maxItems"); + if (it != sch.object_range().end()) + { + max_items_ = it->value().template as<std::size_t>(); + absolute_max_items_location_ = make_absolute_keyword_location(uris, "maxItems"); + } + } + + { + auto it = sch.find("minItems"); + if (it != sch.object_range().end()) + { + min_items_ = it->value().template as<std::size_t>(); + absolute_min_items_location_ = make_absolute_keyword_location(uris, "minItems"); + } + } + + { + auto it = sch.find("uniqueItems"); + if (it != sch.object_range().end()) + { + unique_items_ = it->value().template as<bool>(); + } + } + + { + auto it = sch.find("items"); + if (it != sch.object_range().end()) + { + + if (it->value().type() == json_type::array_value) + { + size_t c = 0; + for (const auto& subsch : it->value().array_range()) + item_validators_.push_back(builder->make_keyword_validator(subsch, uris, {"items", std::to_string(c++)})); + + auto attr_add = sch.find("additionalItems"); + if (attr_add != sch.object_range().end()) + { + additional_items_validator_ = builder->make_keyword_validator(attr_add->value(), uris, {"additionalItems"}); + } + + } + else if (it->value().type() == json_type::object_value || + it->value().type() == json_type::bool_value) + { + items_validator_ = builder->make_keyword_validator(it->value(), uris, {"items"}); + } + + } + } + + { + auto it = sch.find("contains"); + if (it != sch.object_range().end()) + { + contains_validator_ = builder->make_keyword_validator(it->value(), uris, {"contains"}); + } + } + } + private: + + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const override + { + if (max_items_) + { + if (instance.size() > *max_items_) + { + std::string message("Expected maximum item count: " + 
std::to_string(*max_items_)); + message.append(", found: " + std::to_string(instance.size())); + reporter.error(validation_output("maxItems", + absolute_max_items_location_, + instance_location.to_uri_fragment(), + std::move(message))); + if (reporter.fail_early()) + { + return; + } + } + } + + if (min_items_) + { + if (instance.size() < *min_items_) + { + std::string message("Expected minimum item count: " + std::to_string(*min_items_)); + message.append(", found: " + std::to_string(instance.size())); + reporter.error(validation_output("minItems", + absolute_min_items_location_, + instance_location.to_uri_fragment(), + std::move(message))); + if (reporter.fail_early()) + { + return; + } + } + } + + if (unique_items_) + { + if (!array_has_unique_items(instance)) + { + reporter.error(validation_output("uniqueItems", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Array items are not unique")); + if (reporter.fail_early()) + { + return; + } + } + } + + size_t index = 0; + if (items_validator_) + { + for (const auto& i : instance.array_range()) + { + jsonpointer::json_pointer pointer(instance_location); + pointer /= index; + items_validator_->validate(i, pointer, reporter, patch); + index++; + } + } + else + { + auto validator_it = item_validators_.cbegin(); + for (const auto& item : instance.array_range()) + { + validator_pointer item_validator = nullptr; + if (validator_it != item_validators_.cend()) + { + item_validator = *validator_it; + ++validator_it; + } + else if (additional_items_validator_ != nullptr) + { + item_validator = additional_items_validator_; + } + else + break; + + jsonpointer::json_pointer pointer(instance_location); + pointer /= index; + item_validator->validate(item, pointer, reporter, patch); + } + } + + if (contains_validator_) + { + bool contained = false; + collecting_error_reporter local_reporter; + for (const auto& item : instance.array_range()) + { + std::size_t mark = local_reporter.errors.size(); + 
contains_validator_->validate(item, instance_location, local_reporter, patch); + if (mark == local_reporter.errors.size()) + { + contained = true; + break; + } + } + if (!contained) + { + reporter.error(validation_output("contains", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Expected at least one array item to match \"contains\" schema", + local_reporter.errors)); + if (reporter.fail_early()) + { + return; + } + } + } + } + + static bool array_has_unique_items(const Json& a) + { + for (auto it = a.array_range().begin(); it != a.array_range().end(); ++it) + { + for (auto jt = it+1; jt != a.array_range().end(); ++jt) + { + if (*it == *jt) + { + return false; // contains duplicates + } + } + } + return true; // elements are unique + } + }; + + template <class Json> + class conditional_validator : public keyword_validator<Json> + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + validator_pointer if_validator_; + validator_pointer then_validator_; + validator_pointer else_validator_; + + public: + conditional_validator(abstract_keyword_validator_factory<Json>* builder, + const Json& sch_if, + const Json& sch, + const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? 
uris.back().string() : ""), if_validator_(nullptr), then_validator_(nullptr), else_validator_(nullptr) + { + auto then_it = sch.find("then"); + auto else_it = sch.find("else"); + + if (then_it != sch.object_range().end() || else_it != sch.object_range().end()) + { + if_validator_ = builder->make_keyword_validator(sch_if, uris, {"if"}); + + if (then_it != sch.object_range().end()) + { + then_validator_ = builder->make_keyword_validator(then_it->value(), uris, {"then"}); + } + + if (else_it != sch.object_range().end()) + { + else_validator_ = builder->make_keyword_validator(else_it->value(), uris, {"else"}); + } + } + } + private: + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const final + { + if (if_validator_) + { + collecting_error_reporter local_reporter; + + if_validator_->validate(instance, instance_location, local_reporter, patch); + if (local_reporter.errors.empty()) + { + if (then_validator_) + then_validator_->validate(instance, instance_location, reporter, patch); + } + else + { + if (else_validator_) + else_validator_->validate(instance, instance_location, reporter, patch); + } + } + } + }; + + // enum_validator + + template <class Json> + class enum_validator : public keyword_validator<Json> + { + Json enum_validator_; + + public: + enum_validator(const Json& sch, + const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? 
uris.back().string() : ""), enum_validator_(sch) + { + } + private: + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json&) const final + { + bool in_range = false; + for (const auto& item : enum_validator_.array_range()) + { + if (item == instance) + { + in_range = true; + break; + } + } + + if (!in_range) + { + reporter.error(validation_output("enum", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + instance.template as<std::string>() + " is not a valid enum value")); + if (reporter.fail_early()) + { + return; + } + } + } + }; + + // const_keyword + + template <class Json> + class const_keyword : public keyword_validator<Json> + { + Json const_validator_; + + public: + const_keyword(const Json& sch, const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? uris.back().string() : ""), const_validator_(sch) + { + } + private: + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json&) const final + { + if (const_validator_ != instance) + reporter.error(validation_output("const", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Instance is not const")); + } + }; + + template <class Json> + class type_validator : public keyword_validator<Json> + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + Json default_value_; + std::vector<validator_pointer> type_mapping_; + jsoncons::optional<enum_validator<Json>> enum_validator_; + jsoncons::optional<const_keyword<Json>> const_validator_; + std::vector<validator_pointer> combined_validators_; + jsoncons::optional<conditional_validator<Json>> conditional_validator_; + std::vector<std::string> expected_types_; + + public: + type_validator(const type_validator&) = delete; + type_validator& operator=(const type_validator&) = delete; + 
type_validator(type_validator&&) = default; + type_validator& operator=(type_validator&&) = default; + + type_validator(abstract_keyword_validator_factory<Json>* builder, + const Json& sch, + const std::vector<schema_location>& uris) + : keyword_validator<Json>((!uris.empty() && uris.back().is_absolute()) ? uris.back().string() : ""), default_value_(jsoncons::null_type()), + type_mapping_((uint8_t)(json_type::object_value)+1), + enum_validator_(), const_validator_() + { + //std::cout << uris.size() << " uris: "; + //for (const auto& uri : uris) + //{ + // std::cout << uri.string() << ", "; + //} + //std::cout << "\n"; + std::set<std::string> known_keywords; + + auto it = sch.find("type"); + if (it == sch.object_range().end()) + { + initialize_type_mapping(builder, "", sch, uris, known_keywords); + } + else + { + switch (it->value().type()) + { + case json_type::string_value: + { + auto type = it->value().template as<std::string>(); + initialize_type_mapping(builder, type, sch, uris, known_keywords); + expected_types_.emplace_back(std::move(type)); + break; + } + + case json_type::array_value: // "type": ["type1", "type2"] + { + for (const auto& item : it->value().array_range()) + { + auto type = item.template as<std::string>(); + initialize_type_mapping(builder, type, sch, uris, known_keywords); + expected_types_.emplace_back(std::move(type)); + } + break; + } + default: + break; + } + } + + const auto default_it = sch.find("default"); + if (default_it != sch.object_range().end()) + { + default_value_ = default_it->value(); + } + + it = sch.find("enum"); + if (it != sch.object_range().end()) + { + enum_validator_ = enum_validator<Json >(it->value(), uris); + } + + it = sch.find("const"); + if (it != sch.object_range().end()) + { + const_validator_ = const_keyword<Json>(it->value(), uris); + } + + it = sch.find("not"); + if (it != sch.object_range().end()) + { + combined_validators_.push_back(builder->make_not_validator(it->value(), uris)); + } + + it = 
sch.find("allOf"); + if (it != sch.object_range().end()) + { + combined_validators_.push_back(builder->make_all_of_validator(it->value(), uris)); + } + + it = sch.find("anyOf"); + if (it != sch.object_range().end()) + { + combined_validators_.push_back(builder->make_any_of_validator(it->value(), uris)); + } + + it = sch.find("oneOf"); + if (it != sch.object_range().end()) + { + combined_validators_.push_back(builder->make_one_of_validator(it->value(), uris)); + } + + it = sch.find("if"); + if (it != sch.object_range().end()) + { + conditional_validator_ = conditional_validator<Json>(builder, it->value(), sch, uris); + } + } + private: + + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const override final + { + auto type = type_mapping_[(uint8_t) instance.type()]; + + if (type) + type->validate(instance, instance_location, reporter, patch); + else + { + std::ostringstream ss; + ss << "Expected "; + for (std::size_t i = 0; i < expected_types_.size(); ++i) + { + if (i > 0) + { + ss << ", "; + if (i+1 == expected_types_.size()) + { + ss << "or "; + } + } + ss << expected_types_[i]; + } + ss << ", found " << instance.type(); + + reporter.error(validation_output("type", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + ss.str())); + if (reporter.fail_early()) + { + return; + } + } + + if (enum_validator_) + { + enum_validator_->validate(instance, instance_location, reporter, patch); + if (reporter.error_count() > 0 && reporter.fail_early()) + { + return; + } + } + + if (const_validator_) + { + const_validator_->validate(instance, instance_location, reporter, patch); + if (reporter.error_count() > 0 && reporter.fail_early()) + { + return; + } + } + + for (const auto& validator : combined_validators_) + { + validator->validate(instance, instance_location, reporter, patch); + if (reporter.error_count() > 0 && reporter.fail_early()) + { + return; + } + } + + 
+ if (conditional_validator_) + { + conditional_validator_->validate(instance, instance_location, reporter, patch); + if (reporter.error_count() > 0 && reporter.fail_early()) + { + return; + } + } + } + + jsoncons::optional<Json> get_default_value(const jsonpointer::json_pointer&, + const Json&, + error_reporter&) const override + { + return default_value_; + } + + void initialize_type_mapping(abstract_keyword_validator_factory<Json>* builder, + const std::string& type, + const Json& sch, + const std::vector<schema_location>& uris, + std::set<std::string>& keywords) + { + if (type == "null") + { + type_mapping_[(uint8_t)json_type::null_value] = builder->make_null_validator(uris); + } + else if (type == "object") + { + type_mapping_[(uint8_t)json_type::object_value] = builder->make_object_validator(sch, uris); + } + else if (type == "array") + { + type_mapping_[(uint8_t)json_type::array_value] = builder->make_array_validator(sch, uris); + } + else if (type == "string") + { + type_mapping_[(uint8_t)json_type::string_value] = builder->make_string_validator(sch, uris); + // For binary types + type_mapping_[(uint8_t) json_type::byte_string_value] = type_mapping_[(uint8_t) json_type::string_value]; + } + else if (type == "boolean") + { + type_mapping_[(uint8_t)json_type::bool_value] = builder->make_boolean_validator(uris); + } + else if (type == "integer") + { + type_mapping_[(uint8_t)json_type::int64_value] = builder->make_integer_validator(sch, uris, keywords); + type_mapping_[(uint8_t)json_type::uint64_value] = type_mapping_[(uint8_t)json_type::int64_value]; + type_mapping_[(uint8_t)json_type::double_value] = type_mapping_[(uint8_t)json_type::int64_value]; + } + else if (type == "number") + { + type_mapping_[(uint8_t)json_type::double_value] = builder->make_number_validator(sch, uris, keywords); + type_mapping_[(uint8_t)json_type::int64_value] = type_mapping_[(uint8_t)json_type::double_value]; + type_mapping_[(uint8_t)json_type::uint64_value] = 
type_mapping_[(uint8_t)json_type::double_value]; + } + else if (type.empty()) + { + type_mapping_[(uint8_t)json_type::null_value] = builder->make_null_validator(uris); + type_mapping_[(uint8_t)json_type::object_value] = builder->make_object_validator(sch, uris); + type_mapping_[(uint8_t)json_type::array_value] = builder->make_array_validator(sch, uris); + type_mapping_[(uint8_t)json_type::string_value] = builder->make_string_validator(sch, uris); + // For binary types + type_mapping_[(uint8_t) json_type::byte_string_value] = type_mapping_[(uint8_t) json_type::string_value]; + type_mapping_[(uint8_t)json_type::bool_value] = builder->make_boolean_validator(uris); + type_mapping_[(uint8_t)json_type::int64_value] = builder->make_integer_validator(sch, uris, keywords); + type_mapping_[(uint8_t)json_type::uint64_value] = type_mapping_[(uint8_t)json_type::int64_value]; + type_mapping_[(uint8_t)json_type::double_value] = type_mapping_[(uint8_t)json_type::int64_value]; + type_mapping_[(uint8_t)json_type::double_value] = builder->make_number_validator(sch, uris, keywords); + type_mapping_[(uint8_t)json_type::int64_value] = type_mapping_[(uint8_t)json_type::double_value]; + type_mapping_[(uint8_t)json_type::uint64_value] = type_mapping_[(uint8_t)json_type::double_value]; + } + } + }; + +} // namespace jsonschema +} // namespace jsoncons + +#endif // JSONCONS_JSONSCHEMA_VALUE_RULES_HPP diff --git a/include/jsoncons_ext/jsonschema/keyword_validator_factory.hpp b/include/jsoncons_ext/jsonschema/keyword_validator_factory.hpp new file mode 100644 index 0000000..f538105 --- /dev/null +++ b/include/jsoncons_ext/jsonschema/keyword_validator_factory.hpp @@ -0,0 +1,556 @@ +// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_KEYWORD_VALIDATOR_FACTORY_HPP +#define JSONCONS_JSONSCHEMA_KEYWORD_VALIDATOR_FACTORY_HPP + +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/uri.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpointer/jsonpointer.hpp> +#include <jsoncons_ext/jsonschema/subschema.hpp> +#include <jsoncons_ext/jsonschema/keyword_validator.hpp> +#include <jsoncons_ext/jsonschema/schema_draft7.hpp> +#include <jsoncons_ext/jsonschema/schema_version.hpp> +#include <cassert> +#include <set> +#include <sstream> +#include <iostream> +#include <cassert> +#if defined(JSONCONS_HAS_STD_REGEX) +#include <regex> +#endif + +namespace jsoncons { +namespace jsonschema { + + template <class Json> + using uri_resolver = std::function<Json(const jsoncons::uri & /*id*/)>; + + template <class Json> + class reference_schema : public keyword_validator<Json> + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + validator_pointer referred_schema_; + + public: + reference_schema(const std::string& id) + : keyword_validator<Json>(id), referred_schema_(nullptr) {} + + void set_referred_schema(validator_pointer target) { referred_schema_ = target; } + + private: + + void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const override + { + if (!referred_schema_) + { + reporter.error(validation_output("", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Unresolved schema reference " + this->absolute_keyword_location())); + return; + } + + referred_schema_->validate(instance, instance_location, reporter, patch); + } + + jsoncons::optional<Json> get_default_value(const jsonpointer::json_pointer& instance_location, + const Json& instance, + 
error_reporter& reporter) const override + { + if (!referred_schema_) + { + reporter.error(validation_output("", + this->absolute_keyword_location(), + instance_location.to_uri_fragment(), + "Unresolved schema reference " + this->absolute_keyword_location())); + return jsoncons::optional<Json>(); + } + + return referred_schema_->get_default_value(instance_location, instance, reporter); + } + }; + + template <class Json> + class keyword_validator_factory; + + template <class Json> + class json_schema + { + using validator_pointer = typename keyword_validator<Json>::self_pointer; + + friend class keyword_validator_factory<Json>; + + std::vector<std::unique_ptr<keyword_validator<Json>>> subschemas_; + validator_pointer root_; + public: + json_schema(std::vector<std::unique_ptr<keyword_validator<Json>>>&& subschemas, + validator_pointer root) + : subschemas_(std::move(subschemas)), root_(root) + { + if (root_ == nullptr) + JSONCONS_THROW(schema_error("There is no root schema to validate an instance against")); + } + + json_schema(const json_schema&) = delete; + json_schema(json_schema&&) = default; + json_schema& operator=(const json_schema&) = delete; + json_schema& operator=(json_schema&&) = default; + + void validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const + { + JSONCONS_ASSERT(root_ != nullptr); + root_->validate(instance, instance_location, reporter, patch); + } + }; + + template <class Json> + struct default_uri_resolver + { + Json operator()(const jsoncons::uri& uri) + { + if (uri.path() == "/draft-07/schema") + { + return jsoncons::jsonschema::schema_draft7<Json>::get_schema(); + } + + JSONCONS_THROW(jsonschema::schema_error("Don't know how to load JSON Schema " + std::string(uri.base()))); + } + }; + + template <class Json> + class keyword_validator_factory : public abstract_keyword_validator_factory<Json> + { + using validator_pointer = typename 
keyword_validator<Json>::self_pointer; + + struct subschema_registry + { + std::map<std::string, validator_pointer> schemas; // schemas + std::map<std::string, reference_schema<Json>*> unresolved; // unresolved references + std::map<std::string, Json> unprocessed_keywords; + }; + + uri_resolver<Json> resolver_; + validator_pointer root_; + + // Owns all schemas + std::vector<std::unique_ptr<keyword_validator<Json>>> subschemas_; + + // Map location to subschema_registry + std::map<std::string, subschema_registry> subschema_registries_; + + public: + keyword_validator_factory(uri_resolver<Json>&& resolver) noexcept + + : resolver_(std::move(resolver)) + { + } + + keyword_validator_factory(const keyword_validator_factory&) = delete; + keyword_validator_factory& operator=(const keyword_validator_factory&) = delete; + keyword_validator_factory(keyword_validator_factory&&) = default; + keyword_validator_factory& operator=(keyword_validator_factory&&) = default; + + std::shared_ptr<json_schema<Json>> get_schema() + { + return std::make_shared<json_schema<Json>>(std::move(subschemas_), root_); + } + + validator_pointer make_required_validator(const std::vector<schema_location>& uris, + const std::vector<std::string>& r) override + { + auto sch_orig = jsoncons::make_unique<required_validator<Json>>(uris, r); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_null_validator(const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<null_validator<Json>>(uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_true_validator(const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<true_validator<Json>>(uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_false_validator(const 
std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<false_validator<Json>>(uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_object_validator(const Json& schema, + const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<object_validator<Json>>(this, schema, uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_array_validator(const Json& schema, + const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<array_validator<Json>>(this, schema, uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_string_validator(const Json& schema, + const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<string_validator<Json>>(schema, uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_boolean_validator(const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<boolean_validator<Json>>(uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_integer_validator(const Json& schema, + const std::vector<schema_location>& uris, + std::set<std::string>& keywords) override + { + auto sch_orig = jsoncons::make_unique<integer_validator<Json>>(schema, uris, keywords); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_number_validator(const Json& schema, + const std::vector<schema_location>& uris, + std::set<std::string>& keywords) override + { + auto sch_orig = jsoncons::make_unique<number_validator<Json>>(schema, uris, keywords); + auto sch = sch_orig.get(); + 
subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_not_validator(const Json& schema, + const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<not_validator<Json>>(this, schema, uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_all_of_validator(const Json& schema, + const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<combining_validator<Json,all_of_criterion<Json>>>(this, schema, uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_any_of_validator(const Json& schema, + const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<combining_validator<Json,any_of_criterion<Json>>>(this, schema, uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_one_of_validator(const Json& schema, + const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<combining_validator<Json,one_of_criterion<Json>>>(this, schema, uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_type_validator(const Json& schema, + const std::vector<schema_location>& uris) override + { + auto sch_orig = jsoncons::make_unique<type_validator<Json>>(this, schema, uris); + auto sch = sch_orig.get(); + subschemas_.emplace_back(std::move(sch_orig)); + return sch; + } + + validator_pointer make_keyword_validator(const Json& schema, + const std::vector<schema_location>& uris, + const std::vector<std::string>& keys) override + { + std::vector<schema_location> new_uris = update_uris(schema, uris, keys); + + validator_pointer sch = nullptr; + + switch (schema.type()) + { + case json_type::bool_value: + if (schema.template 
as<bool>()) + { + sch = make_true_validator(new_uris); + } + else + { + sch = make_false_validator(new_uris); + } + break; + case json_type::object_value: + { + auto it = schema.find("definitions"); + if (it != schema.object_range().end()) + { + for (const auto& def : it->value().object_range()) + make_keyword_validator(def.value(), new_uris, {"definitions", def.key()}); + } + + it = schema.find("$ref"); + if (it != schema.object_range().end()) // this schema is a reference + { + schema_location relative(it->value().template as<std::string>()); + schema_location id = relative.resolve(new_uris.back()); + sch = get_or_create_reference(id); + } + else + { + sch = make_type_validator(schema, new_uris); + } + break; + } + default: + JSONCONS_THROW(schema_error("invalid JSON-type for a schema for " + new_uris[0].string() + ", expected: boolean or object")); + break; + } + + for (const auto& uri : new_uris) + { + insert(uri, sch); + + if (schema.type() == json_type::object_value) + { + for (const auto& item : schema.object_range()) + insert_unknown_keyword(uri, item.key(), item.value()); // save unknown keywords for later reference + } + } + return sch; + } + + void load_root(const Json& sch) + { + if (sch.is_object()) + { + auto it = sch.find("$schema"); + if (it != sch.object_range().end()) + { + auto sv = it->value().as_string_view(); + if (!schema_version::contains(sv)) + { + std::string message("Unsupported schema version "); + message.append(sv.data(), sv.size()); + JSONCONS_THROW(schema_error(message)); + } + } + } + load(sch); + } + + void load(const Json& sch) + { + subschema_registries_.clear(); + root_ = make_keyword_validator(sch, {{"#"}}, {}); + + // load all external schemas that have not already been loaded + + std::size_t loaded_count = 0; + do + { + loaded_count = 0; + + std::vector<std::string> locations; + for (const auto& item : subschema_registries_) + locations.push_back(item.first); + + for (const auto& loc : locations) + { + if 
(subschema_registries_[loc].schemas.empty()) // registry for this file is empty + { + if (resolver_) + { + Json external_schema = resolver_(loc); + make_keyword_validator(external_schema, {{loc}}, {}); + ++loaded_count; + } + else + { + JSONCONS_THROW(schema_error("External schema reference '" + loc + "' needs to be loaded, but no resolver provided")); + } + } + } + } + while (loaded_count > 0); + + for (const auto &file : subschema_registries_) + { + if (!file.second.unresolved.empty()) + { + JSONCONS_THROW(schema_error("after all files have been parsed, '" + + (file.first == "" ? "<root>" : file.first) + + "' has still undefined references.")); + } + } + } + + private: + + void insert(const schema_location& uri, validator_pointer s) + { + auto& file = get_or_create_file(std::string(uri.base())); + auto schemas_it = file.schemas.find(std::string(uri.fragment())); + if (schemas_it != file.schemas.end()) + { + JSONCONS_THROW(schema_error("schema with " + uri.string() + " already inserted")); + return; + } + + file.schemas.insert({std::string(uri.fragment()), s}); + + // is there an unresolved reference to this newly inserted schema? 
+ auto unresolved_it = file.unresolved.find(std::string(uri.fragment())); + if (unresolved_it != file.unresolved.end()) + { + unresolved_it->second->set_referred_schema(s); + file.unresolved.erase(unresolved_it); + + } + } + + void insert_unknown_keyword(const schema_location& uri, + const std::string& key, + const Json& value) + { + auto &file = get_or_create_file(std::string(uri.base())); + auto new_u = uri.append(key); + schema_location new_uri(new_u); + + if (new_uri.has_fragment() && !new_uri.has_identifier()) + { + auto fragment = std::string(new_uri.fragment()); + // is there a reference looking for this unknown-keyword, which is thus no longer a unknown keyword but a schema + auto unresolved = file.unresolved.find(fragment); + if (unresolved != file.unresolved.end()) + make_keyword_validator(value, {{new_uri}}, {}); + else // no, nothing ref'd it, keep for later + file.unprocessed_keywords[fragment] = value; + + // recursively add possible subschemas of unknown keywords + if (value.type() == json_type::object_value) + for (const auto& subsch : value.object_range()) + { + insert_unknown_keyword(new_uri, subsch.key(), subsch.value()); + } + } + } + + validator_pointer get_or_create_reference(const schema_location& uri) + { + auto &file = get_or_create_file(std::string(uri.base())); + + // a schema already exists + auto sch = file.schemas.find(std::string(uri.fragment())); + if (sch != file.schemas.end()) + return sch->second; + + // referencing an unknown keyword, turn it into schema + // + // an unknown keyword can only be referenced by a JSONPointer, + // not by a plain name identifier + if (uri.has_fragment() && !uri.has_identifier()) + { + std::string fragment = std::string(uri.fragment()); + auto unprocessed_keywords_it = file.unprocessed_keywords.find(fragment); + if (unprocessed_keywords_it != file.unprocessed_keywords.end()) + { + auto &subsch = unprocessed_keywords_it->second; + auto s = make_keyword_validator(subsch, {{uri}}, {}); // A JSON Schema 
MUST be an object or a boolean. + file.unprocessed_keywords.erase(unprocessed_keywords_it); + return s; + } + } + + // get or create a reference_schema + auto ref = file.unresolved.find(std::string(uri.fragment())); + if (ref != file.unresolved.end()) + { + return ref->second; // unresolved, use existing reference + } + else + { + auto orig = jsoncons::make_unique<reference_schema<Json>>(uri.string()); + auto p = file.unresolved.insert(ref, + {std::string(uri.fragment()), orig.get()}) + ->second; // unresolved, create new reference + + subschemas_.emplace_back(std::move(orig)); + return p; + } + } + + subschema_registry& get_or_create_file(const std::string& loc) + { + auto file = subschema_registries_.find(loc); + if (file != subschema_registries_.end()) + return file->second; + else + return subschema_registries_.insert(file, {loc, {}})->second; + } + + }; + + template <class Json> + std::shared_ptr<json_schema<Json>> make_schema(const Json& schema) + { + keyword_validator_factory<Json> loader{default_uri_resolver<Json>()}; + loader.load_root(schema); + + return loader.get_schema(); + } + + template <class Json,class URIResolver> + typename std::enable_if<type_traits::is_unary_function_object_exact<URIResolver,Json,std::string>::value,std::shared_ptr<json_schema<Json>>>::type + make_schema(const Json& schema, const URIResolver& resolver) + { + keyword_validator_factory<Json> loader(resolver); + loader.load_root(schema); + + return loader.get_schema(); + } + +} // namespace jsonschema +} // namespace jsoncons + +#endif // JSONCONS_JSONSCHEMA_SCHEMA_LOADER_HPP diff --git a/include/jsoncons_ext/jsonschema/schema_draft7.hpp b/include/jsoncons_ext/jsonschema/schema_draft7.hpp new file mode 100644 index 0000000..c6f6fbc --- /dev/null +++ b/include/jsoncons_ext/jsonschema/schema_draft7.hpp @@ -0,0 +1,198 @@ +// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_SCHEMA_DRAFT7_HPP +#define JSONCONS_JSONSCHEMA_SCHEMA_DRAFT7_HPP + +#include <jsoncons/json.hpp> + +namespace jsoncons { +namespace jsonschema { + + template <class Json> + struct schema_draft7 + { + static Json get_schema() + { + static Json schema = Json::parse(R"( + { + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://json-schema.org/draft-07/schema#", + "title": "Core schema meta-schema", + "definitions": { + "schemaArray": { + "type": "array", + "minItems": 1, + "items": { "$ref": "#" } + }, + "nonNegativeInteger": { + "type": "integer", + "minimum": 0 + }, + "nonNegativeIntegerDefault0": { + "allOf": [ + { "$ref": "#/definitions/nonNegativeInteger" }, + { "default": 0 } + ] + }, + "simpleTypes": { + "enum": [ + "array", + "boolean", + "integer", + "null", + "number", + "object", + "string" + ] + }, + "stringArray": { + "type": "array", + "items": { "type": "string" }, + "uniqueItems": true, + "default": [] + } + }, + "type": ["object", "boolean"], + "properties": { + "$id": { + "type": "string", + "format": "uri-reference" + }, + "$schema": { + "type": "string", + "format": "uri" + }, + "$ref": { + "type": "string", + "format": "uri-reference" + }, + "$comment": { + "type": "string" + }, + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "default": true, + "readOnly": { + "type": "boolean", + "default": false + }, + "examples": { + "type": "array", + "items": true + }, + "multipleOf": { + "type": "number", + "exclusiveMinimum": 0 + }, + "maximum": { + "type": "number" + }, + "exclusiveMaximum": { + "type": "number" + }, + "minimum": { + "type": "number" + }, + "exclusiveMinimum": { + "type": "number" + }, + "maxLength": { "$ref": "#/definitions/nonNegativeInteger" }, + "minLength": { "$ref": 
"#/definitions/nonNegativeIntegerDefault0" }, + "pattern": { + "type": "string", + "format": "regex" + }, + "additionalItems": { "$ref": "#" }, + "items": { + "anyOf": [ + { "$ref": "#" }, + { "$ref": "#/definitions/schemaArray" } + ], + "default": true + }, + "maxItems": { "$ref": "#/definitions/nonNegativeInteger" }, + "minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, + "uniqueItems": { + "type": "boolean", + "default": false + }, + "contains": { "$ref": "#" }, + "maxProperties": { "$ref": "#/definitions/nonNegativeInteger" }, + "minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" }, + "required": { "$ref": "#/definitions/stringArray" }, + "additionalProperties": { "$ref": "#" }, + "definitions": { + "type": "object", + "additionalProperties": { "$ref": "#" }, + "default": {} + }, + "properties": { + "type": "object", + "additionalProperties": { "$ref": "#" }, + "default": {} + }, + "patternProperties": { + "type": "object", + "additionalProperties": { "$ref": "#" }, + "propertyNames": { "format": "regex" }, + "default": {} + }, + "dependencies": { + "type": "object", + "additionalProperties": { + "anyOf": [ + { "$ref": "#" }, + { "$ref": "#/definitions/stringArray" } + ] + } + }, + "propertyNames": { "$ref": "#" }, + "const": true, + "enum": { + "type": "array", + "items": true, + "minItems": 1, + "uniqueItems": true + }, + "type": { + "anyOf": [ + { "$ref": "#/definitions/simpleTypes" }, + { + "type": "array", + "items": { "$ref": "#/definitions/simpleTypes" }, + "minItems": 1, + "uniqueItems": true + } + ] + }, + "format": { "type": "string" }, + "contentMediaType": { "type": "string" }, + "contentEncoding": { "type": "string" }, + "if": { "$ref": "#" }, + "then": { "$ref": "#" }, + "else": { "$ref": "#" }, + "allOf": { "$ref": "#/definitions/schemaArray" }, + "anyOf": { "$ref": "#/definitions/schemaArray" }, + "oneOf": { "$ref": "#/definitions/schemaArray" }, + "not": { "$ref": "#" } + }, + "default": true + } + )"); + + 
return schema; + } + }; + +} // namespace jsonschema +} // namespace jsoncons + +#endif // JSONCONS_JSONSCHEMA_SCHEMA_DRAFT7_HPP diff --git a/include/jsoncons_ext/jsonschema/schema_location.hpp b/include/jsoncons_ext/jsonschema/schema_location.hpp new file mode 100644 index 0000000..fd1a743 --- /dev/null +++ b/include/jsoncons_ext/jsonschema/schema_location.hpp @@ -0,0 +1,200 @@ +// Copyright 2020 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_SCHEMA_LOCATION_HPP +#define JSONCONS_JSONSCHEMA_SCHEMA_LOCATION_HPP + +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/uri.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpointer/jsonpointer.hpp> +#include <jsoncons_ext/jsonschema/jsonschema_error.hpp> + +namespace jsoncons { +namespace jsonschema { + + class schema_location + { + jsoncons::uri uri_; + std::string identifier_; + public: + schema_location() + { + } + + schema_location(const std::string& uri) + { + auto pos = uri.find('#'); + if (pos != std::string::npos) + { + identifier_ = uri.substr(pos + 1); + unescape_percent(identifier_); + } + uri_ = jsoncons::uri(uri); + } + + jsoncons::uri uri() const + { + return uri_; + } + + bool has_fragment() const + { + return !identifier_.empty(); + } + + bool has_identifier() const + { + return !identifier_.empty() && identifier_.front() != '/'; + } + + jsoncons::string_view base() const + { + return uri_.base(); + } + + jsoncons::string_view path() const + { + return uri_.path(); + } + + bool is_absolute() const + { + return uri_.is_absolute(); + } + + std::string identifier() const + { + return identifier_; + } + + std::string fragment() const + { + return identifier_; + } + + schema_location resolve(const schema_location& uri) const + { + schema_location new_uri; + 
new_uri.identifier_ = identifier_; + new_uri.uri_ = uri_.resolve(uri.uri_); + return new_uri; + } + + int compare(const schema_location& other) const + { + int result = uri_.compare(other.uri_); + if (result != 0) + { + return result; + } + return result; + } + + schema_location append(const std::string& field) const + { + if (has_identifier()) + return *this; + + jsoncons::jsonpointer::json_pointer pointer(std::string(uri_.fragment())); + pointer /= field; + + jsoncons::uri new_uri(uri_.scheme(), + uri_.userinfo(), + uri_.host(), + uri_.port(), + uri_.path(), + uri_.query(), + pointer.to_string()); + + schema_location wrapper; + wrapper.uri_ = new_uri; + wrapper.identifier_ = pointer.to_string(); + + return wrapper; + } + + schema_location append(std::size_t index) const + { + if (has_identifier()) + return *this; + + jsoncons::jsonpointer::json_pointer pointer(std::string(uri_.fragment())); + pointer /= index; + + jsoncons::uri new_uri(uri_.scheme(), + uri_.userinfo(), + uri_.host(), + uri_.port(), + uri_.path(), + uri_.query(), + pointer.to_string()); + + schema_location wrapper; + wrapper.uri_ = new_uri; + wrapper.identifier_ = pointer.to_string(); + + return wrapper; + } + + std::string string() const + { + std::string s = uri_.string(); + return s; + } + + friend bool operator==(const schema_location& lhs, const schema_location& rhs) + { + return lhs.compare(rhs) == 0; + } + + friend bool operator!=(const schema_location& lhs, const schema_location& rhs) + { + return lhs.compare(rhs) != 0; + } + + friend bool operator<(const schema_location& lhs, const schema_location& rhs) + { + return lhs.compare(rhs) < 0; + } + + friend bool operator<=(const schema_location& lhs, const schema_location& rhs) + { + return lhs.compare(rhs) <= 0; + } + + friend bool operator>(const schema_location& lhs, const schema_location& rhs) + { + return lhs.compare(rhs) > 0; + } + + friend bool operator>=(const schema_location& lhs, const schema_location& rhs) + { + return 
// Decodes percent-escapes ("%XX") in `s` in place.
//
// Only a '%' that is followed by two hexadecimal digits is decoded; any other
// '%' (truncated escape such as "%4", or non-hex escape such as "%zz") is
// copied through unchanged instead of being turned into an arbitrary byte.
// The string is scanned once from left to right, so a byte produced by
// decoding is never re-interpreted as part of another escape
// ("%2541" yields "%41", not "A").
static void unescape_percent(std::string& s)
{
    // Value of a hexadecimal digit, or -1 when `c` is not one.
    const auto hex_value = [](char c) -> int
    {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    };

    const std::size_t length = s.size();
    std::size_t in = 0;   // read position
    std::size_t out = 0;  // write position; never ahead of `in`
    while (in < length)
    {
        if (s[in] == '%' && length - in >= 3)
        {
            const int hi = hex_value(s[in + 1]);
            const int lo = hex_value(s[in + 2]);
            if (hi >= 0 && lo >= 0)
            {
                s[out++] = static_cast<char>(hi * 16 + lo);
                in += 3;
                continue;
            }
        }
        s[out++] = s[in++];
    }
    s.resize(out);
}
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONSCHEMA_SUBSCHEMA_HPP +#define JSONCONS_JSONSCHEMA_SUBSCHEMA_HPP + +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/uri.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons_ext/jsonpointer/jsonpointer.hpp> +#include <jsoncons_ext/jsonschema/jsonschema_error.hpp> +#include <jsoncons_ext/jsonschema/schema_location.hpp> + +namespace jsoncons { +namespace jsonschema { + + // Interface for validation error handlers + class error_reporter + { + bool fail_early_; + std::size_t error_count_; + public: + error_reporter(bool fail_early = false) + : fail_early_(fail_early), error_count_(0) + { + } + + virtual ~error_reporter() = default; + + void error(const validation_output& o) + { + ++error_count_; + do_error(o); + } + + std::size_t error_count() const + { + return error_count_; + } + + bool fail_early() const + { + return fail_early_; + } + + private: + virtual void do_error(const validation_output& /* e */) = 0; + }; + + template <class Json> + class keyword_validator + { + std::string absolute_keyword_location_; + public: + using self_pointer = keyword_validator<Json>*; + + keyword_validator(const std::string& absolute_keyword_location) + : absolute_keyword_location_(absolute_keyword_location) + { + } + + keyword_validator(const keyword_validator&) = delete; + keyword_validator(keyword_validator&&) = default; + keyword_validator& operator=(const keyword_validator&) = delete; + keyword_validator& operator=(keyword_validator&&) = default; + + virtual ~keyword_validator() = default; + + const std::string& absolute_keyword_location() const + { + return absolute_keyword_location_; + } + + void validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const + { + do_validate(instance, + 
instance_location, + reporter, + patch); + } + + virtual jsoncons::optional<Json> get_default_value(const jsonpointer::json_pointer&, const Json&, error_reporter&) const + { + return jsoncons::optional<Json>(); + } + + private: + virtual void do_validate(const Json& instance, + const jsonpointer::json_pointer& instance_location, + error_reporter& reporter, + Json& patch) const = 0; + }; + + template <class Json> + std::vector<schema_location> update_uris(const Json& schema, + const std::vector<schema_location>& uris, + const std::vector<std::string>& keys) + { + // Exclude uri's that are not plain name identifiers + std::vector<schema_location> new_uris; + for (const auto& uri : uris) + { + if (!uri.has_identifier()) + new_uris.push_back(uri); + } + + // Append the keys for this sub-schema to the uri's + for (const auto& key : keys) + { + for (auto& uri : new_uris) + { + auto new_u = uri.append(key); + uri = schema_location(new_u); + } + } + if (schema.type() == json_type::object_value) + { + auto it = schema.find("$id"); // If $id is found, this schema can be referenced by the id + if (it != schema.object_range().end()) + { + std::string id = it->value().template as<std::string>(); + // Add it to the list if it is not already there + if (std::find(new_uris.begin(), new_uris.end(), id) == new_uris.end()) + { + schema_location relative(id); + schema_location new_uri = relative.resolve(new_uris.back()); + new_uris.emplace_back(new_uri); + } + } + } + + return new_uris; + } + +} // namespace jsonschema +} // namespace jsoncons + +#endif // JSONCONS_JSONSCHEMA_RULE_HPP diff --git a/include/jsoncons_ext/mergepatch/mergepatch.hpp b/include/jsoncons_ext/mergepatch/mergepatch.hpp new file mode 100644 index 0000000..c5fdefe --- /dev/null +++ b/include/jsoncons_ext/mergepatch/mergepatch.hpp @@ -0,0 +1,103 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_JSONMERGEPATCH_JSONMERGEPATCH_HPP +#define JSONCONS_JSONMERGEPATCH_JSONMERGEPATCH_HPP + +#include <string> +#include <vector> +#include <memory> +#include <algorithm> // std::min +#include <utility> // std::move +#include <jsoncons/json.hpp> + +namespace jsoncons { +namespace mergepatch { + + template <class Json> + Json from_diff(const Json& source, const Json& target) + { + if (!source.is_object() || !target.is_object()) + { + return target; + } + Json result(json_object_arg); + + for (const auto& member : source.object_range()) + { + auto it = target.find(member.key()); + if (it != target.object_range().end()) + { + if (member.value() != it->value()) + { + result.try_emplace(member.key(), from_diff(member.value(), it->value())); + } + } + else + { + result.try_emplace(member.key(), Json::null()); + } + } + + for (const auto& member : target.object_range()) + { + auto it = source.find(member.key()); + if (it == source.object_range().end()) + { + result.try_emplace(member.key(), member.value()); + } + } + + return result; + } + + namespace detail { + template <class Json> + Json apply_merge_patch_(Json& target, const Json& patch) + { + if (patch.is_object()) + { + if (!target.is_object()) + { + target = Json(json_object_arg); + } + for (auto& member : patch.object_range()) + { + auto it = target.find(member.key()); + if (it != target.object_range().end()) + { + Json item = it->value(); + target.erase(it); + if (!member.value().is_null()) + { + target.try_emplace(member.key(), apply_merge_patch_(item, member.value())); + } + } + else if (!member.value().is_null()) + { + Json item(json_object_arg); + target.try_emplace(member.key(), apply_merge_patch_(item, member.value())); + } + } + return target; + } + else + { + return patch; + } + } + } // namespace detail + + template <class Json> 
+ void apply_merge_patch(Json& target, const Json& patch) + { + target = detail::apply_merge_patch_(target, patch); + } + +} // namespace mergepatch +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/msgpack/decode_msgpack.hpp b/include/jsoncons_ext/msgpack/decode_msgpack.hpp new file mode 100644 index 0000000..614af3d --- /dev/null +++ b/include/jsoncons_ext/msgpack/decode_msgpack.hpp @@ -0,0 +1,202 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_MSGPACK_DECODE_MSGPACK_HPP +#define JSONCONS_MSGPACK_DECODE_MSGPACK_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/msgpack/msgpack_encoder.hpp> +#include <jsoncons_ext/msgpack/msgpack_reader.hpp> +#include <jsoncons_ext/msgpack/msgpack_cursor.hpp> + +namespace jsoncons { +namespace msgpack { + + template<class T, class Source> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_msgpack(const Source& v, + const msgpack_decode_options& options = msgpack_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_msgpack_reader<jsoncons::bytes_source> reader(v, adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class Source> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_msgpack(const 
Source& v, + const msgpack_decode_options& options = msgpack_decode_options()) + { + basic_msgpack_cursor<bytes_source> cursor(v, options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_msgpack(std::istream& is, + const msgpack_decode_options& options = msgpack_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + msgpack_stream_reader reader(is, adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_msgpack(std::istream& is, + const msgpack_decode_options& options = msgpack_decode_options()) + { + basic_msgpack_cursor<binary_stream_source> cursor(is, options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T, class InputIt> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_msgpack(InputIt first, InputIt last, + const msgpack_decode_options& options = msgpack_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_msgpack_reader<binary_iterator_source<InputIt>> reader(binary_iterator_source<InputIt>(first, last), adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + 
JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class InputIt> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_msgpack(InputIt first, InputIt last, + const msgpack_decode_options& options = msgpack_decode_options()) + { + basic_msgpack_cursor<binary_iterator_source<InputIt>> cursor(binary_iterator_source<InputIt>(first, last), options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + // With leading allocator parameter + + template<class T, class Source, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_msgpack(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& v, + const msgpack_decode_options& options = msgpack_decode_options()) + { + json_decoder<T,TempAllocator> decoder(temp_alloc); + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_msgpack_reader<jsoncons::bytes_source,TempAllocator> reader(v, adaptor, options, temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class Source, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_msgpack(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& v, + const msgpack_decode_options& options = msgpack_decode_options()) + { + basic_msgpack_cursor<bytes_source,TempAllocator> cursor(v, options, temp_alloc); + 
json_decoder<basic_json<char,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_msgpack(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::istream& is, + const msgpack_decode_options& options = msgpack_decode_options()) + { + json_decoder<T,TempAllocator> decoder(temp_alloc); + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_msgpack_reader<jsoncons::binary_stream_source,TempAllocator> reader(is, adaptor, options, temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_msgpack(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::istream& is, + const msgpack_decode_options& options = msgpack_decode_options()) + { + basic_msgpack_cursor<binary_stream_source,TempAllocator> cursor(is, options, temp_alloc); + json_decoder<basic_json<char,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + +} // msgpack +} // jsoncons + +#endif diff --git a/include/jsoncons_ext/msgpack/encode_msgpack.hpp b/include/jsoncons_ext/msgpack/encode_msgpack.hpp new file mode 100644 index 0000000..10a61e0 --- /dev/null +++ b/include/jsoncons_ext/msgpack/encode_msgpack.hpp @@ -0,0 +1,142 @@ +// Copyright 2013 Daniel 
Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_MSGPACK_ENCODE_MSGPACK_HPP +#define JSONCONS_MSGPACK_ENCODE_MSGPACK_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/msgpack/msgpack_encoder.hpp> +#include <jsoncons_ext/msgpack/msgpack_reader.hpp> + +namespace jsoncons { +namespace msgpack { + + template<class T, class Container> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_msgpack(const T& j, + Container& v, + const msgpack_encode_options& options = msgpack_encode_options()) + { + using char_type = typename T::char_type; + basic_msgpack_encoder<jsoncons::bytes_sink<Container>> encoder(v, options); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T, class Container> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_msgpack(const T& val, + Container& v, + const msgpack_encode_options& options = msgpack_encode_options()) + { + basic_msgpack_encoder<jsoncons::bytes_sink<Container>> encoder(v, options); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template<class T> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_msgpack(const T& j, + std::ostream& os, + const msgpack_encode_options& options = msgpack_encode_options()) + { + using char_type = typename T::char_type; + 
msgpack_stream_encoder encoder(os, options); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_msgpack(const T& val, + std::ostream& os, + const msgpack_encode_options& options = msgpack_encode_options()) + { + msgpack_stream_encoder encoder(os, options); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + // with temp_allocator_arg_t + + template<class T, class Container, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_msgpack(temp_allocator_arg_t, const TempAllocator& temp_alloc, const T& j, + Container& v, + const msgpack_encode_options& options = msgpack_encode_options()) + { + using char_type = typename T::char_type; + basic_msgpack_encoder<jsoncons::bytes_sink<Container>,TempAllocator> encoder(v, options, temp_alloc); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T, class Container, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_msgpack(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, Container& v, + const msgpack_encode_options& options = msgpack_encode_options()) + { + basic_msgpack_encoder<jsoncons::bytes_sink<Container>,TempAllocator> encoder(v, options, temp_alloc); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template<class T,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_msgpack(temp_allocator_arg_t, 
const TempAllocator& temp_alloc, + const T& j, + std::ostream& os, + const msgpack_encode_options& options = msgpack_encode_options()) + { + using char_type = typename T::char_type; + basic_msgpack_encoder<jsoncons::binary_stream_sink,TempAllocator> encoder(os, options, temp_alloc); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_msgpack(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, + std::ostream& os, + const msgpack_encode_options& options = msgpack_encode_options()) + { + basic_msgpack_encoder<jsoncons::binary_stream_sink,TempAllocator> encoder(os, options, temp_alloc); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + +} // msgpack +} // jsoncons + +#endif diff --git a/include/jsoncons_ext/msgpack/msgpack.hpp b/include/jsoncons_ext/msgpack/msgpack.hpp new file mode 100644 index 0000000..307aad2 --- /dev/null +++ b/include/jsoncons_ext/msgpack/msgpack.hpp @@ -0,0 +1,24 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_MSGPACK_MSGPACK_HPP +#define JSONCONS_MSGPACK_MSGPACK_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/msgpack/msgpack_encoder.hpp> +#include <jsoncons_ext/msgpack/msgpack_reader.hpp> +#include <jsoncons_ext/msgpack/msgpack_cursor.hpp> +#include <jsoncons_ext/msgpack/encode_msgpack.hpp> +#include <jsoncons_ext/msgpack/decode_msgpack.hpp> + +#endif + diff --git a/include/jsoncons_ext/msgpack/msgpack_cursor.hpp b/include/jsoncons_ext/msgpack/msgpack_cursor.hpp new file mode 100644 index 0000000..a813429 --- /dev/null +++ b/include/jsoncons_ext/msgpack/msgpack_cursor.hpp @@ -0,0 +1,343 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_MSGPACK_MSGPACK_CURSOR_HPP +#define JSONCONS_MSGPACK_MSGPACK_CURSOR_HPP + +#include <memory> // std::allocator +#include <string> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <ios> +#include <istream> // std::basic_istream +#include <jsoncons/byte_string.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/staj_cursor.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons_ext/msgpack/msgpack_parser.hpp> + +namespace jsoncons { +namespace msgpack { + +template<class Source=jsoncons::binary_stream_source,class Allocator=std::allocator<char>> +class basic_msgpack_cursor : public basic_staj_cursor<char>, private virtual ser_context +{ +public: + using source_type = Source; + using char_type = char; + using allocator_type = Allocator; +private: + basic_msgpack_parser<Source,Allocator> parser_; + basic_staj_visitor<char_type> cursor_visitor_; + basic_json_visitor2_to_visitor_adaptor<char_type,Allocator> cursor_handler_adaptor_; + bool eof_; + + // Noncopyable and nonmoveable + basic_msgpack_cursor(const basic_msgpack_cursor&) = delete; + basic_msgpack_cursor& operator=(const basic_msgpack_cursor&) = delete; + +public: + using string_view_type = string_view; + + template <class Sourceable> + basic_msgpack_cursor(Sourceable&& source, + const msgpack_decode_options& options = msgpack_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + cursor_handler_adaptor_(cursor_visitor_, alloc), + eof_(false) + { + if (!done()) + { + next(); + } + } + + // Constructors that set parse error codes + + template <class Sourceable> + basic_msgpack_cursor(Sourceable&& source, + 
std::error_code& ec) + : basic_msgpack_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + msgpack_decode_options(), + ec) + { + } + + template <class Sourceable> + basic_msgpack_cursor(Sourceable&& source, + const msgpack_decode_options& options, + std::error_code& ec) + : basic_msgpack_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + ec) + { + } + + template <class Sourceable> + basic_msgpack_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const msgpack_decode_options& options, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + cursor_handler_adaptor_(cursor_visitor_, alloc), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + void reset() + { + parser_.reset(); + cursor_visitor_.reset(); + cursor_handler_adaptor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + cursor_handler_adaptor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + void reset(std::error_code& ec) + { + parser_.reset(); + cursor_visitor_.reset(); + cursor_handler_adaptor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + void reset(Sourceable&& source, std::error_code& ec) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + cursor_handler_adaptor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + bool done() const override + { + return parser_.done(); + } + + const staj_event& current() const override + { + return cursor_visitor_.event(); + } + + void read_to(basic_json_visitor<char_type>& visitor) override + { + std::error_code ec; + read_to(visitor, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void 
read_to(basic_json_visitor<char_type>& visitor, + std::error_code& ec) override + { + if (cursor_visitor_.dump(visitor, *this, ec)) + { + read_next(visitor, ec); + } + } + + void next() override + { + std::error_code ec; + next(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void next(std::error_code& ec) override + { + read_next(ec); + } + + const ser_context& context() const override + { + return *this; + } + + bool eof() const + { + return eof_; + } + + std::size_t line() const override + { + return parser_.line(); + } + + std::size_t column() const override + { + return parser_.column(); + } + + friend + staj_filter_view operator|(basic_msgpack_cursor& cursor, + std::function<bool(const staj_event&, const ser_context&)> pred) + { + return staj_filter_view(cursor, pred); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_msgpack_cursor(Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + const msgpack_decode_options& options = msgpack_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(filter), + cursor_handler_adaptor_(cursor_visitor_, alloc), + eof_(false) + { + if (!done()) + { + next(); + } + } + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_msgpack_cursor(Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + std::error_code& ec) + : basic_msgpack_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), filter, ec) + { + } + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_msgpack_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + 
std::error_code& ec) + : parser_(std::forward<Sourceable>(source), alloc), + cursor_visitor_(filter), + cursor_handler_adaptor_(cursor_visitor_, alloc), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<char_type>&)") + void read(basic_json_visitor<char_type>& visitor) + { + read_to(visitor); + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<char_type>&, std::error_code&)") + void read(basic_json_visitor<char_type>& visitor, + std::error_code& ec) + { + read_to(visitor, ec); + } +#endif +private: + static bool accept_all(const staj_event&, const ser_context&) + { + return true; + } + + void read_next(std::error_code& ec) + { + if (cursor_visitor_.in_available()) + { + cursor_visitor_.send_available(ec); + } + else + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(cursor_handler_adaptor_, ec); + if (ec) return; + } + } + } + + void read_next(basic_json_visitor<char_type>& visitor, std::error_code& ec) + { + { + struct resource_wrapper + { + basic_json_visitor2_to_visitor_adaptor<char_type,Allocator>& adaptor; + basic_json_visitor<char_type>& original; + + resource_wrapper(basic_json_visitor2_to_visitor_adaptor<char_type,Allocator>& adaptor, + basic_json_visitor<char_type>& visitor) + : adaptor(adaptor), original(adaptor.destination()) + { + adaptor.destination(visitor); + } + + ~resource_wrapper() + { + adaptor.destination(original); + } + } wrapper(cursor_handler_adaptor_, visitor); + + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(cursor_handler_adaptor_, ec); + if (ec) return; + } + } + } +}; + +using msgpack_stream_cursor = basic_msgpack_cursor<jsoncons::binary_stream_source>; +using msgpack_bytes_cursor = basic_msgpack_cursor<jsoncons::bytes_source>; + +} // namespace msgpack +} // namespace jsoncons + +#endif + diff --git a/include/jsoncons_ext/msgpack/msgpack_cursor2.hpp 
b/include/jsoncons_ext/msgpack/msgpack_cursor2.hpp new file mode 100644 index 0000000..8fce1ca --- /dev/null +++ b/include/jsoncons_ext/msgpack/msgpack_cursor2.hpp @@ -0,0 +1,259 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_MSGPACK_MSGPACK_CURSOR2_HPP +#define JSONCONS_MSGPACK_MSGPACK_CURSOR2_HPP + +#include <memory> // std::allocator +#include <string> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <ios> +#include <istream> // std::basic_istream +#include <jsoncons/byte_string.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/json_visitor2.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/staj2_cursor.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons_ext/msgpack/msgpack_parser.hpp> + +namespace jsoncons { +namespace msgpack { + + template<class Source=jsoncons::binary_stream_source,class Allocator=std::allocator<char>> + class basic_msgpack_cursor2 : public basic_staj2_cursor<char>, private virtual ser_context + { + public: + using source_type = Source; + using char_type = char; + using allocator_type = Allocator; + private: + basic_msgpack_parser<Source,Allocator> parser_; + basic_staj2_visitor<char_type> cursor_visitor_; + bool eof_; + + // Noncopyable and nonmoveable + basic_msgpack_cursor2(const basic_msgpack_cursor2&) = delete; + basic_msgpack_cursor2& operator=(const basic_msgpack_cursor2&) = delete; + + public: + using string_view_type = string_view; + + template <class Sourceable> + basic_msgpack_cursor2(Sourceable&& source, + const msgpack_decode_options& options = msgpack_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + eof_(false) + { + if (!done()) + { 
+ next(); + } + } + + // Constructors that set parse error codes + + template <class Sourceable> + basic_msgpack_cursor2(Sourceable&& source, + std::error_code& ec) + : basic_msgpack_cursor2(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + msgpack_decode_options(), + ec) + { + } + + template <class Sourceable> + basic_msgpack_cursor2(Sourceable&& source, + const msgpack_decode_options& options, + std::error_code& ec) + : basic_msgpack_cursor2(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + ec) + { + } + + template <class Sourceable> + basic_msgpack_cursor2(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const msgpack_decode_options& options, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + void reset() + { + parser_.reset(); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + void reset(std::error_code& ec) + { + parser_.reset(); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + void reset(Sourceable&& source, std::error_code& ec) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + bool done() const override + { + return parser_.done(); + } + + const staj2_event& current() const override + { + return cursor_visitor_.event(); + } + + void read_to(basic_json_visitor2<char_type>& visitor) override + { + std::error_code ec; + read_to(visitor, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read_to(basic_json_visitor2<char_type>& 
visitor, + std::error_code& ec) override + { + if (cursor_visitor_.dump(visitor, *this, ec)) + { + read_next(visitor, ec); + } + } + + void next() override + { + std::error_code ec; + next(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void next(std::error_code& ec) override + { + read_next(ec); + } + + const ser_context& context() const override + { + return *this; + } + + bool eof() const + { + return eof_; + } + + std::size_t line() const override + { + return parser_.line(); + } + + std::size_t column() const override + { + return parser_.column(); + } + + friend + staj2_filter_view operator|(basic_msgpack_cursor2& cursor, + std::function<bool(const staj2_event&, const ser_context&)> pred) + { + return staj2_filter_view(cursor, pred); + } + + private: + static bool accept_all(const staj2_event&, const ser_context&) + { + return true; + } + + void read_next(std::error_code& ec) + { + if (cursor_visitor_.in_available()) + { + cursor_visitor_.send_available(ec); + } + else + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(cursor_visitor_, ec); + if (ec) return; + } + } + } + + void read_next(basic_json_visitor2<char_type>& visitor, std::error_code& ec) + { + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(visitor, ec); + if (ec) return; + } + } + } + }; + + using msgpack_stream_cursor2 = basic_msgpack_cursor2<jsoncons::binary_stream_source>; + using msgpack_bytes_cursor2 = basic_msgpack_cursor2<jsoncons::bytes_source>; + +} // namespace msgpack +} // namespace jsoncons + +#endif + diff --git a/include/jsoncons_ext/msgpack/msgpack_encoder.hpp b/include/jsoncons_ext/msgpack/msgpack_encoder.hpp new file mode 100644 index 0000000..34e882b --- /dev/null +++ b/include/jsoncons_ext/msgpack/msgpack_encoder.hpp @@ -0,0 +1,753 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_MSGPACK_MSGPACK_ENCODER_HPP +#define JSONCONS_MSGPACK_MSGPACK_ENCODER_HPP + +#include <string> +#include <vector> +#include <limits> // std::numeric_limits +#include <memory> +#include <utility> // std::move +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/sink.hpp> +#include <jsoncons/detail/parse_number.hpp> +#include <jsoncons_ext/msgpack/msgpack_type.hpp> +#include <jsoncons_ext/msgpack/msgpack_error.hpp> +#include <jsoncons_ext/msgpack/msgpack_options.hpp> + +namespace jsoncons { +namespace msgpack { + + enum class msgpack_container_type {object, array}; + + template<class Sink=jsoncons::binary_stream_sink,class Allocator=std::allocator<char>> + class basic_msgpack_encoder final : public basic_json_visitor<char> + { + enum class decimal_parse_state { start, integer, exp1, exp2, fraction1 }; + + static constexpr int64_t nanos_in_milli = 1000000; + static constexpr int64_t nanos_in_second = 1000000000; + static constexpr int64_t millis_in_second = 1000; + public: + using allocator_type = Allocator; + using char_type = char; + using typename basic_json_visitor<char>::string_view_type; + using sink_type = Sink; + + private: + struct stack_item + { + msgpack_container_type type_; + std::size_t length_; + std::size_t count_; + + stack_item(msgpack_container_type type, std::size_t length = 0) noexcept + : type_(type), length_(length), count_(0) + { + } + + std::size_t length() const + { + return length_; + } + + std::size_t count() const + { + return count_; + } + + bool is_object() const + { + return type_ == msgpack_container_type::object; + } + }; + + Sink sink_; + const msgpack_encode_options options_; + allocator_type alloc_; + + std::vector<stack_item> stack_; + int nesting_depth_; + + 
// Noncopyable and nonmoveable + basic_msgpack_encoder(const basic_msgpack_encoder&) = delete; + basic_msgpack_encoder& operator=(const basic_msgpack_encoder&) = delete; + public: + explicit basic_msgpack_encoder(Sink&& sink, + const Allocator& alloc = Allocator()) + : basic_msgpack_encoder(std::forward<Sink>(sink), msgpack_encode_options(), alloc) + { + } + + explicit basic_msgpack_encoder(Sink&& sink, + const msgpack_encode_options& options, + const Allocator& alloc = Allocator()) + : sink_(std::forward<Sink>(sink)), + options_(options), + alloc_(alloc), + nesting_depth_(0) + { + } + + ~basic_msgpack_encoder() noexcept + { + sink_.flush(); + } + + void reset() + { + stack_.clear(); + nesting_depth_ = 0; + } + + void reset(Sink&& sink) + { + sink_ = std::move(sink); + reset(); + } + + private: + // Implementing methods + + void visit_flush() override + { + sink_.flush(); + } + + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + ec = msgpack_errc::object_length_required; + return false; + } + + bool visit_begin_object(std::size_t length, semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = msgpack_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(msgpack_container_type::object, length); + + if (length <= 15) + { + // fixmap + sink_.push_back(jsoncons::msgpack::msgpack_type::fixmap_base_type | (length & 0xf)); + } + else if (length <= 65535) + { + // map 16 + sink_.push_back(jsoncons::msgpack::msgpack_type::map16_type); + binary::native_to_big(static_cast<uint16_t>(length), + std::back_inserter(sink_)); + } + else if (length <= 4294967295) + { + // map 32 + sink_.push_back(jsoncons::msgpack::msgpack_type::map32_type); + binary::native_to_big(static_cast<uint32_t>(length), + std::back_inserter(sink_)); + } + + return true; + } + + bool visit_end_object(const ser_context&, std::error_code& ec) 
override + { + JSONCONS_ASSERT(!stack_.empty()); + --nesting_depth_; + + if (stack_.back().count() < stack_.back().length()) + { + ec = msgpack_errc::too_few_items; + return false; + } + else if (stack_.back().count() > stack_.back().length()) + { + ec = msgpack_errc::too_many_items; + return false; + } + + stack_.pop_back(); + end_value(); + return true; + } + + bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override + { + ec = msgpack_errc::array_length_required; + return false; + } + + bool visit_begin_array(std::size_t length, semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = msgpack_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(msgpack_container_type::array, length); + if (length <= 15) + { + // fixarray + sink_.push_back(jsoncons::msgpack::msgpack_type::fixarray_base_type | (length & 0xf)); + } + else if (length <= (std::numeric_limits<uint16_t>::max)()) + { + // array 16 + sink_.push_back(jsoncons::msgpack::msgpack_type::array16_type); + binary::native_to_big(static_cast<uint16_t>(length),std::back_inserter(sink_)); + } + else if (length <= (std::numeric_limits<uint32_t>::max)()) + { + // array 32 + sink_.push_back(jsoncons::msgpack::msgpack_type::array32_type); + binary::native_to_big(static_cast<uint32_t>(length),std::back_inserter(sink_)); + } + return true; + } + + bool visit_end_array(const ser_context&, std::error_code& ec) override + { + JSONCONS_ASSERT(!stack_.empty()); + + --nesting_depth_; + + if (stack_.back().count() < stack_.back().length()) + { + ec = msgpack_errc::too_few_items; + return false; + } + else if (stack_.back().count() > stack_.back().length()) + { + ec = msgpack_errc::too_many_items; + return false; + } + + stack_.pop_back(); + end_value(); + return true; + } + + bool visit_key(const string_view_type& name, const ser_context&, std::error_code&) override + { + 
write_string_value(name); + return true; + } + + bool visit_null(semantic_tag, const ser_context&, std::error_code&) override + { + // nil + sink_.push_back(jsoncons::msgpack::msgpack_type::nil_type); + end_value(); + return true; + } + + void write_timestamp(int64_t seconds, int64_t nanoseconds) + { + if ((seconds >> 34) == 0) + { + uint64_t data64 = (nanoseconds << 34) | seconds; + if ((data64 & 0xffffffff00000000L) == 0) + { + // timestamp 32 + sink_.push_back(jsoncons::msgpack::msgpack_type::fixext4_type); + sink_.push_back(0xff); + binary::native_to_big(static_cast<uint32_t>(data64), std::back_inserter(sink_)); + } + else + { + // timestamp 64 + sink_.push_back(jsoncons::msgpack::msgpack_type::fixext8_type); + sink_.push_back(0xff); + binary::native_to_big(static_cast<uint64_t>(data64), std::back_inserter(sink_)); + } + } + else + { + // timestamp 96 + sink_.push_back(jsoncons::msgpack::msgpack_type::ext8_type); + sink_.push_back(0x0c); // 12 + sink_.push_back(0xff); + binary::native_to_big(static_cast<uint32_t>(nanoseconds), std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint64_t>(seconds), std::back_inserter(sink_)); + } + } + + bool visit_string(const string_view_type& sv, semantic_tag tag, const ser_context&, std::error_code& ec) override + { + switch (tag) + { + case semantic_tag::epoch_second: + { + int64_t seconds; + auto result = jsoncons::detail::to_integer(sv.data(), sv.length(), seconds); + if (!result) + { + ec = msgpack_errc::invalid_timestamp; + return false; + } + write_timestamp(seconds, 0); + break; + } + case semantic_tag::epoch_milli: + { + bigint n = bigint::from_string(sv.data(), sv.length()); + if (n != 0) + { + bigint q; + bigint rem; + n.divide(millis_in_second, q, rem, true); + int64_t seconds = static_cast<int64_t>(q); + int64_t nanoseconds = static_cast<int64_t>(rem) * nanos_in_milli; + if (nanoseconds < 0) + { + nanoseconds = -nanoseconds; + } + write_timestamp(seconds, nanoseconds); + } + else + { + 
write_timestamp(0, 0); + } + break; + } + case semantic_tag::epoch_nano: + { + bigint n = bigint::from_string(sv.data(), sv.length()); + if (n != 0) + { + bigint q; + bigint rem; + n.divide(nanos_in_second, q, rem, true); + int64_t seconds = static_cast<int64_t>(q); + int64_t nanoseconds = static_cast<int64_t>(rem); + if (nanoseconds < 0) + { + nanoseconds = -nanoseconds; + } + write_timestamp(seconds, nanoseconds); + } + else + { + write_timestamp(0, 0); + } + break; + } + default: + { + write_string_value(sv); + end_value(); + break; + } + } + return true; + } + + void write_string_value(const string_view_type& sv) + { + auto sink = unicode_traits::validate(sv.data(), sv.size()); + if (sink.ec != unicode_traits::conv_errc()) + { + JSONCONS_THROW(ser_error(msgpack_errc::invalid_utf8_text_string)); + } + + const size_t length = sv.length(); + if (length <= 31) + { + // fixstr stores a byte array whose length is upto 31 bytes + sink_.push_back(jsoncons::msgpack::msgpack_type::fixstr_base_type | static_cast<uint8_t>(length)); + } + else if (length <= (std::numeric_limits<uint8_t>::max)()) + { + // str 8 stores a byte array whose length is upto (2^8)-1 bytes + sink_.push_back(jsoncons::msgpack::msgpack_type::str8_type); + sink_.push_back(static_cast<uint8_t>(length)); + } + else if (length <= (std::numeric_limits<uint16_t>::max)()) + { + // str 16 stores a byte array whose length is upto (2^16)-1 bytes + sink_.push_back(jsoncons::msgpack::msgpack_type::str16_type); + binary::native_to_big(static_cast<uint16_t>(length), std::back_inserter(sink_)); + } + else if (length <= (std::numeric_limits<uint32_t>::max)()) + { + // str 32 stores a byte array whose length is upto (2^32)-1 bytes + sink_.push_back(jsoncons::msgpack::msgpack_type::str32_type); + binary::native_to_big(static_cast<uint32_t>(length),std::back_inserter(sink_)); + } + + for (auto c : sv) + { + sink_.push_back(c); + } + } + + bool visit_byte_string(const byte_string_view& b, + semantic_tag, + const 
ser_context&, + std::error_code&) override + { + + const std::size_t length = b.size(); + if (length <= (std::numeric_limits<uint8_t>::max)()) + { + // bin 8 stores a byte array whose length is upto (2^8)-1 bytes + sink_.push_back(jsoncons::msgpack::msgpack_type::bin8_type); + sink_.push_back(static_cast<uint8_t>(length)); + } + else if (length <= (std::numeric_limits<uint16_t>::max)()) + { + // bin 16 stores a byte array whose length is upto (2^16)-1 bytes + sink_.push_back(jsoncons::msgpack::msgpack_type::bin16_type); + binary::native_to_big(static_cast<uint16_t>(length), std::back_inserter(sink_)); + } + else if (length <= (std::numeric_limits<uint32_t>::max)()) + { + // bin 32 stores a byte array whose length is upto (2^32)-1 bytes + sink_.push_back(jsoncons::msgpack::msgpack_type::bin32_type); + binary::native_to_big(static_cast<uint32_t>(length),std::back_inserter(sink_)); + } + + for (auto c : b) + { + sink_.push_back(c); + } + + end_value(); + return true; + } + + bool visit_byte_string(const byte_string_view& b, + uint64_t ext_tag, + const ser_context&, + std::error_code&) override + { + const std::size_t length = b.size(); + switch (length) + { + case 1: + sink_.push_back(jsoncons::msgpack::msgpack_type::fixext1_type); + sink_.push_back(static_cast<uint8_t>(ext_tag)); + break; + case 2: + sink_.push_back(jsoncons::msgpack::msgpack_type::fixext2_type); + sink_.push_back(static_cast<uint8_t>(ext_tag)); + break; + case 4: + sink_.push_back(jsoncons::msgpack::msgpack_type::fixext4_type); + sink_.push_back(static_cast<uint8_t>(ext_tag)); + break; + case 8: + sink_.push_back(jsoncons::msgpack::msgpack_type::fixext8_type); + sink_.push_back(static_cast<uint8_t>(ext_tag)); + break; + case 16: + sink_.push_back(jsoncons::msgpack::msgpack_type::fixext16_type); + sink_.push_back(static_cast<uint8_t>(ext_tag)); + break; + default: + if (length <= (std::numeric_limits<uint8_t>::max)()) + { + sink_.push_back(jsoncons::msgpack::msgpack_type::ext8_type); + 
sink_.push_back(static_cast<uint8_t>(length)); + sink_.push_back(static_cast<uint8_t>(ext_tag)); + } + else if (length <= (std::numeric_limits<uint16_t>::max)()) + { + sink_.push_back(jsoncons::msgpack::msgpack_type::ext16_type); + binary::native_to_big(static_cast<uint16_t>(length), std::back_inserter(sink_)); + sink_.push_back(static_cast<uint8_t>(ext_tag)); + } + else if (length <= (std::numeric_limits<uint32_t>::max)()) + { + sink_.push_back(jsoncons::msgpack::msgpack_type::ext32_type); + binary::native_to_big(static_cast<uint32_t>(length),std::back_inserter(sink_)); + sink_.push_back(static_cast<uint8_t>(ext_tag)); + } + break; + } + + for (auto c : b) + { + sink_.push_back(c); + } + + end_value(); + return true; + } + + bool visit_double(double val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + float valf = (float)val; + if ((double)valf == val) + { + // float 32 + sink_.push_back(jsoncons::msgpack::msgpack_type::float32_type); + binary::native_to_big(valf,std::back_inserter(sink_)); + } + else + { + // float 64 + sink_.push_back(jsoncons::msgpack::msgpack_type::float64_type); + binary::native_to_big(val,std::back_inserter(sink_)); + } + + // write double + + end_value(); + return true; + } + + bool visit_int64(int64_t val, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + switch (tag) + { + case semantic_tag::epoch_second: + write_timestamp(val, 0); + break; + case semantic_tag::epoch_milli: + { + if (val != 0) + { + auto dv = std::div(val,millis_in_second); + int64_t seconds = dv.quot; + int64_t nanoseconds = dv.rem*nanos_in_milli; + if (nanoseconds < 0) + { + nanoseconds = -nanoseconds; + } + write_timestamp(seconds, nanoseconds); + } + else + { + write_timestamp(0, 0); + } + break; + } + case semantic_tag::epoch_nano: + { + if (val != 0) + { + auto dv = std::div(val,static_cast<int64_t>(nanos_in_second)); + int64_t seconds = dv.quot; + int64_t nanoseconds = dv.rem; + if (nanoseconds < 0) + { + 
nanoseconds = -nanoseconds; + } + write_timestamp(seconds, nanoseconds); + } + else + { + write_timestamp(0, 0); + } + break; + } + default: + { + if (val >= 0) + { + if (val <= 0x7f) + { + // positive fixnum stores 7-bit positive integer + sink_.push_back(static_cast<uint8_t>(val)); + } + else if (val <= (std::numeric_limits<uint8_t>::max)()) + { + // uint 8 stores a 8-bit unsigned integer + sink_.push_back(jsoncons::msgpack::msgpack_type::uint8_type); + sink_.push_back(static_cast<uint8_t>(val)); + } + else if (val <= (std::numeric_limits<uint16_t>::max)()) + { + // uint 16 stores a 16-bit big-endian unsigned integer + sink_.push_back(jsoncons::msgpack::msgpack_type::uint16_type); + binary::native_to_big(static_cast<uint16_t>(val),std::back_inserter(sink_)); + } + else if (val <= (std::numeric_limits<uint32_t>::max)()) + { + // uint 32 stores a 32-bit big-endian unsigned integer + sink_.push_back(jsoncons::msgpack::msgpack_type::uint32_type); + binary::native_to_big(static_cast<uint32_t>(val),std::back_inserter(sink_)); + } + else if (val <= (std::numeric_limits<int64_t>::max)()) + { + // int 64 stores a 64-bit big-endian signed integer + sink_.push_back(jsoncons::msgpack::msgpack_type::uint64_type); + binary::native_to_big(static_cast<uint64_t>(val),std::back_inserter(sink_)); + } + } + else + { + if (val >= -32) + { + // negative fixnum stores 5-bit negative integer + binary::native_to_big(static_cast<int8_t>(val), std::back_inserter(sink_)); + } + else if (val >= (std::numeric_limits<int8_t>::lowest)()) + { + // int 8 stores a 8-bit signed integer + sink_.push_back(jsoncons::msgpack::msgpack_type::int8_type); + binary::native_to_big(static_cast<int8_t>(val),std::back_inserter(sink_)); + } + else if (val >= (std::numeric_limits<int16_t>::lowest)()) + { + // int 16 stores a 16-bit big-endian signed integer + sink_.push_back(jsoncons::msgpack::msgpack_type::int16_type); + binary::native_to_big(static_cast<int16_t>(val),std::back_inserter(sink_)); + } + else if 
(val >= (std::numeric_limits<int32_t>::lowest)()) + { + // int 32 stores a 32-bit big-endian signed integer + sink_.push_back(jsoncons::msgpack::msgpack_type::int32_type); + binary::native_to_big(static_cast<int32_t>(val),std::back_inserter(sink_)); + } + else if (val >= (std::numeric_limits<int64_t>::lowest)()) + { + // int 64 stores a 64-bit big-endian signed integer + sink_.push_back(jsoncons::msgpack::msgpack_type::int64_type); + binary::native_to_big(static_cast<int64_t>(val),std::back_inserter(sink_)); + } + } + } + break; + } + end_value(); + return true; + } + + bool visit_uint64(uint64_t val, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + switch (tag) + { + case semantic_tag::epoch_second: + write_timestamp(static_cast<int64_t>(val), 0); + break; + case semantic_tag::epoch_milli: + { + if (val != 0) + { + auto dv = std::div(static_cast<int64_t>(val), static_cast<int64_t>(millis_in_second)); + int64_t seconds = dv.quot; + int64_t nanoseconds = dv.rem*nanos_in_milli; + if (nanoseconds < 0) + { + nanoseconds = -nanoseconds; + } + write_timestamp(seconds, nanoseconds); + } + else + { + write_timestamp(0, 0); + } + break; + } + case semantic_tag::epoch_nano: + { + if (val != 0) + { + auto dv = std::div(static_cast<int64_t>(val), static_cast<int64_t>(nanos_in_second)); + int64_t seconds = dv.quot; + int64_t nanoseconds = dv.rem; + if (nanoseconds < 0) + { + nanoseconds = -nanoseconds; + } + write_timestamp(seconds, nanoseconds); + } + else + { + write_timestamp(0, 0); + } + break; + } + default: + { + if (val <= static_cast<uint64_t>((std::numeric_limits<int8_t>::max)())) + { + // positive fixnum stores 7-bit positive integer + sink_.push_back(static_cast<uint8_t>(val)); + } + else if (val <= (std::numeric_limits<uint8_t>::max)()) + { + // uint 8 stores a 8-bit unsigned integer + sink_.push_back(jsoncons::msgpack::msgpack_type::uint8_type); + sink_.push_back(static_cast<uint8_t>(val)); + } + else if (val <= 
(std::numeric_limits<uint16_t>::max)()) + { + // uint 16 stores a 16-bit big-endian unsigned integer + sink_.push_back(jsoncons::msgpack::msgpack_type::uint16_type); + binary::native_to_big(static_cast<uint16_t>(val),std::back_inserter(sink_)); + } + else if (val <= (std::numeric_limits<uint32_t>::max)()) + { + // uint 32 stores a 32-bit big-endian unsigned integer + sink_.push_back(jsoncons::msgpack::msgpack_type::uint32_type); + binary::native_to_big(static_cast<uint32_t>(val),std::back_inserter(sink_)); + } + else if (val <= (std::numeric_limits<uint64_t>::max)()) + { + // uint 64 stores a 64-bit big-endian unsigned integer + sink_.push_back(jsoncons::msgpack::msgpack_type::uint64_type); + binary::native_to_big(static_cast<uint64_t>(val),std::back_inserter(sink_)); + } + break; + } + } + end_value(); + return true; + } + + bool visit_bool(bool val, semantic_tag, const ser_context&, std::error_code&) override + { + // true and false + sink_.push_back(static_cast<uint8_t>(val ? jsoncons::msgpack::msgpack_type::true_type : jsoncons::msgpack::msgpack_type::false_type)); + + end_value(); + return true; + } + + void end_value() + { + if (!stack_.empty()) + { + ++stack_.back().count_; + } + } + }; + + using msgpack_stream_encoder = basic_msgpack_encoder<jsoncons::binary_stream_sink>; + using msgpack_bytes_encoder = basic_msgpack_encoder<jsoncons::bytes_sink<std::vector<uint8_t>>>; + + #if !defined(JSONCONS_NO_DEPRECATED) + JSONCONS_DEPRECATED_MSG("Instead, use msgpack_bytes_encoder") typedef msgpack_bytes_encoder msgpack_bytes_serializer; + + template<class Sink=jsoncons::binary_stream_sink> + using basic_msgpack_serializer = basic_msgpack_encoder<Sink>; + + JSONCONS_DEPRECATED_MSG("Instead, use msgpack_stream_encoder") typedef msgpack_stream_encoder msgpack_encoder; + JSONCONS_DEPRECATED_MSG("Instead, use msgpack_stream_encoder") typedef msgpack_stream_encoder msgpack_serializer; + JSONCONS_DEPRECATED_MSG("Instead, use msgpack_bytes_encoder") typedef 
namespace jsoncons { namespace msgpack {

// Error codes reported by the MessagePack encoder and parser.
// `success` is 0 so that a std::error_code built from it tests as false.
enum class msgpack_errc
{
    success = 0,
    unexpected_eof = 1,
    source_error,
    invalid_utf8_text_string,
    array_length_required,
    object_length_required,
    too_many_items,
    too_few_items,
    max_nesting_depth_exceeded,
    length_is_negative,
    invalid_timestamp,
    unknown_type
};

// std::error_category for msgpack_errc values.
class msgpack_error_category_impl
   : public std::error_category
{
public:
    // Category name reported by std::error_code::category().name().
    const char* name() const noexcept override
    {
        return "jsoncons/msgpack";
    }

    // Human-readable text for the msgpack_errc value `ev`.
    // Values that are not msgpack_errc enumerators map to a generic message.
    std::string message(int ev) const override
    {
        switch (static_cast<msgpack_errc>(ev))
        {
            case msgpack_errc::success:
                // Value 0 is not an error; do not describe it as one.
                return "Success";
            case msgpack_errc::unexpected_eof:
                return "Unexpected end of file";
            case msgpack_errc::source_error:
                return "Source error";
            case msgpack_errc::invalid_utf8_text_string:
                return "Illegal UTF-8 encoding in text string";
            case msgpack_errc::array_length_required:
                return "MessagePack encoder requires array length";
            case msgpack_errc::object_length_required:
                return "MessagePack encoder requires object length";
            case msgpack_errc::too_many_items:
                return "Too many items were added to a MessagePack object or array";
            case msgpack_errc::too_few_items:
                return "Too few items were added to a MessagePack object or array";
            case msgpack_errc::max_nesting_depth_exceeded:
                return "Data item nesting exceeds limit in options";
            case msgpack_errc::length_is_negative:
                return "Request for the length of an array, map or string returned a negative result";
            case msgpack_errc::invalid_timestamp:
                return "Invalid timestamp";
            case msgpack_errc::unknown_type:
                return "An unknown type was found in the stream";
            default:
                return "Unknown MessagePack parser error";
        }
    }
};

// Returns the single category instance shared by all msgpack error codes.
inline
const std::error_category& msgpack_error_category()
{
  static msgpack_error_category_impl instance;
  return instance;
}

// Found by argument-dependent lookup when a msgpack_errc is converted to
// std::error_code.
inline
std::error_code make_error_code(msgpack_errc e)
{
    return std::error_code(static_cast<int>(e),msgpack_error_category());
}


}}

namespace std {
    // Allows implicit conversion from msgpack_errc to std::error_code.
    template<>
    struct is_error_code_enum<jsoncons::msgpack::msgpack_errc> : public true_type
    {
    };
}
namespace jsoncons { namespace msgpack {

class msgpack_options;

// State shared by the decode and encode option views. Only msgpack_options
// (a friend) can change the stored limit; everything else reads it through
// max_nesting_depth().
class msgpack_options_common
{
    friend class msgpack_options;

    int max_nesting_depth_{1024};   // default nesting limit

protected:
    msgpack_options_common() = default;
    msgpack_options_common(const msgpack_options_common&) = default;
    msgpack_options_common(msgpack_options_common&&) = default;
    msgpack_options_common& operator=(const msgpack_options_common&) = default;
    msgpack_options_common& operator=(msgpack_options_common&&) = default;

    virtual ~msgpack_options_common() = default;

public:
    // Current nesting limit.
    int max_nesting_depth() const { return max_nesting_depth_; }
};

// Read-only options view for decoding. The common base is virtual so that
// msgpack_options holds a single copy of the shared state.
class msgpack_decode_options : public virtual msgpack_options_common
{
    friend class msgpack_options;

public:
    msgpack_decode_options() {}
};

// Read-only options view for encoding; shares state as described above.
class msgpack_encode_options : public virtual msgpack_options_common
{
    friend class msgpack_options;

public:
    msgpack_encode_options() {}
};

// Mutable options object usable wherever decode or encode options are
// expected.
class msgpack_options final : public msgpack_decode_options,
                              public msgpack_encode_options
{
public:
    // Keep the getter visible next to the setter overload below.
    using msgpack_options_common::max_nesting_depth;

    // Sets the nesting limit and returns *this so calls can be chained.
    msgpack_options& max_nesting_depth(int value)
    {
        max_nesting_depth_ = value;
        return *this;
    }
};

}}
// Position of the parser within the document structure.
enum class parse_mode {root,accept,array,map_key,map_value};

// One entry of the parser's state stack: the mode of the current container
// (or root), the number of items it declares, and how many have been started.
//
// No special member functions are declared, so copy/move construction AND
// assignment are all implicitly available. Declaring only the copy and move
// constructors would implicitly delete assignment, which breaks container
// operations that shift elements (e.g. std::vector::erase).
struct parse_state
{
    parse_mode mode;
    std::size_t length;
    std::size_t index;

    // Starts a state in `mode` that expects `length` items; index begins at 0.
    parse_state(parse_mode mode, std::size_t length) noexcept
        : mode(mode), length(length), index(0)
    {
    }
};
std::basic_string<char,std::char_traits<char>,char_allocator_type> text_buffer_; + std::vector<uint8_t,byte_allocator_type> bytes_buffer_; + std::vector<parse_state,parse_state_allocator_type> state_stack_; + int nesting_depth_; + +public: + template <class Sourceable> + basic_msgpack_parser(Sourceable&& source, + const msgpack_decode_options& options = msgpack_decode_options(), + const Allocator alloc = Allocator()) + : source_(std::forward<Sourceable>(source)), + options_(options), + more_(true), + done_(false), + text_buffer_(alloc), + bytes_buffer_(alloc), + state_stack_(alloc), + nesting_depth_(0) + { + state_stack_.emplace_back(parse_mode::root,0); + } + + void restart() + { + more_ = true; + } + + void reset() + { + more_ = true; + done_ = false; + text_buffer_.clear(); + bytes_buffer_.clear(); + state_stack_.clear(); + state_stack_.emplace_back(parse_mode::root,0); + nesting_depth_ = 0; + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + source_ = std::forward<Sourceable>(source); + reset(); + } + + bool done() const + { + return done_; + } + + bool stopped() const + { + return !more_; + } + + std::size_t line() const override + { + return 0; + } + + std::size_t column() const override + { + return source_.position(); + } + + void parse(json_visitor2& visitor, std::error_code& ec) + { + while (!done_ && more_) + { + switch (state_stack_.back().mode) + { + case parse_mode::array: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_item(visitor, ec); + if (ec) + { + return; + } + } + else + { + end_array(visitor, ec); + } + break; + } + case parse_mode::map_key: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + state_stack_.back().mode = parse_mode::map_value; + read_item(visitor, ec); + if (ec) + { + return; + } + } + else + { + end_object(visitor, ec); + } + break; + } + case parse_mode::map_value: + { + 
state_stack_.back().mode = parse_mode::map_key; + read_item(visitor, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::root: + { + state_stack_.back().mode = parse_mode::accept; + read_item(visitor, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::accept: + { + JSONCONS_ASSERT(state_stack_.size() == 1); + state_stack_.clear(); + more_ = false; + done_ = true; + visitor.flush(); + break; + } + } + } + } +private: + + void read_item(json_visitor2& visitor, std::error_code& ec) + { + if (source_.is_error()) + { + ec = msgpack_errc::source_error; + more_ = false; + return; + } + + uint8_t type; + if (source_.read(&type, 1) == 0) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + + if (type <= 0xbf) + { + if (type <= 0x7f) + { + // positive fixint + more_ = visitor.uint64_value(type, semantic_tag::none, *this, ec); + } + else if (type <= 0x8f) + { + begin_object(visitor,type,ec); // fixmap + } + else if (type <= 0x9f) + { + begin_array(visitor,type,ec); // fixarray + } + else + { + // fixstr + const size_t len = type & 0x1f; + + text_buffer_.clear(); + + if (source_reader<Source>::read(source_,text_buffer_,len) != static_cast<std::size_t>(len)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = msgpack_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()), semantic_tag::none, *this, ec); + } + } + else if (type >= 0xe0) + { + // negative fixint + more_ = visitor.int64_value(static_cast<int8_t>(type), semantic_tag::none, *this, ec); + } + else + { + switch (type) + { + case jsoncons::msgpack::msgpack_type::nil_type: + { + more_ = visitor.null_value(semantic_tag::none, *this, ec); + break; + } + case 
jsoncons::msgpack::msgpack_type::true_type: + { + more_ = visitor.bool_value(true, semantic_tag::none, *this, ec); + break; + } + case jsoncons::msgpack::msgpack_type::false_type: + { + more_ = visitor.bool_value(false, semantic_tag::none, *this, ec); + break; + } + case jsoncons::msgpack::msgpack_type::float32_type: + { + uint8_t buf[sizeof(float)]; + if (source_.read(buf, sizeof(float)) != sizeof(float)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + float val = binary::big_to_native<float>(buf, sizeof(buf)); + more_ = visitor.double_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::float64_type: + { + uint8_t buf[sizeof(double)]; + if (source_.read(buf, sizeof(double)) != sizeof(double)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + double val = binary::big_to_native<double>(buf, sizeof(buf)); + more_ = visitor.double_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::uint8_type: + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + more_ = visitor.uint64_value(b, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::uint16_type: + { + uint8_t buf[sizeof(uint16_t)]; + if (source_.read(buf, sizeof(uint16_t)) !=sizeof(uint16_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + uint16_t val = binary::big_to_native<uint16_t>(buf, sizeof(buf)); + more_ = visitor.uint64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::uint32_type: + { + uint8_t buf[sizeof(uint32_t)]; + if (source_.read(buf, sizeof(uint32_t)) != sizeof(uint32_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + uint32_t val = binary::big_to_native<uint32_t>(buf, sizeof(buf)); + more_ = visitor.uint64_value(val, semantic_tag::none, *this, ec); + break; + } + 
+ case jsoncons::msgpack::msgpack_type::uint64_type: + { + uint8_t buf[sizeof(uint64_t)]; + if (source_.read(buf, sizeof(uint64_t)) != sizeof(uint64_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + uint64_t val = binary::big_to_native<uint64_t>(buf, sizeof(buf)); + more_ = visitor.uint64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::int8_type: + { + uint8_t buf[sizeof(int8_t)]; + if (source_.read(buf, sizeof(int8_t)) != sizeof(int8_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + int8_t val = binary::big_to_native<int8_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::int16_type: + { + uint8_t buf[sizeof(int16_t)]; + if (source_.read(buf, sizeof(int16_t)) != sizeof(int16_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + int16_t val = binary::big_to_native<int16_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::int32_type: + { + uint8_t buf[sizeof(int32_t)]; + if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + int32_t val = binary::big_to_native<int32_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::int64_type: + { + uint8_t buf[sizeof(int64_t)]; + if (source_.read(buf, sizeof(int64_t)) != sizeof(int64_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + int64_t val = binary::big_to_native<int64_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::str8_type: + case jsoncons::msgpack::msgpack_type::str16_type: + case 
jsoncons::msgpack::msgpack_type::str32_type: + { + std::size_t len = get_size(type, ec); + if (!more_) + { + return; + } + + text_buffer_.clear(); + if (source_reader<Source>::read(source_,text_buffer_,len) != static_cast<std::size_t>(len)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = msgpack_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()), semantic_tag::none, *this, ec); + break; + } + + case jsoncons::msgpack::msgpack_type::bin8_type: + case jsoncons::msgpack::msgpack_type::bin16_type: + case jsoncons::msgpack::msgpack_type::bin32_type: + { + std::size_t len = get_size(type,ec); + if (!more_) + { + return; + } + bytes_buffer_.clear(); + if (source_reader<Source>::read(source_,bytes_buffer_,len) != static_cast<std::size_t>(len)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + + more_ = visitor.byte_string_value(byte_string_view(bytes_buffer_.data(),bytes_buffer_.size()), + semantic_tag::none, + *this, + ec); + break; + } + case jsoncons::msgpack::msgpack_type::fixext1_type: + case jsoncons::msgpack::msgpack_type::fixext2_type: + case jsoncons::msgpack::msgpack_type::fixext4_type: + case jsoncons::msgpack::msgpack_type::fixext8_type: + case jsoncons::msgpack::msgpack_type::fixext16_type: + case jsoncons::msgpack::msgpack_type::ext8_type: + case jsoncons::msgpack::msgpack_type::ext16_type: + case jsoncons::msgpack::msgpack_type::ext32_type: + { + std::size_t len = get_size(type,ec); + if (!more_) + { + return; + } + + // type + uint8_t buf[sizeof(int8_t)]; + if (source_.read(buf, sizeof(int8_t)) != sizeof(int8_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + + int8_t ext_type = binary::big_to_native<int8_t>(buf, 
sizeof(buf)); + + bool is_timestamp = false; + if (ext_type == -1) + { + is_timestamp = true;; + } + + // payload + if (is_timestamp && len == 4) + { + uint8_t buf32[sizeof(uint32_t)]; + if (source_.read(buf32, sizeof(uint32_t)) != sizeof(uint32_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + uint32_t val = binary::big_to_native<uint32_t>(buf32, sizeof(buf32)); + more_ = visitor.uint64_value(val, semantic_tag::epoch_second, *this, ec); + } + else if (is_timestamp && len == 8) + { + uint8_t buf64[sizeof(uint64_t)]; + if (source_.read(buf64, sizeof(uint64_t)) != sizeof(uint64_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + uint64_t data64 = binary::big_to_native<uint64_t>(buf64, sizeof(buf64)); + uint64_t sec = data64 & 0x00000003ffffffffL; + uint64_t nsec = data64 >> 34; + + bigint nano(sec); + nano *= uint64_t(nanos_in_second); + nano += nsec; + text_buffer_.clear(); + nano.write_string(text_buffer_); + more_ = visitor.string_value(text_buffer_, semantic_tag::epoch_nano, *this, ec); + if (!more_) return; + } + else if (is_timestamp && len == 12) + { + uint8_t buf1[sizeof(uint32_t)]; + if (source_.read(buf1, sizeof(uint32_t)) != sizeof(uint32_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + uint32_t nsec = binary::big_to_native<uint32_t>(buf1, sizeof(buf1)); + + uint8_t buf2[sizeof(int64_t)]; + if (source_.read(buf2, sizeof(int64_t)) != sizeof(int64_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + int64_t sec = binary::big_to_native<int64_t>(buf2, sizeof(buf2)); + + bigint nano(sec); + + nano *= uint64_t(nanos_in_second); + + if (nano < 0) + { + nano -= nsec; + } + else + { + nano += nsec; + } + + text_buffer_.clear(); + nano.write_string(text_buffer_); + more_ = visitor.string_value(text_buffer_, semantic_tag::epoch_nano, *this, ec); + if (!more_) return; + } + else + { + bytes_buffer_.clear(); + if 
(source_reader<Source>::read(source_,bytes_buffer_,len) != static_cast<std::size_t>(len)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return; + } + + more_ = visitor.byte_string_value(byte_string_view(bytes_buffer_.data(),bytes_buffer_.size()), + static_cast<uint8_t>(ext_type), + *this, + ec); + } + break; + } + + case jsoncons::msgpack::msgpack_type::array16_type: + case jsoncons::msgpack::msgpack_type::array32_type: + { + begin_array(visitor,type,ec); + break; + } + + case jsoncons::msgpack::msgpack_type::map16_type : + case jsoncons::msgpack::msgpack_type::map32_type : + { + begin_object(visitor, type, ec); + break; + } + + default: + { + ec = msgpack_errc::unknown_type; + more_ = false; + return; + } + } + } + } + + void begin_array(json_visitor2& visitor, uint8_t type, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = msgpack_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + std::size_t length = get_size(type, ec); + if (!more_) + { + return; + } + state_stack_.emplace_back(parse_mode::array,length); + more_ = visitor.begin_array(length, semantic_tag::none, *this, ec); + } + + void end_array(json_visitor2& visitor, std::error_code& ec) + { + --nesting_depth_; + + more_ = visitor.end_array(*this, ec); + state_stack_.pop_back(); + } + + void begin_object(json_visitor2& visitor, uint8_t type, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = msgpack_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + std::size_t length = get_size(type, ec); + if (!more_) + { + return; + } + state_stack_.emplace_back(parse_mode::map_key,length); + more_ = visitor.begin_object(length, semantic_tag::none, *this, ec); + } + + void end_object(json_visitor2& visitor, std::error_code& ec) + { + --nesting_depth_; + more_ = visitor.end_object(*this, ec); + state_stack_.pop_back(); + } + + std::size_t get_size(uint8_t type, 
std::error_code& ec) + { + switch (type) + { + case jsoncons::msgpack::msgpack_type::str8_type: + case jsoncons::msgpack::msgpack_type::bin8_type: + case jsoncons::msgpack::msgpack_type::ext8_type: + { + uint8_t buf[sizeof(int8_t)]; + if (source_.read(buf, sizeof(int8_t)) != sizeof(int8_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return 0; + } + uint8_t len = binary::big_to_native<uint8_t>(buf, sizeof(buf)); + return static_cast<std::size_t>(len); + } + + case jsoncons::msgpack::msgpack_type::str16_type: + case jsoncons::msgpack::msgpack_type::bin16_type: + case jsoncons::msgpack::msgpack_type::ext16_type: + case jsoncons::msgpack::msgpack_type::array16_type: + case jsoncons::msgpack::msgpack_type::map16_type: + { + uint8_t buf[sizeof(int16_t)]; + if (source_.read(buf, sizeof(int16_t)) != sizeof(int16_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return 0; + } + uint16_t len = binary::big_to_native<uint16_t>(buf, sizeof(buf)); + return static_cast<std::size_t>(len); + } + + case jsoncons::msgpack::msgpack_type::str32_type: + case jsoncons::msgpack::msgpack_type::bin32_type: + case jsoncons::msgpack::msgpack_type::ext32_type: + case jsoncons::msgpack::msgpack_type::array32_type: + case jsoncons::msgpack::msgpack_type::map32_type : + { + uint8_t buf[sizeof(int32_t)]; + if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) + { + ec = msgpack_errc::unexpected_eof; + more_ = false; + return 0; + } + uint32_t len = binary::big_to_native<uint32_t>(buf, sizeof(buf)); + return static_cast<std::size_t>(len); + } + case jsoncons::msgpack::msgpack_type::fixext1_type: + return 1; + case jsoncons::msgpack::msgpack_type::fixext2_type: + return 2; + case jsoncons::msgpack::msgpack_type::fixext4_type: + return 4; + case jsoncons::msgpack::msgpack_type::fixext8_type: + return 8; + case jsoncons::msgpack::msgpack_type::fixext16_type: + return 16; + default: + if ((type > 0x8f && type <= 0x9f) // fixarray + || (type > 0x7f && type <= 0x8f) // 
fixmap + ) + { + return type & 0x0f; + } + else + { + ec = msgpack_errc::unknown_type; + more_ = false; + return 0; + } + break; + } + } +}; + +}} + +#endif diff --git a/include/jsoncons_ext/msgpack/msgpack_reader.hpp b/include/jsoncons_ext/msgpack/msgpack_reader.hpp new file mode 100644 index 0000000..c0d788a --- /dev/null +++ b/include/jsoncons_ext/msgpack/msgpack_reader.hpp @@ -0,0 +1,116 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_MSGPACK_MSGPACK_READER_HPP +#define JSONCONS_MSGPACK_MSGPACK_READER_HPP + +#include <string> +#include <vector> +#include <memory> +#include <utility> // std::move +#include <jsoncons/json.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/msgpack/msgpack_type.hpp> +#include <jsoncons_ext/msgpack/msgpack_error.hpp> +#include <jsoncons_ext/msgpack/msgpack_parser.hpp> + +namespace jsoncons { namespace msgpack { + +/* basic_msgpack_reader runs a basic_msgpack_parser over Source and delivers the parse events to the visitor given at construction. The json_visitor constructors wrap the visitor in adaptor_ and bind visitor_ to that adaptor; the json_visitor2 constructors bind visitor_ to the caller visitor directly and do not list adaptor_ in their initializers. */ +template <class Source,class Allocator=std::allocator<char>> +class basic_msgpack_reader +{ + using char_type = char; + + basic_msgpack_parser<Source,Allocator> parser_; + basic_json_visitor2_to_visitor_adaptor<char_type,Allocator> adaptor_; + json_visitor2& visitor_; +public: + template <class Sourceable> + basic_msgpack_reader(Sourceable&& source, + json_visitor& visitor, + const Allocator alloc) + : basic_msgpack_reader(std::forward<Sourceable>(source), + visitor, + msgpack_decode_options(), + alloc) + { + } + + template <class Sourceable> + basic_msgpack_reader(Sourceable&& source, + json_visitor& visitor, + const msgpack_decode_options& options = msgpack_decode_options(), + const Allocator alloc=Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + adaptor_(visitor, alloc), 
visitor_(adaptor_) + { + } + template <class Sourceable> + basic_msgpack_reader(Sourceable&& source, + json_visitor2& visitor, + const Allocator alloc) + : basic_msgpack_reader(std::forward<Sourceable>(source), + visitor, + msgpack_decode_options(), + alloc) + { + } + + template <class Sourceable> + basic_msgpack_reader(Sourceable&& source, + json_visitor2& visitor, + const msgpack_decode_options& options = msgpack_decode_options(), + const Allocator alloc=Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + visitor_(visitor) + { + } + + /* Throwing form: calls read(ec) and, if ec is set, throws ser_error built from ec, line() and column(). */ + void read() + { + std::error_code ec; + read(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,line(),column())); + } + } + + /* Resets parser_ and parses into visitor_; failures are reported through ec. */ + void read(std::error_code& ec) + { + parser_.reset(); + parser_.parse(visitor_, ec); + if (ec) + { + return; + } + } + + /* line() and column() forward to parser_. */ + std::size_t line() const + { + return parser_.line(); + } + + std::size_t column() const + { + return parser_.column(); + } +}; + +using msgpack_stream_reader = basic_msgpack_reader<jsoncons::binary_stream_source>; + +using msgpack_bytes_reader = basic_msgpack_reader<jsoncons::bytes_source>; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use msgpack_stream_reader") typedef msgpack_stream_reader msgpack_reader; +JSONCONS_DEPRECATED_MSG("Instead, use msgpack_bytes_reader") typedef msgpack_bytes_reader msgpack_buffer_reader; +#endif + +}} + +#endif diff --git a/include/jsoncons_ext/msgpack/msgpack_type.hpp b/include/jsoncons_ext/msgpack/msgpack_type.hpp new file mode 100644 index 0000000..aa9f8fd --- /dev/null +++ b/include/jsoncons_ext/msgpack/msgpack_type.hpp @@ -0,0 +1,63 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_MSGPACK_MSGPACK_TYPE_HPP +#define JSONCONS_MSGPACK_MSGPACK_TYPE_HPP + +#include <string> +#include <memory> +#include <jsoncons/config/jsoncons_config.hpp> + +namespace jsoncons { namespace msgpack { + + /* One-byte MessagePack format markers. NOTE(review): the entries named *_base_type look like the first byte of a range of markers (per the MessagePack format, e.g. fixmap 0x80-0x8f and fixarray 0x90-0x9f) rather than a single marker value - confirm against the parser. */ + namespace msgpack_type + { + const uint8_t positive_fixint_base_type = 0x00; + const uint8_t nil_type = 0xc0; + const uint8_t false_type = 0xc2; + const uint8_t true_type = 0xc3; + const uint8_t float32_type = 0xca; + const uint8_t float64_type = 0xcb; + const uint8_t uint8_type = 0xcc; + const uint8_t uint16_type = 0xcd; + const uint8_t uint32_type = 0xce; + const uint8_t uint64_type = 0xcf; + const uint8_t int8_type = 0xd0; + const uint8_t int16_type = 0xd1; + const uint8_t int32_type = 0xd2; + const uint8_t int64_type = 0xd3; + + const uint8_t fixmap_base_type = 0x80; + const uint8_t fixarray_base_type = 0x90; + const uint8_t fixstr_base_type = 0xa0; + const uint8_t str8_type = 0xd9; + const uint8_t str16_type = 0xda; + const uint8_t str32_type = 0xdb; + + const uint8_t bin8_type = 0xc4; // 0xC4 + const uint8_t bin16_type = 0xc5; + const uint8_t bin32_type = 0xc6; + + const uint8_t fixext1_type = 0xd4; + const uint8_t fixext2_type = 0xd5; + const uint8_t fixext4_type = 0xd6; + const uint8_t fixext8_type = 0xd7; + const uint8_t fixext16_type = 0xd8; + const uint8_t ext8_type = 0xc7; // 0xC7 + const uint8_t ext16_type = 0xc8; + const uint8_t ext32_type = 0xc9; + + const uint8_t array16_type = 0xdc; + const uint8_t array32_type = 0xdd; + const uint8_t map16_type = 0xde; + const uint8_t map32_type = 0xdf; + const uint8_t negative_fixint_base_type = 0xe0; + } + +} // namespace msgpack +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/ubjson/decode_ubjson.hpp b/include/jsoncons_ext/ubjson/decode_ubjson.hpp new file mode 100644 index 0000000..91c9c0f --- /dev/null +++ 
b/include/jsoncons_ext/ubjson/decode_ubjson.hpp @@ -0,0 +1,201 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_DECODE_UBJSON_HPP +#define JSONCONS_UBJSON_DECODE_UBJSON_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/ubjson/ubjson_reader.hpp> +#include <jsoncons_ext/ubjson/ubjson_cursor.hpp> + +namespace jsoncons { +namespace ubjson { + + /* decode_ubjson, basic_json result from a byte sequence: runs a basic_ubjson_reader over v into a json_decoder<T> and returns decoder.get_result(). Throws ser_error(conv_errc::conversion_failed) with the reader line and column if the decoder is not valid after the read. */ + template<class T, class Source> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_ubjson(const Source& v, + const ubjson_decode_options& options = ubjson_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_ubjson_reader<jsoncons::bytes_source> reader(v, adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + /* decode_ubjson, any other T from a byte sequence: T is produced by decode_traits<T,char>::decode over a basic_ubjson_cursor. If decoding sets ec, it is thrown as ser_error with the line and column of the cursor context. */ + template<class T, class Source> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_ubjson(const Source& v, + const ubjson_decode_options& options = ubjson_decode_options()) + { + basic_ubjson_cursor<bytes_source> cursor(v, options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + /* Stream overloads: the same two cases, reading from a std::istream. */ + template<class T> + 
typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_ubjson(std::istream& is, + const ubjson_decode_options& options = ubjson_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + ubjson_stream_reader reader(is, adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_ubjson(std::istream& is, + const ubjson_decode_options& options = ubjson_decode_options()) + { + basic_ubjson_cursor<binary_stream_source> cursor(is, options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + /* Iterator-range overloads: the input is wrapped in binary_iterator_source<InputIt>(first, last). */ + template<class T, class InputIt> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_ubjson(InputIt first, InputIt last, + const ubjson_decode_options& options = ubjson_decode_options()) + { + jsoncons::json_decoder<T> decoder; + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_ubjson_reader<binary_iterator_source<InputIt>> reader(binary_iterator_source<InputIt>(first, last), adaptor, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class InputIt> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_ubjson(InputIt first, InputIt last, + const ubjson_decode_options& options = ubjson_decode_options()) + { + basic_ubjson_cursor<binary_iterator_source<InputIt>> cursor(binary_iterator_source<InputIt>(first, 
last), options); + json_decoder<basic_json<char,sorted_policy>> decoder{}; + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + // With leading allocator parameter + /* In the overloads below, temp_alloc is passed to the json_decoder and to the reader or cursor; the decoding steps are otherwise the same as in the overloads without an allocator. */ + + template<class T, class Source, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_ubjson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& v, + const ubjson_decode_options& options = ubjson_decode_options()) + { + json_decoder<T,TempAllocator> decoder(temp_alloc); + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_ubjson_reader<jsoncons::bytes_source,TempAllocator> reader(v, adaptor, options, temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T, class Source, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_byte_sequence<Source>::value,T>::type + decode_ubjson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& v, + const ubjson_decode_options& options = ubjson_decode_options()) + { + basic_ubjson_cursor<bytes_source,TempAllocator> cursor(v, options, temp_alloc); + json_decoder<basic_json<char,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template<class T,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_ubjson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::istream& 
is, + const ubjson_decode_options& options = ubjson_decode_options()) + { + json_decoder<T,TempAllocator> decoder(temp_alloc); + auto adaptor = make_json_visitor_adaptor<json_visitor>(decoder); + basic_ubjson_reader<jsoncons::binary_stream_source,TempAllocator> reader(is, adaptor, options, temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template<class T,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_ubjson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::istream& is, + const ubjson_decode_options& options = ubjson_decode_options()) + { + basic_ubjson_cursor<binary_stream_source,TempAllocator> cursor(is, options, temp_alloc); + json_decoder<basic_json<char,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + +} // ubjson +} // jsoncons + +#endif diff --git a/include/jsoncons_ext/ubjson/encode_ubjson.hpp b/include/jsoncons_ext/ubjson/encode_ubjson.hpp new file mode 100644 index 0000000..e8a244b --- /dev/null +++ b/include/jsoncons_ext/ubjson/encode_ubjson.hpp @@ -0,0 +1,142 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_ENCODE_UBJSON_HPP +#define JSONCONS_UBJSON_ENCODE_UBJSON_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/ubjson/ubjson_encoder.hpp> +#include <jsoncons_ext/ubjson/ubjson_reader.hpp> + +namespace jsoncons { +namespace ubjson { + + /* encode_ubjson overloads. A basic_json value is written with j.dump(adaptor) into a UBJSON encoder; any other T goes through encode_traits<T,char>::encode, and an error code set by that call is thrown as ser_error. The target is either a back-insertable byte container or a std::ostream. NOTE(review): the stream overloads take std::ostream but this header includes only <istream> directly; presumably <ostream> arrives through another include - confirm. */ + template<class T, class Container> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_ubjson(const T& j, + Container& v, + const ubjson_encode_options& options = ubjson_encode_options()) + { + using char_type = typename T::char_type; + basic_ubjson_encoder<jsoncons::bytes_sink<Container>> encoder(v, options); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T, class Container> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_ubjson(const T& val, + Container& v, + const ubjson_encode_options& options = ubjson_encode_options()) + { + basic_ubjson_encoder<jsoncons::bytes_sink<Container>> encoder(v, options); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template<class T> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_ubjson(const T& j, + std::ostream& os, + const ubjson_encode_options& options = ubjson_encode_options()) + { + using char_type = typename T::char_type; + ubjson_stream_encoder encoder(os, options); + auto adaptor = 
make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_ubjson(const T& val, + std::ostream& os, + const ubjson_encode_options& options = ubjson_encode_options()) + { + ubjson_stream_encoder encoder(os, options); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + // with temp_allocator_arg_t + /* In the overloads below, temp_alloc is forwarded to the encoder constructor and is not used anywhere else in the function bodies. */ + + template<class T, class Container, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_ubjson(temp_allocator_arg_t, const TempAllocator& temp_alloc,const T& j, + Container& v, + const ubjson_encode_options& options = ubjson_encode_options()) + { + using char_type = typename T::char_type; + basic_ubjson_encoder<jsoncons::bytes_sink<Container>,TempAllocator> encoder(v, options, temp_alloc); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T, class Container, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_byte_container<Container>::value,void>::type + encode_ubjson(temp_allocator_arg_t, const TempAllocator& temp_alloc,const T& val, + Container& v, + const ubjson_encode_options& options = ubjson_encode_options()) + { + basic_ubjson_encoder<jsoncons::bytes_sink<Container>,TempAllocator> encoder(v, options, temp_alloc); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template<class T,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_ubjson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, + std::ostream& os, + const 
ubjson_encode_options& options = ubjson_encode_options()) + { + using char_type = typename T::char_type; + basic_ubjson_encoder<jsoncons::binary_stream_sink,TempAllocator> encoder(os, options, temp_alloc); + auto adaptor = make_json_visitor_adaptor<basic_json_visitor<char_type>>(encoder); + j.dump(adaptor); + } + + template<class T,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_ubjson(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, + std::ostream& os, + const ubjson_encode_options& options = ubjson_encode_options()) + { + basic_ubjson_encoder<jsoncons::binary_stream_sink,TempAllocator> encoder(os, options, temp_alloc); + std::error_code ec; + encode_traits<T,char>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + +} // ubjson +} // jsoncons + +#endif diff --git a/include/jsoncons_ext/ubjson/ubjson.hpp b/include/jsoncons_ext/ubjson/ubjson.hpp new file mode 100644 index 0000000..c2729bd --- /dev/null +++ b/include/jsoncons_ext/ubjson/ubjson.hpp @@ -0,0 +1,23 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_HPP +#define JSONCONS_UBJSON_UBJSON_HPP + +#include <string> +#include <vector> +#include <memory> +#include <type_traits> // std::enable_if +#include <istream> // std::basic_istream +#include <jsoncons/json.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/ubjson/ubjson_encoder.hpp> +#include <jsoncons_ext/ubjson/ubjson_reader.hpp> +#include <jsoncons_ext/ubjson/ubjson_cursor.hpp> +#include <jsoncons_ext/ubjson/encode_ubjson.hpp> +#include <jsoncons_ext/ubjson/decode_ubjson.hpp> + +#endif diff --git a/include/jsoncons_ext/ubjson/ubjson_cursor.hpp b/include/jsoncons_ext/ubjson/ubjson_cursor.hpp new file mode 100644 index 0000000..f60825e --- /dev/null +++ b/include/jsoncons_ext/ubjson/ubjson_cursor.hpp @@ -0,0 +1,307 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_CURSOR_HPP +#define JSONCONS_UBJSON_UBJSON_CURSOR_HPP + +#include <memory> // std::allocator +#include <string> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <ios> +#include <istream> // std::basic_istream +#include <jsoncons/byte_string.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/staj_cursor.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons_ext/ubjson/ubjson_parser.hpp> + +namespace jsoncons { +namespace ubjson { + +/* basic_ubjson_cursor is a basic_staj_cursor over UBJSON input. Each next() goes through the private read_next(ec), which restarts parser_ and keeps calling parser_.parse(cursor_visitor_, ec) until the parser reports stopped or ec is set; current() then returns the event held by cursor_visitor_. The constructors and the reset overloads advance to the first event unless done() is already true. The cursor acts as its own ser_context: context() returns *this, and line() and column() forward to parser_. */ +template<class Source=jsoncons::binary_stream_source,class Allocator=std::allocator<char>> +class basic_ubjson_cursor : public basic_staj_cursor<char>, private virtual ser_context +{ +public: + using source_type = Source; + using char_type = char; + using allocator_type = Allocator; +private: + basic_ubjson_parser<Source,Allocator> parser_; + basic_staj_visitor<char_type> cursor_visitor_; + /* NOTE(review): eof_ is only ever assigned false in this class, so eof() always returns false - confirm that is intended. */ + bool eof_; + + // Noncopyable and nonmoveable + basic_ubjson_cursor(const basic_ubjson_cursor&) = delete; + basic_ubjson_cursor& operator=(const basic_ubjson_cursor&) = delete; + +public: + using string_view_type = string_view; + + /* Throwing constructor: a failure while reading the first event surfaces as the ser_error thrown by next(). */ + template <class Sourceable> + basic_ubjson_cursor(Sourceable&& source, + const ubjson_decode_options& options = ubjson_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + eof_(false) + { + if (!done()) + { + next(); + } + } + + // Constructors that set parse error codes + + template <class Sourceable> + basic_ubjson_cursor(Sourceable&& source, + std::error_code& ec) + : basic_ubjson_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + ubjson_decode_options(), + ec) + { + } + + 
template <class Sourceable> + basic_ubjson_cursor(Sourceable&& source, + const ubjson_decode_options& options, + std::error_code& ec) + : basic_ubjson_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + ec) + { + } + + template <class Sourceable> + basic_ubjson_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const ubjson_decode_options& options, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(accept_all), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + /* reset overloads: reset parser_ (optionally onto a new source) and cursor_visitor_, clear eof_, then advance to the first event unless done(). The overloads without ec use the throwing next(). */ + void reset() + { + parser_.reset(); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(); + } + } + + void reset(std::error_code& ec) + { + parser_.reset(); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + void reset(Sourceable&& source, std::error_code& ec) + { + parser_.reset(std::forward<Sourceable>(source)); + cursor_visitor_.reset(); + eof_ = false; + if (!done()) + { + next(ec); + } + } + + bool done() const override + { + return parser_.done(); + } + + const staj_event& current() const override + { + return cursor_visitor_.event(); + } + + /* Throwing form of read_to: throws ser_error with the parser line and column if ec is set. */ + void read_to(basic_json_visitor<char_type>& visitor) override + { + std::error_code ec; + read_to(visitor, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + /* Passes the current event to visitor through staj_to_saj_event; if that returns true, parsing continues directly into visitor via read_next(visitor, ec). */ + void read_to(basic_json_visitor<char_type>& visitor, + std::error_code& ec) override + { + if (staj_to_saj_event(cursor_visitor_.event(), visitor, *this, ec)) + { + read_next(visitor, ec); + } + } + + /* Throwing form of next: throws ser_error with the parser line and column if ec is set. */ + void next() override + { + std::error_code ec; + next(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void 
next(std::error_code& ec) override + { + read_next(ec); + } + + const ser_context& context() const override + { + return *this; + } + + bool eof() const + { + return eof_; + } + + std::size_t line() const override + { + return parser_.line(); + } + + std::size_t column() const override + { + return parser_.column(); + } + + /* Pipe syntax: builds a staj_filter_view from the cursor and the predicate. */ + friend + staj_filter_view operator|(basic_ubjson_cursor& cursor, + std::function<bool(const staj_event&, const ser_context&)> pred) + { + return staj_filter_view(cursor, pred); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_ubjson_cursor(Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + const ubjson_decode_options& options = ubjson_decode_options(), + const Allocator& alloc = Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + cursor_visitor_(filter), + eof_(false) + { + if (!done()) + { + next(); + } + } + + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_ubjson_cursor(Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + std::error_code& ec) + : basic_ubjson_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), filter, ec) + { + } + + /* NOTE(review): unlike every other constructor in this class, the one below passes (source, alloc) to parser_ with no decode options - confirm that basic_ubjson_parser accepts that argument list. */ + template <class Sourceable> + JSONCONS_DEPRECATED_MSG("Instead, use pipe syntax for filter") + basic_ubjson_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + std::function<bool(const staj_event&, const ser_context&)> filter, + std::error_code& ec) + : parser_(std::forward<Sourceable>(source), alloc), + cursor_visitor_(filter), + eof_(false) + { + if (!done()) + { + next(ec); + } + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<char_type>&)") + void read(basic_json_visitor<char_type>& visitor) + { + read_to(visitor); + } + + JSONCONS_DEPRECATED_MSG("Instead, use 
read_to(basic_json_visitor<char_type>&, std::error_code&)") + void read(basic_json_visitor<char_type>& visitor, + std::error_code& ec) + { + read_to(visitor, ec); + } +#endif +private: + static bool accept_all(const staj_event&, const ser_context&) + { + return true; + } + + void read_next(std::error_code& ec) + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(cursor_visitor_, ec); + if (ec) return; + } + } + + void read_next(basic_json_visitor<char_type>& visitor, std::error_code& ec) + { + parser_.restart(); + while (!parser_.stopped()) + { + parser_.parse(visitor, ec); + if (ec) return; + } + } +}; + +using ubjson_stream_cursor = basic_ubjson_cursor<jsoncons::binary_stream_source>; +using ubjson_bytes_cursor = basic_ubjson_cursor<jsoncons::bytes_source>; + +} // namespace ubjson +} // namespace jsoncons + +#endif + diff --git a/include/jsoncons_ext/ubjson/ubjson_encoder.hpp b/include/jsoncons_ext/ubjson/ubjson_encoder.hpp new file mode 100644 index 0000000..2d90e4a --- /dev/null +++ b/include/jsoncons_ext/ubjson/ubjson_encoder.hpp @@ -0,0 +1,502 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_ENCODER_HPP +#define JSONCONS_UBJSON_UBJSON_ENCODER_HPP + +#include <string> +#include <vector> +#include <limits> // std::numeric_limits +#include <memory> +#include <utility> // std::move +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/sink.hpp> +#include <jsoncons/detail/parse_number.hpp> +#include <jsoncons_ext/ubjson/ubjson_type.hpp> +#include <jsoncons_ext/ubjson/ubjson_error.hpp> +#include <jsoncons_ext/ubjson/ubjson_options.hpp> + +namespace jsoncons { namespace ubjson { + +enum class ubjson_container_type {object, indefinite_length_object, array, indefinite_length_array}; + +template<class Sink=jsoncons::binary_stream_sink,class Allocator=std::allocator<char>> +class basic_ubjson_encoder final : public basic_json_visitor<char> +{ + + enum class decimal_parse_state { start, integer, exp1, exp2, fraction1 }; +public: + using allocator_type = Allocator; + using typename basic_json_visitor<char>::string_view_type; + using sink_type = Sink; + +private: + struct stack_item + { + ubjson_container_type type_; + std::size_t length_; + std::size_t count_; + + stack_item(ubjson_container_type type, std::size_t length = 0) noexcept + : type_(type), length_(length), count_(0) + { + } + + std::size_t length() const + { + return length_; + } + + std::size_t count() const + { + return count_; + } + + bool is_object() const + { + return type_ == ubjson_container_type::object || type_ == ubjson_container_type::indefinite_length_object; + } + + bool is_indefinite_length() const + { + return type_ == ubjson_container_type::indefinite_length_array || type_ == ubjson_container_type::indefinite_length_object; + } + + }; + + Sink sink_; + const ubjson_encode_options options_; + 
allocator_type alloc_; + + std::vector<stack_item> stack_; + int nesting_depth_; + + // Noncopyable and nonmoveable + basic_ubjson_encoder(const basic_ubjson_encoder&) = delete; + basic_ubjson_encoder& operator=(const basic_ubjson_encoder&) = delete; +public: + basic_ubjson_encoder(Sink&& sink, + const Allocator& alloc = Allocator()) + : basic_ubjson_encoder(std::forward<Sink>(sink), ubjson_encode_options(), alloc) + { + } + + explicit basic_ubjson_encoder(Sink&& sink, + const ubjson_encode_options& options, + const Allocator& alloc = Allocator()) + : sink_(std::forward<Sink>(sink)), + options_(options), + alloc_(alloc), + nesting_depth_(0) + { + } + + void reset() + { + stack_.clear(); + nesting_depth_ = 0; + } + + void reset(Sink&& sink) + { + sink_ = std::move(sink); + reset(); + } + + ~basic_ubjson_encoder() noexcept + { + JSONCONS_TRY + { + sink_.flush(); + } + JSONCONS_CATCH(...) + { + } + } + +private: + // Implementing methods + + void visit_flush() override + { + sink_.flush(); + } + + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = ubjson_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(ubjson_container_type::indefinite_length_object); + sink_.push_back(jsoncons::ubjson::ubjson_type::start_object_marker); + + return true; + } + + bool visit_begin_object(std::size_t length, semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = ubjson_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(ubjson_container_type::object, length); + sink_.push_back(jsoncons::ubjson::ubjson_type::start_object_marker); + sink_.push_back(jsoncons::ubjson::ubjson_type::count_marker); + put_length(length); + + return true; + } + + bool visit_end_object(const ser_context&, std::error_code& ec) override + { + 
JSONCONS_ASSERT(!stack_.empty()); + --nesting_depth_; + + if (stack_.back().is_indefinite_length()) + { + sink_.push_back(jsoncons::ubjson::ubjson_type::end_object_marker); + } + else + { + if (stack_.back().count() < stack_.back().length()) + { + ec = ubjson_errc::too_few_items; + return false; + } + if (stack_.back().count() > stack_.back().length()) + { + ec = ubjson_errc::too_many_items; + return false; + } + } + stack_.pop_back(); + end_value(); + return true; + } + + bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = ubjson_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(ubjson_container_type::indefinite_length_array); + sink_.push_back(jsoncons::ubjson::ubjson_type::start_array_marker); + + return true; + } + + bool visit_begin_array(std::size_t length, semantic_tag, const ser_context&, std::error_code& ec) override + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = ubjson_errc::max_nesting_depth_exceeded; + return false; + } + stack_.emplace_back(ubjson_container_type::array, length); + sink_.push_back(jsoncons::ubjson::ubjson_type::start_array_marker); + sink_.push_back(jsoncons::ubjson::ubjson_type::count_marker); + put_length(length); + + return true; + } + + bool visit_end_array(const ser_context&, std::error_code& ec) override + { + JSONCONS_ASSERT(!stack_.empty()); + --nesting_depth_; + + if (stack_.back().is_indefinite_length()) + { + sink_.push_back(jsoncons::ubjson::ubjson_type::end_array_marker); + } + else + { + if (stack_.back().count() < stack_.back().length()) + { + ec = ubjson_errc::too_few_items; + return false; + } + if (stack_.back().count() > stack_.back().length()) + { + ec = ubjson_errc::too_many_items; + return false; + } + } + stack_.pop_back(); + end_value(); + return true; + } + + bool visit_key(const string_view_type& name, const ser_context&, 
std::error_code& ec) override + { + auto sink = unicode_traits::validate(name.data(), name.size()); + if (sink.ec != unicode_traits::conv_errc()) + { + ec = ubjson_errc::invalid_utf8_text_string; + return false; + } + + put_length(name.length()); + + for (auto c : name) + { + sink_.push_back(c); + } + return true; + } + + bool visit_null(semantic_tag, const ser_context&, std::error_code&) override + { + // nil + binary::native_to_big(static_cast<uint8_t>(jsoncons::ubjson::ubjson_type::null_type), std::back_inserter(sink_)); + end_value(); + return true; + } + + bool visit_string(const string_view_type& sv, semantic_tag tag, const ser_context&, std::error_code& ec) override + { + switch (tag) + { + case semantic_tag::bigint: + case semantic_tag::bigdec: + { + sink_.push_back(jsoncons::ubjson::ubjson_type::high_precision_number_type); + break; + } + default: + { + sink_.push_back(jsoncons::ubjson::ubjson_type::string_type); + break; + } + } + + auto sink = unicode_traits::validate(sv.data(), sv.size()); + if (sink.ec != unicode_traits::conv_errc()) + { + ec = ubjson_errc::invalid_utf8_text_string; + return false; + } + + put_length(sv.length()); + + for (auto c : sv) + { + sink_.push_back(c); + } + + end_value(); + return true; + } + + void put_length(std::size_t length) + { + if (length <= (std::numeric_limits<uint8_t>::max)()) + { + sink_.push_back(ubjson_type::uint8_type); + binary::native_to_big(static_cast<uint8_t>(length), std::back_inserter(sink_)); + } + else if (length <= (std::size_t)(std::numeric_limits<int16_t>::max)()) + { + sink_.push_back(ubjson_type::int16_type); + binary::native_to_big(static_cast<uint16_t>(length), std::back_inserter(sink_)); + } + else if (length <= (std::size_t)(std::numeric_limits<int32_t>::max)()) + { + sink_.push_back(ubjson_type::int32_type); + binary::native_to_big(static_cast<uint32_t>(length),std::back_inserter(sink_)); + } + else if (length <= (std::size_t)(std::numeric_limits<int64_t>::max)()) + { + 
sink_.push_back(ubjson_type::int64_type); + binary::native_to_big(static_cast<uint64_t>(length),std::back_inserter(sink_)); + } + else + { + JSONCONS_THROW(ser_error(ubjson_errc::too_many_items)); + } + } + + bool visit_byte_string(const byte_string_view& b, + semantic_tag, + const ser_context&, + std::error_code&) override + { + + const size_t length = b.size(); + sink_.push_back(jsoncons::ubjson::ubjson_type::start_array_marker); + binary::native_to_big(static_cast<uint8_t>(jsoncons::ubjson::ubjson_type::type_marker), std::back_inserter(sink_)); + binary::native_to_big(static_cast<uint8_t>(jsoncons::ubjson::ubjson_type::uint8_type), std::back_inserter(sink_)); + put_length(length); + + for (auto c : b) + { + sink_.push_back(c); + } + + end_value(); + return true; + } + + bool visit_double(double val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + float valf = (float)val; + if ((double)valf == val) + { + // float 32 + sink_.push_back(static_cast<uint8_t>(jsoncons::ubjson::ubjson_type::float32_type)); + binary::native_to_big(valf,std::back_inserter(sink_)); + } + else + { + // float 64 + sink_.push_back(static_cast<uint8_t>(jsoncons::ubjson::ubjson_type::float64_type)); + binary::native_to_big(val,std::back_inserter(sink_)); + } + + // write double + + end_value(); + return true; + } + + bool visit_int64(int64_t val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + if (val >= 0) + { + if (val <= (std::numeric_limits<uint8_t>::max)()) + { + // uint 8 stores a 8-bit unsigned integer + sink_.push_back(jsoncons::ubjson::ubjson_type::uint8_type); + binary::native_to_big(static_cast<uint8_t>(val),std::back_inserter(sink_)); + } + else if (val <= (std::numeric_limits<int16_t>::max)()) + { + // uint 16 stores a 16-bit big-endian unsigned integer + sink_.push_back(jsoncons::ubjson::ubjson_type::int16_type); + binary::native_to_big(static_cast<int16_t>(val),std::back_inserter(sink_)); + } + else if (val <= 
(std::numeric_limits<int32_t>::max)()) + { + // uint 32 stores a 32-bit big-endian unsigned integer + sink_.push_back(jsoncons::ubjson::ubjson_type::int32_type); + binary::native_to_big(static_cast<int32_t>(val),std::back_inserter(sink_)); + } + else if (val <= (std::numeric_limits<int64_t>::max)()) + { + // int 64 stores a 64-bit big-endian signed integer + sink_.push_back(jsoncons::ubjson::ubjson_type::int64_type); + binary::native_to_big(static_cast<int64_t>(val),std::back_inserter(sink_)); + } + else + { + // big integer + } + } + else + { + if (val >= (std::numeric_limits<int8_t>::lowest)()) + { + // int 8 stores a 8-bit signed integer + sink_.push_back(jsoncons::ubjson::ubjson_type::int8_type); + binary::native_to_big(static_cast<int8_t>(val),std::back_inserter(sink_)); + } + else if (val >= (std::numeric_limits<int16_t>::lowest)()) + { + // int 16 stores a 16-bit big-endian signed integer + sink_.push_back(jsoncons::ubjson::ubjson_type::int16_type); + binary::native_to_big(static_cast<int16_t>(val),std::back_inserter(sink_)); + } + else if (val >= (std::numeric_limits<int32_t>::lowest)()) + { + // int 32 stores a 32-bit big-endian signed integer + sink_.push_back(jsoncons::ubjson::ubjson_type::int32_type); + binary::native_to_big(static_cast<int32_t>(val),std::back_inserter(sink_)); + } + else if (val >= (std::numeric_limits<int64_t>::lowest)()) + { + // int 64 stores a 64-bit big-endian signed integer + sink_.push_back(jsoncons::ubjson::ubjson_type::int64_type); + binary::native_to_big(static_cast<int64_t>(val),std::back_inserter(sink_)); + } + } + end_value(); + return true; + } + + bool visit_uint64(uint64_t val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + if (val <= (std::numeric_limits<uint8_t>::max)()) + { + sink_.push_back(jsoncons::ubjson::ubjson_type::uint8_type); + binary::native_to_big(static_cast<uint8_t>(val),std::back_inserter(sink_)); + } + else if (val <= 
static_cast<uint64_t>((std::numeric_limits<int16_t>::max)())) + { + sink_.push_back(jsoncons::ubjson::ubjson_type::int16_type); + binary::native_to_big(static_cast<int16_t>(val),std::back_inserter(sink_)); + } + else if (val <= static_cast<uint64_t>((std::numeric_limits<int32_t>::max)())) + { + sink_.push_back(jsoncons::ubjson::ubjson_type::int32_type); + binary::native_to_big(static_cast<int32_t>(val),std::back_inserter(sink_)); + } + else if (val <= static_cast<uint64_t>((std::numeric_limits<int64_t>::max)())) + { + sink_.push_back(jsoncons::ubjson::ubjson_type::int64_type); + binary::native_to_big(static_cast<int64_t>(val),std::back_inserter(sink_)); + } + end_value(); + return true; + } + + bool visit_bool(bool val, semantic_tag, const ser_context&, std::error_code&) override + { + // true and false + sink_.push_back(static_cast<uint8_t>(val ? jsoncons::ubjson::ubjson_type::true_type : jsoncons::ubjson::ubjson_type::false_type)); + + end_value(); + return true; + } + + void end_value() + { + if (!stack_.empty()) + { + ++stack_.back().count_; + } + } +}; + +using ubjson_stream_encoder = basic_ubjson_encoder<jsoncons::binary_stream_sink>; +using ubjson_bytes_encoder = basic_ubjson_encoder<jsoncons::bytes_sink<std::vector<uint8_t>>>; + +#if !defined(JSONCONS_NO_DEPRECATED) +template<class Sink=jsoncons::binary_stream_sink> +using basic_ubjson_serializer = basic_ubjson_encoder<Sink>; + +JSONCONS_DEPRECATED_MSG("Instead, use ubjson_stream_encoder") typedef ubjson_stream_encoder ubjson_encoder; +JSONCONS_DEPRECATED_MSG("Instead, use ubjson_stream_encoder") typedef ubjson_stream_encoder ubjson_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use ubjson_bytes_encoder") typedef ubjson_bytes_encoder ubjson_buffer_serializer; +#endif + +}} +#endif diff --git a/include/jsoncons_ext/ubjson/ubjson_error.hpp b/include/jsoncons_ext/ubjson/ubjson_error.hpp new file mode 100644 index 0000000..cc7f5b8 --- /dev/null +++ b/include/jsoncons_ext/ubjson/ubjson_error.hpp @@ -0,0 +1,100 
@@ +/// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_ERROR_HPP +#define JSONCONS_UBJSON_UBJSON_ERROR_HPP + +#include <system_error> +#include <jsoncons/config/jsoncons_config.hpp> + +namespace jsoncons { namespace ubjson { + +enum class ubjson_errc +{ + success = 0, + unexpected_eof = 1, + source_error, + count_required_after_type, + length_is_negative, + length_must_be_integer, + unknown_type, + invalid_utf8_text_string, + too_many_items, + too_few_items, + number_too_large, + max_nesting_depth_exceeded, + key_expected, + max_items_exceeded +}; + +class ubjson_error_category_impl + : public std::error_category +{ +public: + const char* name() const noexcept override + { + return "jsoncons/ubjson"; + } + std::string message(int ev) const override + { + switch (static_cast<ubjson_errc>(ev)) + { + case ubjson_errc::unexpected_eof: + return "Unexpected end of file"; + case ubjson_errc::source_error: + return "Source error"; + case ubjson_errc::count_required_after_type: + return "Type is specified for container, but count is not specified"; + case ubjson_errc::length_is_negative: + return "Request for the length of an array, map or string returned a negative result"; + case ubjson_errc::length_must_be_integer: + return "Length must be a integer numeric type (int8, uint8, int16, int32, int64)"; + case ubjson_errc::unknown_type: + return "Unknown type"; + case ubjson_errc::invalid_utf8_text_string: + return "Illegal UTF-8 encoding in text string"; + case ubjson_errc::too_many_items: + return "Too many items were added to a UBJSON object or array of known length"; + case ubjson_errc::too_few_items: + return "Too few items were added to a UBJSON object or array of known length"; + case ubjson_errc::number_too_large: + return "Number exceeds 
implementation limits"; + case ubjson_errc::max_nesting_depth_exceeded: + return "Data item nesting exceeds limit in options"; + case ubjson_errc::key_expected: + return "Text string key in a map expected"; + case ubjson_errc::max_items_exceeded: + return "Number of items in UBJSON object or array exceeds limit set in options"; + default: + return "Unknown UBJSON parser error"; + } + } +}; + +inline +const std::error_category& ubjson_error_category() +{ + static ubjson_error_category_impl instance; + return instance; +} + +inline +std::error_code make_error_code(ubjson_errc e) +{ + return std::error_code(static_cast<int>(e),ubjson_error_category()); +} + + +}} + +namespace std { + template<> + struct is_error_code_enum<jsoncons::ubjson::ubjson_errc> : public true_type + { + }; +} + +#endif diff --git a/include/jsoncons_ext/ubjson/ubjson_options.hpp b/include/jsoncons_ext/ubjson/ubjson_options.hpp new file mode 100644 index 0000000..1498d50 --- /dev/null +++ b/include/jsoncons_ext/ubjson/ubjson_options.hpp @@ -0,0 +1,87 @@ +// Copyright 2019 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_OPTIONS_HPP +#define JSONCONS_UBJSON_UBJSON_OPTIONS_HPP + +#include <string> +#include <limits> // std::numeric_limits +#include <cwchar> +#include <jsoncons/json_exception.hpp> + +namespace jsoncons { namespace ubjson { + +class ubjson_options; + +class ubjson_options_common +{ + friend class ubjson_options; + + int max_nesting_depth_; +protected: + virtual ~ubjson_options_common() = default; + + ubjson_options_common() + : max_nesting_depth_(1024) + { + } + + ubjson_options_common(const ubjson_options_common&) = default; + ubjson_options_common& operator=(const ubjson_options_common&) = default; + ubjson_options_common(ubjson_options_common&&) = default; + ubjson_options_common& operator=(ubjson_options_common&&) = default; +public: + int max_nesting_depth() const + { + return max_nesting_depth_; + } +}; + +class ubjson_decode_options : public virtual ubjson_options_common +{ + friend class ubjson_options; + std::size_t max_items_; +public: + ubjson_decode_options() : + max_items_(1 << 24) + { + } + + std::size_t max_items() const + { + return max_items_; + } +}; + +class ubjson_encode_options : public virtual ubjson_options_common +{ + friend class ubjson_options; +public: + ubjson_encode_options() + { + } +}; + +class ubjson_options final : public ubjson_decode_options, public ubjson_encode_options +{ +public: + using ubjson_options_common::max_nesting_depth; + + ubjson_options& max_nesting_depth(int value) + { + this->max_nesting_depth_ = value; + return *this; + } + + ubjson_options& max_items(std::size_t value) + { + this->max_items_ = value; + return *this; + } +}; + +}} +#endif diff --git a/include/jsoncons_ext/ubjson/ubjson_parser.hpp b/include/jsoncons_ext/ubjson/ubjson_parser.hpp new file mode 100644 index 0000000..468f139 --- /dev/null +++ 
b/include/jsoncons_ext/ubjson/ubjson_parser.hpp @@ -0,0 +1,880 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_PARSER_HPP +#define JSONCONS_UBJSON_UBJSON_PARSER_HPP + +#include <string> +#include <memory> +#include <utility> // std::move +#include <jsoncons/json.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/ubjson/ubjson_type.hpp> +#include <jsoncons_ext/ubjson/ubjson_error.hpp> +#include <jsoncons_ext/ubjson/ubjson_options.hpp> + +namespace jsoncons { namespace ubjson { + +enum class parse_mode {root,accept,array,indefinite_array,strongly_typed_array,map_key,map_value,strongly_typed_map_key,strongly_typed_map_value,indefinite_map_key,indefinite_map_value}; + +struct parse_state +{ + parse_mode mode; + std::size_t length; + uint8_t type; + std::size_t index; + + parse_state(parse_mode mode, std::size_t length, uint8_t type = 0) noexcept + : mode(mode), length(length), type(type), index(0) + { + } + + parse_state(const parse_state&) = default; + parse_state(parse_state&&) = default; +}; + +template <class Source,class Allocator=std::allocator<char>> +class basic_ubjson_parser : public ser_context +{ + using char_type = char; + using char_traits_type = std::char_traits<char>; + using temp_allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<char_type>; + using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>; + using parse_state_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_state>; + + Source source_; + ubjson_decode_options options_; + bool 
more_; + bool done_; + std::basic_string<char,std::char_traits<char>,char_allocator_type> text_buffer_; + std::vector<parse_state,parse_state_allocator_type> state_stack_; + int nesting_depth_; +public: + template <class Sourceable> + basic_ubjson_parser(Sourceable&& source, + const ubjson_decode_options& options = ubjson_decode_options(), + const Allocator alloc = Allocator()) + : source_(std::forward<Sourceable>(source)), + options_(options), + more_(true), + done_(false), + text_buffer_(alloc), + state_stack_(alloc), + nesting_depth_(0) + { + state_stack_.emplace_back(parse_mode::root,0); + } + + void restart() + { + more_ = true; + } + + void reset() + { + more_ = true; + done_ = false; + text_buffer_.clear(); + state_stack_.clear(); + state_stack_.emplace_back(parse_mode::root,0,0); + nesting_depth_ = 0; + } + + template <class Sourceable> + void reset(Sourceable&& source) + { + source_ = std::forward<Sourceable>(source); + reset(); + } + + bool done() const + { + return done_; + } + + bool stopped() const + { + return !more_; + } + + std::size_t line() const override + { + return 0; + } + + std::size_t column() const override + { + return source_.position(); + } + + void parse(json_visitor& visitor, std::error_code& ec) + { + while (!done_ && more_) + { + switch (state_stack_.back().mode) + { + case parse_mode::array: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + } + else + { + end_array(visitor, ec); + } + break; + } + case parse_mode::strongly_typed_array: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_value(visitor, state_stack_.back().type, ec); + if (ec) + { + return; + } + } + else + { + end_array(visitor, ec); + } + break; + } + case parse_mode::indefinite_array: + { + auto c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + 
} + if (c.value == jsoncons::ubjson::ubjson_type::end_array_marker) + { + source_.ignore(1); + end_array(visitor, ec); + if (ec) + { + return; + } + } + else + { + if (++state_stack_.back().index > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + } + break; + } + case parse_mode::map_key: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_key(visitor, ec); + if (ec) + { + return; + } + state_stack_.back().mode = parse_mode::map_value; + } + else + { + end_object(visitor, ec); + } + break; + } + case parse_mode::map_value: + { + state_stack_.back().mode = parse_mode::map_key; + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::strongly_typed_map_key: + { + if (state_stack_.back().index < state_stack_.back().length) + { + ++state_stack_.back().index; + read_key(visitor, ec); + if (ec) + { + return; + } + state_stack_.back().mode = parse_mode::strongly_typed_map_value; + } + else + { + end_object(visitor, ec); + } + break; + } + case parse_mode::strongly_typed_map_value: + { + state_stack_.back().mode = parse_mode::strongly_typed_map_key; + read_value(visitor, state_stack_.back().type, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::indefinite_map_key: + { + auto c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::end_object_marker) + { + source_.ignore(1); + end_object(visitor, ec); + if (ec) + { + return; + } + } + else + { + if (++state_stack_.back().index > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + read_key(visitor, ec); + if (ec) + { + return; + } + state_stack_.back().mode = parse_mode::indefinite_map_value; + } + break; + } + case parse_mode::indefinite_map_value: + { + 
state_stack_.back().mode = parse_mode::indefinite_map_key; + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::root: + { + state_stack_.back().mode = parse_mode::accept; + read_type_and_value(visitor, ec); + if (ec) + { + return; + } + break; + } + case parse_mode::accept: + { + JSONCONS_ASSERT(state_stack_.size() == 1); + state_stack_.clear(); + more_ = false; + done_ = true; + visitor.flush(); + break; + } + } + } + } +private: + void read_type_and_value(json_visitor& visitor, std::error_code& ec) + { + if (source_.is_error()) + { + ec = ubjson_errc::source_error; + more_ = false; + return; + } + + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + read_value(visitor, b, ec); + } + + void read_value(json_visitor& visitor, uint8_t type, std::error_code& ec) + { + switch (type) + { + case jsoncons::ubjson::ubjson_type::null_type: + { + more_ = visitor.null_value(semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::no_op_type: + { + break; + } + case jsoncons::ubjson::ubjson_type::true_type: + { + more_ = visitor.bool_value(true, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::false_type: + { + more_ = visitor.bool_value(false, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::int8_type: + { + uint8_t buf[sizeof(int8_t)]; + if (source_.read(buf, sizeof(int8_t)) != sizeof(int8_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + int8_t val = binary::big_to_native<int8_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::uint8_type: + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + more_ = visitor.uint64_value(b, semantic_tag::none, *this, ec); + break; + } + case 
jsoncons::ubjson::ubjson_type::int16_type: + { + uint8_t buf[sizeof(int16_t)]; + if (source_.read(buf, sizeof(int16_t)) != sizeof(int16_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + int16_t val = binary::big_to_native<int16_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::int32_type: + { + uint8_t buf[sizeof(int32_t)]; + if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + int32_t val = binary::big_to_native<int32_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::int64_type: + { + uint8_t buf[sizeof(int64_t)]; + if (source_.read(buf, sizeof(int64_t)) != sizeof(int64_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + int64_t val = binary::big_to_native<int64_t>(buf, sizeof(buf)); + more_ = visitor.int64_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::float32_type: + { + uint8_t buf[sizeof(float)]; + if (source_.read(buf, sizeof(float)) != sizeof(float)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + float val = binary::big_to_native<float>(buf, sizeof(buf)); + more_ = visitor.double_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::float64_type: + { + uint8_t buf[sizeof(double)]; + if (source_.read(buf, sizeof(double)) != sizeof(double)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + double val = binary::big_to_native<double>(buf, sizeof(buf)); + more_ = visitor.double_value(val, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::char_type: + { + text_buffer_.clear(); + if (source_reader<Source>::read(source_,text_buffer_,1) != 1) + { + ec = ubjson_errc::unexpected_eof; + more_ = 
false; + return; + } + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = ubjson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(text_buffer_, semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::string_type: + { + std::size_t length = get_length(ec); + if (ec) + { + return; + } + text_buffer_.clear(); + if (source_reader<Source>::read(source_,text_buffer_,length) != length) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = ubjson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.string_value(jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()), semantic_tag::none, *this, ec); + break; + } + case jsoncons::ubjson::ubjson_type::high_precision_number_type: + { + std::size_t length = get_length(ec); + if (ec) + { + return; + } + text_buffer_.clear(); + if (source_reader<Source>::read(source_,text_buffer_,length) != length) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (jsoncons::detail::is_base10(text_buffer_.data(),text_buffer_.length())) + { + more_ = visitor.string_value(jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()), semantic_tag::bigint, *this, ec); + } + else + { + more_ = visitor.string_value(jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()), semantic_tag::bigdec, *this, ec); + } + break; + } + case jsoncons::ubjson::ubjson_type::start_array_marker: + { + begin_array(visitor,ec); + break; + } + case jsoncons::ubjson::ubjson_type::start_object_marker: + { + begin_object(visitor, ec); + break; + } + default: + { + ec = ubjson_errc::unknown_type; + break; + } + } + if (ec) + { + more_ = false; + } + } + 
+ void begin_array(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { + ec = ubjson_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + + auto c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::type_marker) + { + source_.ignore(1); + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::count_marker) + { + source_.ignore(1); + std::size_t length = get_length(ec); + if (ec) + { + return; + } + if (length > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + state_stack_.emplace_back(parse_mode::strongly_typed_array,length,b); + more_ = visitor.begin_array(length, semantic_tag::none, *this, ec); + } + else + { + ec = ubjson_errc::count_required_after_type; + more_ = false; + return; + } + } + else if (c.value == jsoncons::ubjson::ubjson_type::count_marker) + { + source_.ignore(1); + std::size_t length = get_length(ec); + if (ec) + { + return; + } + if (length > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + state_stack_.emplace_back(parse_mode::array,length); + more_ = visitor.begin_array(length, semantic_tag::none, *this, ec); + } + else + { + state_stack_.emplace_back(parse_mode::indefinite_array,0); + more_ = visitor.begin_array(semantic_tag::none, *this, ec); + } + } + + void end_array(json_visitor& visitor, std::error_code& ec) + { + --nesting_depth_; + + more_ = visitor.end_array(*this, ec); + state_stack_.pop_back(); + } + + void begin_object(json_visitor& visitor, std::error_code& ec) + { + if (JSONCONS_UNLIKELY(++nesting_depth_ > options_.max_nesting_depth())) + { 
+ ec = ubjson_errc::max_nesting_depth_exceeded; + more_ = false; + return; + } + + auto c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::type_marker) + { + source_.ignore(1); + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::count_marker) + { + source_.ignore(1); + std::size_t length = get_length(ec); + if (ec) + { + return; + } + if (length > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + state_stack_.emplace_back(parse_mode::strongly_typed_map_key,length,b); + more_ = visitor.begin_object(length, semantic_tag::none, *this, ec); + } + else + { + ec = ubjson_errc::count_required_after_type; + more_ = false; + return; + } + } + else + { + c = source_.peek(); + if (c.eof) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + if (c.value == jsoncons::ubjson::ubjson_type::count_marker) + { + source_.ignore(1); + std::size_t length = get_length(ec); + if (ec) + { + return; + } + if (length > options_.max_items()) + { + ec = ubjson_errc::max_items_exceeded; + more_ = false; + return; + } + state_stack_.emplace_back(parse_mode::map_key,length); + more_ = visitor.begin_object(length, semantic_tag::none, *this, ec); + } + else + { + state_stack_.emplace_back(parse_mode::indefinite_map_key,0); + more_ = visitor.begin_object(semantic_tag::none, *this, ec); + } + } + } + + void end_object(json_visitor& visitor, std::error_code& ec) + { + --nesting_depth_; + more_ = visitor.end_object(*this, ec); + state_stack_.pop_back(); + } + + std::size_t get_length(std::error_code& ec) + { + std::size_t length = 0; + uint8_t type; + if (source_.read(&type, 1) == 0) + { + ec = 
ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + switch (type) + { + case jsoncons::ubjson::ubjson_type::int8_type: + { + uint8_t buf[sizeof(int8_t)]; + if (source_.read(buf, sizeof(int8_t)) != sizeof(int8_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + int8_t val = binary::big_to_native<int8_t>(buf, sizeof(buf)); + if (val >= 0) + { + length = val; + } + else + { + ec = ubjson_errc::length_is_negative; + more_ = false; + return length; + } + break; + } + case jsoncons::ubjson::ubjson_type::uint8_type: + { + uint8_t b; + if (source_.read(&b, 1) == 0) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + length = b; + break; + } + case jsoncons::ubjson::ubjson_type::int16_type: + { + uint8_t buf[sizeof(int16_t)]; + if (source_.read(buf, sizeof(int16_t)) != sizeof(int16_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + int16_t val = binary::big_to_native<int16_t>(buf, sizeof(buf)); + if (val >= 0) + { + length = val; + } + else + { + ec = ubjson_errc::length_is_negative; + more_ = false; + return length; + } + break; + } + case jsoncons::ubjson::ubjson_type::int32_type: + { + uint8_t buf[sizeof(int32_t)]; + if (source_.read(buf, sizeof(int32_t)) != sizeof(int32_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + int32_t val = binary::big_to_native<int32_t>(buf, sizeof(buf)); + if (val >= 0) + { + length = static_cast<std::size_t>(val); + } + else + { + ec = ubjson_errc::length_is_negative; + more_ = false; + return length; + } + break; + } + case jsoncons::ubjson::ubjson_type::int64_type: + { + uint8_t buf[sizeof(int64_t)]; + if (source_.read(buf, sizeof(int64_t)) != sizeof(int64_t)) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return length; + } + int64_t val = binary::big_to_native<int64_t>(buf, sizeof(buf)); + if (val >= 0) + { + length = (std::size_t)val; + if (length != (uint64_t)val) + { + ec = 
ubjson_errc::number_too_large; + more_ = false; + return length; + } + } + else + { + ec = ubjson_errc::length_is_negative; + more_ = false; + return length; + } + break; + } + default: + { + ec = ubjson_errc::length_must_be_integer; + more_ = false; + return length; + } + } + return length; + } + + void read_key(json_visitor& visitor, std::error_code& ec) + { + std::size_t length = get_length(ec); + if (ec) + { + ec = ubjson_errc::key_expected; + more_ = false; + return; + } + text_buffer_.clear(); + if (source_reader<Source>::read(source_,text_buffer_,length) != length) + { + ec = ubjson_errc::unexpected_eof; + more_ = false; + return; + } + + auto result = unicode_traits::validate(text_buffer_.data(),text_buffer_.size()); + if (result.ec != unicode_traits::conv_errc()) + { + ec = ubjson_errc::invalid_utf8_text_string; + more_ = false; + return; + } + more_ = visitor.key(jsoncons::basic_string_view<char>(text_buffer_.data(),text_buffer_.length()), *this, ec); + } +}; + +}} + +#endif diff --git a/include/jsoncons_ext/ubjson/ubjson_reader.hpp b/include/jsoncons_ext/ubjson/ubjson_reader.hpp new file mode 100644 index 0000000..210403a --- /dev/null +++ b/include/jsoncons_ext/ubjson/ubjson_reader.hpp @@ -0,0 +1,92 @@ +// Copyright 2017 Daniel Parker +// Distributed under the Boost license, Version 1.0. 
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_READER_HPP +#define JSONCONS_UBJSON_UBJSON_READER_HPP + +#include <string> +#include <memory> +#include <utility> // std::move +#include <jsoncons/json.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons_ext/ubjson/ubjson_type.hpp> +#include <jsoncons_ext/ubjson/ubjson_error.hpp> +#include <jsoncons_ext/ubjson/ubjson_parser.hpp> + +namespace jsoncons { namespace ubjson { + +template <class Source,class Allocator=std::allocator<char>> +class basic_ubjson_reader +{ + basic_ubjson_parser<Source,Allocator> parser_; + json_visitor& visitor_; +public: + template <class Sourceable> + basic_ubjson_reader(Sourceable&& source, + json_visitor& visitor, + const Allocator alloc) + : basic_ubjson_reader(std::forward<Sourceable>(source), + visitor, + ubjson_decode_options(), + alloc) + { + } + + template <class Sourceable> + basic_ubjson_reader(Sourceable&& source, + json_visitor& visitor, + const ubjson_decode_options& options = ubjson_decode_options(), + const Allocator alloc=Allocator()) + : parser_(std::forward<Sourceable>(source), options, alloc), + visitor_(visitor) + { + } + + void read() + { + std::error_code ec; + read(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,line(),column())); + } + } + + void read(std::error_code& ec) + { + parser_.reset(); + parser_.parse(visitor_, ec); + if (ec) + { + return; + } + } + + std::size_t line() const + { + return parser_.line(); + } + + std::size_t column() const + { + return parser_.column(); + } +}; + +using ubjson_stream_reader = basic_ubjson_reader<jsoncons::binary_stream_source>; + +using ubjson_bytes_reader = basic_ubjson_reader<jsoncons::bytes_source>; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use 
ubjson_stream_reader") typedef ubjson_stream_reader ubjson_reader; +JSONCONS_DEPRECATED_MSG("Instead, use ubjson_bytes_reader") typedef ubjson_bytes_reader ubjson_buffer_reader; +#endif + +}} + +#endif diff --git a/include/jsoncons_ext/ubjson/ubjson_type.hpp b/include/jsoncons_ext/ubjson/ubjson_type.hpp new file mode 100644 index 0000000..ef219ce --- /dev/null +++ b/include/jsoncons_ext/ubjson/ubjson_type.hpp @@ -0,0 +1,43 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_UBJSON_UBJSON_TYPE_HPP +#define JSONCONS_UBJSON_UBJSON_TYPE_HPP + +#include <string> +#include <memory> +#include <jsoncons/config/jsoncons_config.hpp> + +namespace jsoncons { namespace ubjson { + + namespace ubjson_type + { + const uint8_t null_type = 'Z'; + const uint8_t no_op_type = 'N'; + const uint8_t true_type = 'T'; + const uint8_t false_type = 'F'; + const uint8_t int8_type = 'i'; + const uint8_t uint8_type = 'U'; + const uint8_t int16_type = 'I'; + const uint8_t int32_type = 'l'; + const uint8_t int64_type = 'L'; + const uint8_t float32_type = 'd'; + const uint8_t float64_type = 'D'; + const uint8_t high_precision_number_type = 'H'; + const uint8_t char_type = 'C'; + const uint8_t string_type = 'S'; + const uint8_t start_array_marker = '['; + const uint8_t end_array_marker = ']'; + const uint8_t start_object_marker = '{'; + const uint8_t end_object_marker = '}'; + const uint8_t type_marker = '$'; + const uint8_t count_marker = '#'; + } + +} // namespace ubjson +} // namespace jsoncons + +#endif |