diff options
Diffstat (limited to 'include/jsoncons_ext/csv')
-rw-r--r-- | include/jsoncons_ext/csv/csv.hpp | 17 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/csv_cursor.hpp | 358 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/csv_encoder.hpp | 954 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/csv_error.hpp | 85 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/csv_options.hpp | 973 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/csv_parser.hpp | 2097 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/csv_reader.hpp | 348 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/csv_serializer.hpp | 12 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/decode_csv.hpp | 208 | ||||
-rw-r--r-- | include/jsoncons_ext/csv/encode_csv.hpp | 122 |
10 files changed, 5174 insertions, 0 deletions
diff --git a/include/jsoncons_ext/csv/csv.hpp b/include/jsoncons_ext/csv/csv.hpp new file mode 100644 index 0000000..9f8a9c5 --- /dev/null +++ b/include/jsoncons_ext/csv/csv.hpp @@ -0,0 +1,17 @@ +/// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_HPP +#define JSONCONS_CSV_CSV_HPP + +#include <jsoncons_ext/csv/csv_options.hpp> +#include <jsoncons_ext/csv/csv_reader.hpp> +#include <jsoncons_ext/csv/csv_encoder.hpp> +#include <jsoncons_ext/csv/csv_cursor.hpp> +#include <jsoncons_ext/csv/decode_csv.hpp> +#include <jsoncons_ext/csv/encode_csv.hpp> + +#endif diff --git a/include/jsoncons_ext/csv/csv_cursor.hpp b/include/jsoncons_ext/csv/csv_cursor.hpp new file mode 100644 index 0000000..67d55a6 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_cursor.hpp @@ -0,0 +1,358 @@ +// Copyright 2018 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_CURSOR_HPP +#define JSONCONS_CSV_CSV_CURSOR_HPP + +#include <memory> // std::allocator +#include <string> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <ios> +#include <istream> // std::basic_istream +#include <jsoncons/byte_string.hpp> +#include <jsoncons/config/jsoncons_config.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons_ext/csv/csv_parser.hpp> +#include <jsoncons/staj_cursor.hpp> +#include <jsoncons/source.hpp> +#include <jsoncons/source_adaptor.hpp> + +namespace jsoncons { namespace csv { + +template<class CharT,class Source=jsoncons::stream_source<CharT>,class Allocator=std::allocator<char>> +class basic_csv_cursor : public basic_staj_cursor<CharT>, private virtual ser_context +{ +public: + using source_type = Source; + using char_type = CharT; + using allocator_type = Allocator; +private: + static constexpr size_t default_max_buffer_size = 16384; + + typedef typename std::allocator_traits<allocator_type>:: template rebind_alloc<CharT> char_allocator_type; + + text_source_adaptor<Source> source_; + basic_csv_parser<CharT,Allocator> parser_; + basic_staj_visitor<CharT> cursor_visitor_; + + // Noncopyable and nonmoveable + basic_csv_cursor(const basic_csv_cursor&) = delete; + basic_csv_cursor& operator=(const basic_csv_cursor&) = delete; + +public: + using string_view_type = jsoncons::basic_string_view<CharT>; + + // Constructors that throw parse exceptions + + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>(), + std::function<bool(csv_errc,const ser_context&)> err_handler = default_csv_parsing(), + const Allocator& alloc = Allocator(), + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(std::forward<Sourceable>(source)), + parser_(options,err_handler,alloc), + cursor_visitor_(accept_all) + { + if (!done()) + { + next(); + } + } + + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>(), + std::function<bool(csv_errc,const ser_context&)> err_handler = default_csv_parsing(), + const Allocator& alloc = Allocator(), + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(), + parser_(options,err_handler,alloc), + cursor_visitor_(accept_all) + { + jsoncons::basic_string_view<CharT> sv(std::forward<Sourceable>(source)); + initialize_with_string_view(sv); + } + + + // Constructors that set parse error codes + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + std::error_code& ec) + : basic_csv_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + basic_csv_decode_options<CharT>(), + default_csv_parsing(), + ec) + { + } + + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + const basic_csv_decode_options<CharT>& options, + std::error_code& ec) + : basic_csv_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + default_csv_parsing(), + ec) + { + } + + template <class Sourceable> + basic_csv_cursor(Sourceable&& source, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + std::error_code& ec) + : basic_csv_cursor(std::allocator_arg, Allocator(), + std::forward<Sourceable>(source), + options, + err_handler, + ec) + { + } + + template <class Sourceable> + basic_csv_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + std::error_code& ec, + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(std::forward<Sourceable>(source)), + parser_(options,err_handler,alloc), + cursor_visitor_(accept_all) + { + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + basic_csv_cursor(std::allocator_arg_t, const Allocator& alloc, + Sourceable&& source, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + std::error_code& ec, + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(), + parser_(options,err_handler,alloc), + cursor_visitor_(accept_all) + { + jsoncons::basic_string_view<CharT> sv(std::forward<Sourceable>(source)); + initialize_with_string_view(sv, ec); + } + + template <class Sourceable> + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type + reset(Sourceable&& source) + { + source_ = std::forward<Sourceable>(source); + parser_.reinitialize(); + cursor_visitor_.reset(); + if (!done()) + { + next(); + } + } + + template <class Sourceable> + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type + reset(Sourceable&& source) + { + source_ = {}; + parser_.reinitialize(); + cursor_visitor_.reset(); + initialize_with_string_view(std::forward<Sourceable>(source)); + } + + template <class Sourceable> + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type + reset(Sourceable&& source, std::error_code& ec) + { + source_ = std::forward<Sourceable>(source); + parser_.reinitialize(); + cursor_visitor_.reset(); + if (!done()) + { + next(ec); + } + } + + template <class Sourceable> + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type + reset(Sourceable&& source, std::error_code& ec) + { + source_ = {}; + parser_.reinitialize(); + initialize_with_string_view(std::forward<Sourceable>(source), ec); + } + + bool done() const override + { + return parser_.done(); + } + + const basic_staj_event<CharT>& current() const override + { + return cursor_visitor_.event(); + } + + void read_to(basic_json_visitor<CharT>& visitor) override + { + std::error_code ec; + read_to(visitor, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read_to(basic_json_visitor<CharT>& visitor, + std::error_code& ec) override + { + if (staj_to_saj_event(cursor_visitor_.event(), visitor, *this, ec)) + { + read_next(visitor, ec); + } + } + + void next() override + { + std::error_code ec; + next(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void next(std::error_code& ec) override + { + read_next(ec); + } + + static bool accept_all(const basic_staj_event<CharT>&, const ser_context&) + { + return true; + } + + const ser_context& context() const override + { + return *this; + } + + bool eof() const + { + return parser_.source_exhausted() && source_.eof(); + } + + std::size_t line() const override + { + return parser_.line(); + } + + std::size_t column() const override + { + return parser_.column(); + } + + friend + basic_staj_filter_view<CharT> operator|(basic_csv_cursor& cursor, + std::function<bool(const basic_staj_event<CharT>&, const ser_context&)> pred) + { + return basic_staj_filter_view<CharT>(cursor, pred); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<CharT>&)") + void read(basic_json_visitor<CharT>& visitor) + { + read_to(visitor); + } + + JSONCONS_DEPRECATED_MSG("Instead, use read_to(basic_json_visitor<CharT>&, std::error_code&)") + void read(basic_json_visitor<CharT>& visitor, + std::error_code& ec) + { + read_to(visitor, ec); + } +#endif +private: + + void initialize_with_string_view(string_view_type sv) + { + auto r = unicode_traits::detect_json_encoding(sv.data(), sv.size()); + if (!(r.encoding == unicode_traits::encoding_kind::utf8 || r.encoding == unicode_traits::encoding_kind::undetected)) + { + JSONCONS_THROW(ser_error(json_errc::illegal_unicode_character,parser_.line(),parser_.column())); + } + std::size_t offset = (r.ptr - sv.data()); + parser_.update(sv.data()+offset,sv.size()-offset); + if (!done()) + { + next(); + } + } + + void initialize_with_string_view(string_view_type sv, std::error_code& ec) + { + auto r = unicode_traits::detect_encoding_from_bom(sv.data(), sv.size()); + if (!(r.encoding == unicode_traits::encoding_kind::utf8 || r.encoding == unicode_traits::encoding_kind::undetected)) + { + ec = json_errc::illegal_unicode_character; + return; + } + std::size_t offset = (r.ptr - sv.data()); + parser_.update(sv.data()+offset,sv.size()-offset); + if (!done()) + { + next(ec); + } + } + + void read_next(std::error_code& ec) + { + read_next(cursor_visitor_, ec); + } + + void read_next(basic_json_visitor<CharT>& visitor, std::error_code& ec) + { + parser_.restart(); + while (!parser_.stopped()) + { + if (parser_.source_exhausted()) + { + auto s = source_.read_buffer(ec); + if (ec) return; + if (s.size() > 0) + { + parser_.update(s.data(),s.size()); + } + } + parser_.parse_some(visitor, ec); + if (ec) return; + } + } +}; + +using csv_stream_cursor = basic_csv_cursor<char,jsoncons::stream_source<char>>; +using csv_string_cursor = basic_csv_cursor<char,jsoncons::string_source<char>>; +using wcsv_stream_cursor = basic_csv_cursor<wchar_t,jsoncons::stream_source<wchar_t>>; +using wcsv_string_cursor = basic_csv_cursor<wchar_t,jsoncons::string_source<wchar_t>>; + +using csv_cursor = basic_csv_cursor<char>; +using wcsv_cursor = basic_csv_cursor<wchar_t>; + +}} + +#endif + diff --git a/include/jsoncons_ext/csv/csv_encoder.hpp b/include/jsoncons_ext/csv/csv_encoder.hpp new file mode 100644 index 0000000..49c1a3d --- /dev/null +++ b/include/jsoncons_ext/csv/csv_encoder.hpp @@ -0,0 +1,954 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_ENCODER_HPP +#define JSONCONS_CSV_CSV_ENCODER_HPP + +#include <array> // std::array +#include <string> +#include <vector> +#include <ostream> +#include <utility> // std::move +#include <unordered_map> // std::unordered_map +#include <memory> // std::allocator +#include <limits> // std::numeric_limits +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/detail/write_number.hpp> +#include <jsoncons_ext/csv/csv_options.hpp> +#include <jsoncons/sink.hpp> + +namespace jsoncons { namespace csv { + +template<class CharT,class Sink=jsoncons::stream_sink<CharT>,class Allocator=std::allocator<char>> +class basic_csv_encoder final : public basic_json_visitor<CharT> +{ +public: + using char_type = CharT; + using typename basic_json_visitor<CharT>::string_view_type; + using sink_type = Sink; + + using allocator_type = Allocator; + using char_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<CharT>; + using string_type = std::basic_string<CharT, std::char_traits<CharT>, char_allocator_type>; + using string_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<string_type>; + using string_string_allocator_type = typename std::allocator_traits<allocator_type>:: template rebind_alloc<std::pair<const string_type,string_type>>; + +private: + static jsoncons::basic_string_view<CharT> null_constant() + { + static jsoncons::basic_string_view<CharT> k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"null"); + return k; + } + static jsoncons::basic_string_view<CharT> true_constant() + { + static jsoncons::basic_string_view<CharT> k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"true"); + return k; + } + static jsoncons::basic_string_view<CharT> false_constant() + { + static jsoncons::basic_string_view<CharT> k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"false"); + return k; + } + + enum class stack_item_kind + { + row_mapping, + column_mapping, + object, + row, + column, + object_multi_valued_field, + row_multi_valued_field, + column_multi_valued_field + }; + + struct stack_item + { + stack_item_kind item_kind_; + std::size_t count_; + + stack_item(stack_item_kind item_kind) noexcept + : item_kind_(item_kind), count_(0) + { + } + + bool is_object() const + { + return item_kind_ == stack_item_kind::object; + } + + stack_item_kind item_kind() const + { + return item_kind_; + } + }; + + Sink sink_; + const basic_csv_encode_options<CharT> options_; + allocator_type alloc_; + + std::vector<stack_item> stack_; + jsoncons::detail::write_double fp_; + std::vector<string_type,string_allocator_type> strings_buffer_; + + std::unordered_map<string_type,string_type, std::hash<string_type>,std::equal_to<string_type>,string_string_allocator_type> buffered_line_; + string_type name_; + std::size_t column_index_; + std::vector<std::size_t> row_counts_; + + // Noncopyable and nonmoveable + basic_csv_encoder(const basic_csv_encoder&) = delete; + basic_csv_encoder& operator=(const basic_csv_encoder&) = delete; +public: + basic_csv_encoder(Sink&& sink, + const Allocator& alloc = Allocator()) + : basic_csv_encoder(std::forward<Sink>(sink), basic_csv_encode_options<CharT>(), alloc) + { + } + + basic_csv_encoder(Sink&& sink, + const basic_csv_encode_options<CharT>& options, + const Allocator& alloc = Allocator()) + : sink_(std::forward<Sink>(sink)), + options_(options), + alloc_(alloc), + stack_(), + fp_(options.float_format(), options.precision()), + column_index_(0) + { + jsoncons::csv::detail::parse_column_names(options.column_names(), strings_buffer_); + } + + ~basic_csv_encoder() noexcept + { + JSONCONS_TRY + { + sink_.flush(); + } + JSONCONS_CATCH(...) + { + } + } + + void reset() + { + stack_.clear(); + strings_buffer_.clear(); + buffered_line_.clear(); + name_.clear(); + column_index_ = 0; + row_counts_.clear(); + } + + void reset(Sink&& sink) + { + sink_ = std::move(sink); + reset(); + } + +private: + + template<class AnyWriter> + void escape_string(const CharT* s, + std::size_t length, + CharT quote_char, CharT quote_escape_char, + AnyWriter& sink) + { + const CharT* begin = s; + const CharT* end = s + length; + for (const CharT* it = begin; it != end; ++it) + { + CharT c = *it; + if (c == quote_char) + { + sink.push_back(quote_escape_char); + sink.push_back(quote_char); + } + else + { + sink.push_back(c); + } + } + } + + void visit_flush() override + { + sink_.flush(); + } + + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + stack_.emplace_back(stack_item_kind::column_mapping); + return true; + } + switch (stack_.back().item_kind_) + { + case stack_item_kind::row_mapping: + stack_.emplace_back(stack_item_kind::object); + return true; + default: // error + ec = csv_errc::source_error; + return false; + } + } + + bool visit_end_object(const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + if (stack_[0].count_ == 0) + { + for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(strings_buffer_[i].data(), + strings_buffer_[i].length()); + } + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } + for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + auto it = buffered_line_.find(strings_buffer_[i]); + if (it != buffered_line_.end()) + { + sink_.append(it->second.data(),it->second.length()); + it->second.clear(); + } + } + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + break; + case stack_item_kind::column_mapping: + { + for (const auto& item : strings_buffer_) + { + sink_.append(item.data(), item.size()); + sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); + } + break; + } + default: + break; + } + stack_.pop_back(); + if (!stack_.empty()) + { + end_value(); + } + return true; + } + + bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override + { + if (stack_.empty()) + { + stack_.emplace_back(stack_item_kind::row_mapping); + return true; + } + switch (stack_.back().item_kind_) + { + case stack_item_kind::row_mapping: + stack_.emplace_back(stack_item_kind::row); + if (stack_[0].count_ == 0) + { + for (std::size_t i = 0; i < strings_buffer_.size(); ++i) + { + if (i > 0) + { + sink_.push_back(options_.field_delimiter()); + } + sink_.append(strings_buffer_[i].data(),strings_buffer_[i].length()); + } + if (strings_buffer_.size() > 0) + { + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + } + } + return true; + case stack_item_kind::object: + stack_.emplace_back(stack_item_kind::object_multi_valued_field); + return true; + case stack_item_kind::column_mapping: + stack_.emplace_back(stack_item_kind::column); + row_counts_.push_back(1); + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + return true; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + begin_value(bo); + stack_.emplace_back(stack_item_kind::column_multi_valued_field); + return true; + } + case stack_item_kind::row: + begin_value(sink_); + stack_.emplace_back(stack_item_kind::row_multi_valued_field); + return true; + default: // error + ec = csv_errc::source_error; + return false; + } + } + + bool visit_end_array(const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::row: + sink_.append(options_.line_delimiter().data(), + options_.line_delimiter().length()); + break; + case stack_item_kind::column: + ++column_index_; + break; + default: + break; + } + stack_.pop_back(); + + if (!stack_.empty()) + { + end_value(); + } + return true; + } + + bool visit_key(const string_view_type& name, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + { + name_ = string_type(name); + buffered_line_[string_type(name)] = std::basic_string<CharT>(); + if (stack_[0].count_ == 0 && options_.column_names().size() == 0) + { + strings_buffer_.emplace_back(name); + } + break; + } + case stack_item_kind::column_mapping: + { + if (strings_buffer_.empty()) + { + strings_buffer_.emplace_back(name); + } + else + { + strings_buffer_[0].push_back(options_.field_delimiter()); + strings_buffer_[0].append(string_type(name)); + } + break; + } + default: + break; + } + return true; + } + + bool visit_null(semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_null_value(bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_null_value(sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_null_value(bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_null_value(bo); + break; + } + default: + break; + } + return true; + } + + bool visit_string(const string_view_type& sv, semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_string_value(sv,bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_string_value(sv,sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_string_value(sv,bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_string_value(sv,bo); + break; + } + default: + break; + } + return true; + } + + bool visit_byte_string(const byte_string_view& b, + semantic_tag tag, + const ser_context& context, + std::error_code& ec) override + { + byte_string_chars_format encoding_hint; + switch (tag) + { + case semantic_tag::base16: + encoding_hint = byte_string_chars_format::base16; + break; + case semantic_tag::base64: + encoding_hint = byte_string_chars_format::base64; + break; + case semantic_tag::base64url: + encoding_hint = byte_string_chars_format::base64url; + break; + default: + encoding_hint = byte_string_chars_format::none; + break; + } + byte_string_chars_format format = jsoncons::detail::resolve_byte_string_chars_format(encoding_hint,byte_string_chars_format::none,byte_string_chars_format::base64url); + + std::basic_string<CharT> s; + switch (format) + { + case byte_string_chars_format::base16: + { + encode_base16(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + case byte_string_chars_format::base64: + { + encode_base64(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + case byte_string_chars_format::base64url: + { + encode_base64url(b.begin(),b.end(),s); + visit_string(s, semantic_tag::none, context, ec); + break; + } + default: + { + JSONCONS_UNREACHABLE(); + } + } + + return true; + } + + bool visit_double(double val, + semantic_tag, + const ser_context& context, + std::error_code& ec) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_double_value(val, context, bo, ec); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_double_value(val, context, sink_, ec); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_double_value(val, context, bo, ec); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_double_value(val, context, bo, ec); + break; + } + default: + break; + } + return true; + } + + bool visit_int64(int64_t val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_int64_value(val,bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_int64_value(val,sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_int64_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_int64_value(val, bo); + break; + } + default: + break; + } + return true; + } + + bool visit_uint64(uint64_t val, + semantic_tag, + const ser_context&, + std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_uint64_value(val, bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_uint64_value(val,sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_uint64_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_uint64_value(val, bo); + break; + } + default: + break; + } + return true; + } + + bool visit_bool(bool val, semantic_tag, const ser_context&, std::error_code&) override + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::object: + case stack_item_kind::object_multi_valued_field: + { + auto it = buffered_line_.find(name_); + if (it != buffered_line_.end()) + { + std::basic_string<CharT> s; + jsoncons::string_sink<std::basic_string<CharT>> bo(s); + write_bool_value(val,bo); + bo.flush(); + if (!it->second.empty() && options_.subfield_delimiter() != char_type()) + { + it->second.push_back(options_.subfield_delimiter()); + } + it->second.append(s); + } + break; + } + case stack_item_kind::row: + case stack_item_kind::row_multi_valued_field: + write_bool_value(val,sink_); + break; + case stack_item_kind::column: + { + if (strings_buffer_.size() <= row_counts_.back()) + { + strings_buffer_.emplace_back(); + } + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_bool_value(val, bo); + break; + } + case stack_item_kind::column_multi_valued_field: + { + jsoncons::string_sink<std::basic_string<CharT>> bo(strings_buffer_[row_counts_.back()]); + write_bool_value(val, bo); + break; + } + default: + break; + } + return true; + } + + template <class AnyWriter> + bool do_string_value(const CharT* s, std::size_t length, AnyWriter& sink) + { + bool quote = false; + if (options_.quote_style() == quote_style_kind::all || options_.quote_style() == quote_style_kind::nonnumeric || + (options_.quote_style() == quote_style_kind::minimal && + (std::char_traits<CharT>::find(s, length, options_.field_delimiter()) != nullptr || std::char_traits<CharT>::find(s, length, options_.quote_char()) != nullptr))) + { + quote = true; + sink.push_back(options_.quote_char()); + } + escape_string(s, length, options_.quote_char(), options_.quote_escape_char(), sink); + if (quote) + { + sink.push_back(options_.quote_char()); + } + + return true; + } + + template <class AnyWriter> + void write_string_value(const string_view_type& value, AnyWriter& sink) + { + begin_value(sink); + do_string_value(value.data(),value.length(),sink); + end_value(); + } + + template <class AnyWriter> + void write_double_value(double val, const ser_context& context, AnyWriter& sink, std::error_code& ec) + { + begin_value(sink); + + if (!std::isfinite(val)) + { + if ((std::isnan)(val)) + { + if (options_.enable_nan_to_num()) + { + sink.append(options_.nan_to_num().data(), options_.nan_to_num().length()); + } + else if (options_.enable_nan_to_str()) + { + visit_string(options_.nan_to_str(), semantic_tag::none, context, ec); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + else if (val == std::numeric_limits<double>::infinity()) + { + if (options_.enable_inf_to_num()) + { + sink.append(options_.inf_to_num().data(), options_.inf_to_num().length()); + } + else if (options_.enable_inf_to_str()) + { + visit_string(options_.inf_to_str(), semantic_tag::none, context, ec); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + else + { + if (options_.enable_neginf_to_num()) + { + sink.append(options_.neginf_to_num().data(), options_.neginf_to_num().length()); + } + else if (options_.enable_neginf_to_str()) + { + visit_string(options_.neginf_to_str(), semantic_tag::none, context, ec); + } + else + { + sink.append(null_constant().data(), null_constant().size()); + } + } + } + else + { + fp_(val, sink); + } + + end_value(); + + } + + template <class AnyWriter> + void write_int64_value(int64_t val, AnyWriter& sink) + { + begin_value(sink); + + jsoncons::detail::from_integer(val,sink); + + end_value(); + } + + template <class AnyWriter> + void write_uint64_value(uint64_t val, AnyWriter& sink) + { + begin_value(sink); + + jsoncons::detail::from_integer(val,sink); + + end_value(); + } + + template <class AnyWriter> + void write_bool_value(bool val, AnyWriter& sink) + { + begin_value(sink); + + if (val) + { + sink.append(true_constant().data(), true_constant().size()); + } + else + { + sink.append(false_constant().data(), false_constant().size()); + } + + end_value(); + } + + template <class AnyWriter> + bool write_null_value(AnyWriter& sink) + { + begin_value(sink); + sink.append(null_constant().data(), null_constant().size()); + end_value(); + return true; + } + + template <class AnyWriter> + void begin_value(AnyWriter& sink) + { + JSONCONS_ASSERT(!stack_.empty()); + switch (stack_.back().item_kind_) + { + case stack_item_kind::row: + if (stack_.back().count_ > 0) + { + sink.push_back(options_.field_delimiter()); + } + break; + case stack_item_kind::column: + { + if (row_counts_.size() >= 3) + { + for (std::size_t i = row_counts_.size()-2; i-- > 0;) + { + if (row_counts_[i] <= row_counts_.back()) + { + sink.push_back(options_.field_delimiter()); + } + else + { + break; + } + } + } + if (column_index_ > 0) + { + sink.push_back(options_.field_delimiter()); + } + break; + } + case stack_item_kind::row_multi_valued_field: + case stack_item_kind::column_multi_valued_field: + if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) + { + sink.push_back(options_.subfield_delimiter()); + } + break; + default: + break; + } + } + + void end_value() + { + JSONCONS_ASSERT(!stack_.empty()); + switch(stack_.back().item_kind_) + { + case stack_item_kind::row: + { + ++stack_.back().count_; + break; + } + case stack_item_kind::column: + { + ++row_counts_.back(); + break; + } + default: + ++stack_.back().count_; + break; + } + } +}; + +using csv_stream_encoder = basic_csv_encoder<char>; +using csv_string_encoder = basic_csv_encoder<char,jsoncons::string_sink<std::string>>; +using csv_wstream_encoder = basic_csv_encoder<wchar_t>; +using wcsv_string_encoder = basic_csv_encoder<wchar_t,jsoncons::string_sink<std::wstring>>; + +#if !defined(JSONCONS_NO_DEPRECATED) +template<class CharT, class Sink = jsoncons::stream_sink<CharT>, class Allocator = std::allocator<CharT>> +using basic_csv_serializer = basic_csv_encoder<CharT,Sink,Allocator>; + +JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use csv_string_encoder") typedef csv_string_encoder csv_string_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use csv_string_encoder") typedef csv_string_encoder csv_string_serializer; +JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_encoder; +JSONCONS_DEPRECATED_MSG("Instead, use wcsv_stream_encoder") typedef csv_stream_encoder wcsv_encoder; +#endif + +}} + +#endif diff --git a/include/jsoncons_ext/csv/csv_error.hpp b/include/jsoncons_ext/csv/csv_error.hpp new file mode 100644 index 0000000..30255dd --- /dev/null +++ b/include/jsoncons_ext/csv/csv_error.hpp @@ -0,0 +1,85 @@ +/// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_ERROR_HPP +#define JSONCONS_CSV_CSV_ERROR_HPP + +#include <system_error> +#include <jsoncons/json_exception.hpp> + +namespace jsoncons { namespace csv { + + enum class csv_errc : int + { + success = 0, + unexpected_eof = 1, + source_error, + expected_quote, + syntax_error, + invalid_parse_state, + invalid_escaped_char, + unexpected_char_between_fields + }; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use csv_errc") typedef csv_errc csv_parser_errc; +#endif + +class csv_error_category_impl + : public std::error_category +{ +public: + const char* name() const noexcept override + { + return "jsoncons/csv"; + } + std::string message(int ev) const override + { + switch (static_cast<csv_errc>(ev)) + { + case csv_errc::unexpected_eof: + return "Unexpected end of file"; + case csv_errc::source_error: + return "Source error"; + case csv_errc::expected_quote: + return "Expected quote character"; + case csv_errc::syntax_error: + return "CSV syntax error"; + case csv_errc::invalid_parse_state: + return "Invalid CSV parser state"; + case csv_errc::invalid_escaped_char: + return "Invalid character following quote escape character"; + case csv_errc::unexpected_char_between_fields: + return "Unexpected character between fields"; + default: + return "Unknown CSV parser error"; + } + } +}; + +inline +const std::error_category& csv_error_category() +{ + static csv_error_category_impl instance; + return instance; +} + +inline +std::error_code make_error_code(csv_errc result) +{ + return std::error_code(static_cast<int>(result),csv_error_category()); +} + +}} + +namespace std { + template<> + struct is_error_code_enum<jsoncons::csv::csv_errc> : public true_type + { + }; +} + +#endif diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp new file mode 100644 index 0000000..8bd2e22 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -0,0 +1,973 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_OPTIONS_HPP +#define JSONCONS_CSV_CSV_OPTIONS_HPP + +#include <string> +#include <vector> +#include <utility> // std::pair +#include <unordered_map> // std::unordered_map +#include <map> +#include <limits> // std::numeric_limits +#include <cwchar> +#include <jsoncons/json_options.hpp> + +namespace jsoncons { namespace csv { + +enum class csv_column_type : uint8_t +{ + string_t,integer_t,float_t,boolean_t,repeat_t +}; + +enum class quote_style_kind : uint8_t +{ + minimal,all,nonnumeric,none +}; + +enum class csv_mapping_kind : uint8_t +{ + n_rows = 1, + n_objects, + m_columns +}; + +#if !defined(JSONCONS_NO_DEPRECATED) +using mapping_kind = csv_mapping_kind; +JSONCONS_DEPRECATED_MSG("Instead, use quote_style_kind") typedef quote_style_kind quote_styles; +JSONCONS_DEPRECATED_MSG("Instead, use quote_style_kind") typedef quote_style_kind quote_style_type; +JSONCONS_DEPRECATED_MSG("Instead, use csv_mapping_kind") typedef csv_mapping_kind mapping_type; +#endif + +enum class column_state {sequence,label}; + +struct csv_type_info +{ + csv_type_info() = default; + csv_type_info(const csv_type_info&) = default; + csv_type_info(csv_type_info&&) = default; + + csv_type_info(csv_column_type ctype, std::size_t lev, std::size_t repcount = 0) noexcept + { + col_type = ctype; + level = lev; + rep_count = repcount; + } + + csv_column_type col_type; + std::size_t level; + std::size_t rep_count; +}; + +namespace detail { + +template <class CharT,class Container> +void parse_column_names(const std::basic_string<CharT>& names, + Container& cont) +{ + column_state state = column_state::sequence; + typename Container::value_type buffer(cont.get_allocator()); + + auto p = names.begin(); + while (p != names.end()) + { + switch (state) + { + case column_state::sequence: + { + switch (*p) + { + case ' ': case '\t':case '\r': case '\n': + ++p; + break; + default: + buffer.clear(); + state = column_state::label; + break; + } + break; + } + case column_state::label: + { + switch (*p) + { + case ',': + cont.push_back(buffer); + buffer.clear(); + ++p; + state = column_state::sequence; + break; + default: + buffer.push_back(*p); + ++p; + break; + } + break; + } + } + } + if (state == column_state::label) + { + cont.push_back(buffer); + buffer.clear(); + } +} + +template <class CharT,class Container> +void parse_column_types(const std::basic_string<CharT>& types, + Container& column_types) +{ + const std::map<jsoncons::basic_string_view<CharT>,csv_column_type> type_dictionary = + { + + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"string"),csv_column_type::string_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"integer"),csv_column_type::integer_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"float"),csv_column_type::float_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"boolean"),csv_column_type::boolean_t} + }; + + column_state state = column_state::sequence; + int depth = 0; + std::basic_string<CharT> buffer; + + auto p = types.begin(); + while (p != types.end()) + { + switch (state) + { + case column_state::sequence: + { + switch (*p) + { + case ' ': case '\t':case '\r': case '\n': + ++p; + break; + case '[': + ++depth; + ++p; + break; + case ']': + JSONCONS_ASSERT(depth > 0); + --depth; + ++p; + break; + case '*': + { + JSONCONS_ASSERT(column_types.size() != 0); + std::size_t offset = 0; + std::size_t level = column_types.size() > 0 ? column_types.back().level: 0; + if (level > 0) + { + for (auto it = column_types.rbegin(); + it != column_types.rend() && level == it->level; + ++it) + { + ++offset; + } + } + else + { + offset = 1; + } + column_types.emplace_back(csv_column_type::repeat_t,depth,offset); + ++p; + break; + } + default: + buffer.clear(); + state = column_state::label; + break; + } + break; + } + case column_state::label: + { + switch (*p) + { + case '*': + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + state = column_state::sequence; + break; + } + case ',': + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + ++p; + state = column_state::sequence; + break; + } + case ']': + { + JSONCONS_ASSERT(depth > 0); + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + --depth; + ++p; + state = column_state::sequence; + break; + } + default: + { + buffer.push_back(*p); + ++p; + break; + } + } + break; + } + } + } + if (state == column_state::label) + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + } +} + +} // detail + +template <class CharT> +class basic_csv_options; + +template <class CharT> +class basic_csv_options_common +{ + friend class basic_csv_options<CharT>; +public: + using char_type = CharT; + using string_type = std::basic_string<CharT>; +private: + char_type field_delimiter_; + char_type quote_char_; + char_type quote_escape_char_; + char_type subfield_delimiter_; + + bool enable_nan_to_num_:1; + bool enable_inf_to_num_:1; + bool enable_neginf_to_num_:1; + bool enable_nan_to_str_:1; + bool enable_inf_to_str_:1; + bool enable_neginf_to_str_:1; + bool enable_str_to_nan_:1; + bool enable_str_to_inf_:1; + bool enable_str_to_neginf_:1; + + string_type nan_to_num_; + string_type inf_to_num_; + string_type neginf_to_num_; + string_type nan_to_str_; + string_type inf_to_str_; + string_type neginf_to_str_; + string_type column_names_; + +protected: + basic_csv_options_common() + : field_delimiter_(','), + quote_char_('\"'), + quote_escape_char_('\"'), + subfield_delimiter_(char_type()), + enable_nan_to_num_(false), + enable_inf_to_num_(false), + enable_neginf_to_num_(false), + enable_nan_to_str_(false), + enable_inf_to_str_(false), + enable_neginf_to_str_(false), + enable_str_to_nan_(false), + enable_str_to_inf_(false), + enable_str_to_neginf_(false) + { + } + + basic_csv_options_common(const basic_csv_options_common&) = default; + basic_csv_options_common& operator=(const basic_csv_options_common&) = default; + + virtual ~basic_csv_options_common() noexcept = default; +public: + + char_type field_delimiter() const + { + return field_delimiter_; + } + + const char_type subfield_delimiter() const + { + return subfield_delimiter_; + } + + char_type quote_char() const + { + return quote_char_; + } + + char_type quote_escape_char() const + { + return quote_escape_char_; + } + + string_type column_names() const + { + return column_names_; + } + + bool enable_nan_to_num() const + { + return enable_nan_to_num_; + } + + bool enable_inf_to_num() const + { + return enable_inf_to_num_; + } + + bool enable_neginf_to_num() const + { + return enable_neginf_to_num_ || enable_inf_to_num_; + } + + bool enable_nan_to_str() const + { + return enable_nan_to_str_; + } + + bool enable_str_to_nan() const + { + return enable_str_to_nan_; + } + + bool enable_inf_to_str() const + { + return enable_inf_to_str_; + } + + bool enable_str_to_inf() const + { + return enable_str_to_inf_; + } + + bool enable_neginf_to_str() const + { + return enable_neginf_to_str_ || enable_inf_to_str_; + } + + bool enable_str_to_neginf() const + { + return enable_str_to_neginf_ || enable_str_to_inf_; + } + + string_type nan_to_num() const + { + return nan_to_num_; + } + + string_type inf_to_num() const + { + return inf_to_num_; + } + + string_type neginf_to_num() const + { + if (enable_neginf_to_num_) + { + return neginf_to_num_; + } + else if (enable_inf_to_num_) + { + string_type s; + s.push_back('-'); + s.append(inf_to_num_); + return s; + } + else + { + return neginf_to_num_; + } + } + + string_type nan_to_str() const + { + return nan_to_str_; + } + + string_type inf_to_str() const + { + return inf_to_str_; + } + + string_type neginf_to_str() const + { + if (enable_neginf_to_str_) + { + return neginf_to_str_; + } + else if (enable_inf_to_str_) + { + string_type s; + s.push_back('-'); + s.append(inf_to_str_); + return s; + } + else + { + return neginf_to_str_; // empty string + } + } +}; + +template <class CharT> +class basic_csv_decode_options : public virtual basic_csv_options_common<CharT> +{ + friend class basic_csv_options<CharT>; + using super_type = basic_csv_options_common<CharT>; +public: + using typename super_type::char_type; + using typename super_type::string_type; + +private: + bool assume_header_:1; + bool ignore_empty_values_:1; + bool ignore_empty_lines_:1; + bool trim_leading_:1; + bool trim_trailing_:1; + bool trim_leading_inside_quotes_:1; + bool trim_trailing_inside_quotes_:1; + bool unquoted_empty_value_is_null_:1; + bool infer_types_:1; + bool lossless_number_:1; + char_type comment_starter_; + csv_mapping_kind mapping_; + std::size_t header_lines_; + std::size_t max_lines_; + string_type column_types_; + string_type column_defaults_; +public: + basic_csv_decode_options() + : assume_header_(false), + ignore_empty_values_(false), + ignore_empty_lines_(true), + trim_leading_(false), + trim_trailing_(false), + trim_leading_inside_quotes_(false), + trim_trailing_inside_quotes_(false), + unquoted_empty_value_is_null_(false), + infer_types_(true), + lossless_number_(false), + comment_starter_('\0'), + mapping_(), + header_lines_(0), + max_lines_((std::numeric_limits<std::size_t>::max)()) + {} + + basic_csv_decode_options(const basic_csv_decode_options& other) = default; + + basic_csv_decode_options(basic_csv_decode_options&& other) + : super_type(std::forward<basic_csv_decode_options>(other)), + assume_header_(other.assume_header_), + ignore_empty_values_(other.ignore_empty_values_), + ignore_empty_lines_(other.ignore_empty_lines_), + trim_leading_(other.trim_leading_), + trim_trailing_(other.trim_trailing_), + trim_leading_inside_quotes_(other.trim_leading_inside_quotes_), + trim_trailing_inside_quotes_(other.trim_trailing_inside_quotes_), + unquoted_empty_value_is_null_(other.unquoted_empty_value_is_null_), + infer_types_(other.infer_types_), + lossless_number_(other.lossless_number_), + comment_starter_(other.comment_starter_), + mapping_(other.mapping_), + header_lines_(other.header_lines_), + max_lines_(other.max_lines_), + column_types_(std::move(other.column_types_)), + column_defaults_(std::move(other.column_defaults_)) + {} + + std::size_t header_lines() const + { + return (assume_header_ && header_lines_ <= 1) ? 1 : header_lines_; + } + + bool assume_header() const + { + return assume_header_; + } + + bool ignore_empty_values() const + { + return ignore_empty_values_; + } + + bool ignore_empty_lines() const + { + return ignore_empty_lines_; + } + + bool trim_leading() const + { + return trim_leading_; + } + + bool trim_trailing() const + { + return trim_trailing_; + } + + bool trim_leading_inside_quotes() const + { + return trim_leading_inside_quotes_; + } + + bool trim_trailing_inside_quotes() const + { + return trim_trailing_inside_quotes_; + } + + bool trim() const + { + return trim_leading_ && trim_trailing_; + } + + bool trim_inside_quotes() const + { + return trim_leading_inside_quotes_ && trim_trailing_inside_quotes_; + } + + bool unquoted_empty_value_is_null() const + { + return unquoted_empty_value_is_null_; + } + + bool infer_types() const + { + return infer_types_; + } + + bool lossless_number() const + { + return lossless_number_; + } + + char_type comment_starter() const + { + return comment_starter_; + } + + csv_mapping_kind mapping_kind() const + { + return mapping_ != csv_mapping_kind() ? mapping_ : (assume_header() || this->column_names().size() > 0 ? csv_mapping_kind::n_objects : csv_mapping_kind::n_rows); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + csv_mapping_kind mapping() const + { + return mapping_kind(); + } +#endif + + std::size_t max_lines() const + { + return max_lines_; + } + + string_type column_types() const + { + return column_types_; + } + + string_type column_defaults() const + { + return column_defaults_; + } +}; + +template <class CharT> +class basic_csv_encode_options : public virtual basic_csv_options_common<CharT> +{ + friend class basic_csv_options<CharT>; + using super_type = basic_csv_options_common<CharT>; +public: + using typename super_type::char_type; + using typename super_type::string_type; +private: + quote_style_kind quote_style_; + float_chars_format float_format_; + int8_t precision_; + string_type line_delimiter_; +public: + basic_csv_encode_options() + : quote_style_(quote_style_kind::minimal), + float_format_(float_chars_format::general), + precision_(0) + { + line_delimiter_.push_back('\n'); + } + + basic_csv_encode_options(const basic_csv_encode_options& other) = default; + + basic_csv_encode_options(basic_csv_encode_options&& other) + : super_type(std::forward<basic_csv_encode_options>(other)), + quote_style_(other.quote_style_), + float_format_(other.float_format_), + precision_(other.precision_), + line_delimiter_(std::move(other.line_delimiter_)) + { + } + + quote_style_kind quote_style() const + { + return quote_style_; + } + + float_chars_format float_format() const + { + return float_format_; + } + + int8_t precision() const + { + return precision_; + } + + string_type line_delimiter() const + { + return line_delimiter_; + } +}; + +template <class CharT> +class basic_csv_options final : public basic_csv_decode_options<CharT>, public basic_csv_encode_options<CharT> +{ + using char_type = CharT; + using string_type = std::basic_string<CharT>; + +public: + using basic_csv_decode_options<CharT>::enable_str_to_nan; + using basic_csv_decode_options<CharT>::enable_str_to_inf; + using basic_csv_decode_options<CharT>::enable_str_to_neginf; + using basic_csv_decode_options<CharT>::nan_to_str; + using basic_csv_decode_options<CharT>::inf_to_str; + using basic_csv_decode_options<CharT>::neginf_to_str; + using basic_csv_decode_options<CharT>::nan_to_num; + using basic_csv_decode_options<CharT>::inf_to_num; + using basic_csv_decode_options<CharT>::neginf_to_num; + using basic_csv_decode_options<CharT>::field_delimiter; + using basic_csv_decode_options<CharT>::subfield_delimiter; + using basic_csv_decode_options<CharT>::quote_char; + using basic_csv_decode_options<CharT>::quote_escape_char; + using basic_csv_decode_options<CharT>::column_names; + using basic_csv_decode_options<CharT>::header_lines; + using basic_csv_decode_options<CharT>::assume_header; + using basic_csv_decode_options<CharT>::ignore_empty_values; + using basic_csv_decode_options<CharT>::ignore_empty_lines; + using basic_csv_decode_options<CharT>::trim_leading; + using basic_csv_decode_options<CharT>::trim_trailing; + using basic_csv_decode_options<CharT>::trim_leading_inside_quotes; + using basic_csv_decode_options<CharT>::trim_trailing_inside_quotes; + using basic_csv_decode_options<CharT>::trim; + using basic_csv_decode_options<CharT>::trim_inside_quotes; + using basic_csv_decode_options<CharT>::unquoted_empty_value_is_null; + using basic_csv_decode_options<CharT>::infer_types; + using basic_csv_decode_options<CharT>::lossless_number; + using basic_csv_decode_options<CharT>::comment_starter; + using basic_csv_decode_options<CharT>::mapping; + using basic_csv_decode_options<CharT>::max_lines; + using basic_csv_decode_options<CharT>::column_types; + using basic_csv_decode_options<CharT>::column_defaults; + using basic_csv_encode_options<CharT>::float_format; + using basic_csv_encode_options<CharT>::precision; + using basic_csv_encode_options<CharT>::line_delimiter; + using basic_csv_encode_options<CharT>::quote_style; + + static constexpr size_t default_indent = 4; + +// Constructors + + basic_csv_options() = default; + basic_csv_options(const basic_csv_options&) = default; + basic_csv_options(basic_csv_options&&) = default; + basic_csv_options& operator=(const basic_csv_options&) = default; + basic_csv_options& operator=(basic_csv_options&&) = default; + + basic_csv_options& float_format(float_chars_format value) + { + this->float_format_ = value; + return *this; + } + + basic_csv_options& precision(int8_t value) + { + this->precision_ = value; + return *this; + } + + basic_csv_options& header_lines(std::size_t value) + { + this->header_lines_ = value; + return *this; + } + + basic_csv_options& assume_header(bool value) + { + this->assume_header_ = value; + return *this; + } + + basic_csv_options& ignore_empty_values(bool value) + { + this->ignore_empty_values_ = value; + return *this; + } + + basic_csv_options& ignore_empty_lines(bool value) + { + this->ignore_empty_lines_ = value; + return *this; + } + + basic_csv_options& trim_leading(bool value) + { + this->trim_leading_ = value; + return *this; + } + + basic_csv_options& trim_trailing(bool value) + { + this->trim_trailing_ = value; + return *this; + } + + basic_csv_options& trim_leading_inside_quotes(bool value) + { + this->trim_leading_inside_quotes_ = value; + return *this; + } + + basic_csv_options& trim_trailing_inside_quotes(bool value) + { + this->trim_trailing_inside_quotes_ = value; + return *this; + } + + basic_csv_options& trim(bool value) + { + this->trim_leading_ = value; + this->trim_trailing_ = value; + return *this; + } + + basic_csv_options& trim_inside_quotes(bool value) + { + this->trim_leading_inside_quotes_ = value; + this->trim_trailing_inside_quotes_ = value; + return *this; + } + + basic_csv_options& unquoted_empty_value_is_null(bool value) + { + this->unquoted_empty_value_is_null_ = value; + return *this; + } + + basic_csv_options& column_names(const string_type& value) + { + this->column_names_ = value; + return *this; + } + + basic_csv_options& column_types(const string_type& value) + { + this->column_types_ = value; + return *this; + } + + basic_csv_options& column_defaults(const string_type& value) + { + this->column_defaults_ = value; + return *this; + } + + basic_csv_options& field_delimiter(char_type value) + { + this->field_delimiter_ = value; + return *this; + } + + basic_csv_options& subfield_delimiter(char_type value) + { + this->subfield_delimiter_ = value; + return *this; + } + + basic_csv_options& line_delimiter(const string_type& value) + { + this->line_delimiter_ = value; + return *this; + } + + basic_csv_options& quote_char(char_type value) + { + this->quote_char_ = value; + return *this; + } + + basic_csv_options& infer_types(bool value) + { + this->infer_types_ = value; + return *this; + } + + basic_csv_options& lossless_number(bool value) + { + this->lossless_number_ = value; + return *this; + } + + basic_csv_options& quote_escape_char(char_type value) + { + this->quote_escape_char_ = value; + return *this; + } + + basic_csv_options& comment_starter(char_type value) + { + this->comment_starter_ = value; + return *this; + } + + basic_csv_options& quote_style(quote_style_kind value) + { + this->quote_style_ = value; + return *this; + } + +//#if !defined(JSONCONS_NO_DEPRECATED) + basic_csv_options& mapping(csv_mapping_kind value) + { + this->mapping_ = value; + return *this; + } +//#endif + + basic_csv_options& mapping_kind(csv_mapping_kind value) + { + this->mapping_ = value; + return *this; + } + + basic_csv_options& max_lines(std::size_t value) + { + this->max_lines_ = value; + return *this; + } + + basic_csv_options& nan_to_num(const string_type& value) + { + this->enable_nan_to_num_ = true; + this->nan_to_str_.clear(); + this->nan_to_num_ = value; + return *this; + } + + basic_csv_options& inf_to_num(const string_type& value) + { + this->enable_inf_to_num_ = true; + this->inf_to_str_.clear(); + this->inf_to_num_ = value; + return *this; + } + + basic_csv_options& neginf_to_num(const string_type& value) + { + this->enable_neginf_to_num_ = true; + this->neginf_to_str_.clear(); + this->neginf_to_num_ = value; + return *this; + } + + basic_csv_options& nan_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_nan_to_str_ = true; + this->enable_str_to_nan_ = enable_inverse; + this->nan_to_num_.clear(); + this->nan_to_str_ = value; + return *this; + } + + basic_csv_options& inf_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_inf_to_str_ = true; + this->enable_inf_to_str_ = enable_inverse; + this->inf_to_num_.clear(); + this->inf_to_str_ = value; + return *this; + } + + basic_csv_options& neginf_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_neginf_to_str_ = true; + this->enable_neginf_to_str_ = enable_inverse; + this->neginf_to_num_.clear(); + this->neginf_to_str_ = value; + return *this; + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + JSONCONS_DEPRECATED_MSG("Instead, use float_format(float_chars_format)") + basic_csv_options& floating_point_format(float_chars_format value) + { + this->float_format_ = value; + return *this; + } +#endif + +}; + +using csv_options = basic_csv_options<char>; +using wcsv_options = basic_csv_options<wchar_t>; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use csv_options") typedef csv_options csv_parameters; +JSONCONS_DEPRECATED_MSG("Instead, use wcsv_options") typedef wcsv_options wcsv_parameters; +JSONCONS_DEPRECATED_MSG("Instead, use csv_options") typedef csv_options csv_serializing_options; +JSONCONS_DEPRECATED_MSG("Instead, use wcsv_options") typedef wcsv_options wcsv_serializing_options; +#endif + + +}} +#endif diff --git a/include/jsoncons_ext/csv/csv_parser.hpp b/include/jsoncons_ext/csv/csv_parser.hpp new file mode 100644 index 0000000..37887e2 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_parser.hpp @@ -0,0 +1,2097 @@ +// Copyright 2015 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_PARSER_HPP +#define JSONCONS_CSV_CSV_PARSER_HPP + +#include <memory> // std::allocator +#include <string> +#include <sstream> +#include <vector> +#include <stdexcept> +#include <system_error> +#include <cctype> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons/json_reader.hpp> +#include <jsoncons/json_filter.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons/detail/parse_number.hpp> +#include <jsoncons_ext/csv/csv_error.hpp> +#include <jsoncons_ext/csv/csv_options.hpp> + +namespace jsoncons { namespace csv { + +enum class csv_mode +{ + initial, + header, + data, + subfields +}; + +enum class csv_parse_state +{ + start, + cr, + column_labels, + expect_comment_or_record, + expect_record, + end_record, + no_more_records, + comment, + between_values, + quoted_string, + unquoted_string, + before_unquoted_string, + escaped_value, + minus, + zero, + integer, + fraction, + exp1, + exp2, + exp3, + accept, + before_unquoted_field, + before_unquoted_field_tail, + before_unquoted_field_tail1, + before_last_unquoted_field, + before_last_unquoted_field_tail, + before_unquoted_subfield, + before_unquoted_subfield_tail, + before_quoted_subfield, + before_quoted_subfield_tail, + before_quoted_field, + before_quoted_field_tail, + before_last_quoted_field, + before_last_quoted_field_tail, + done +}; + +enum class cached_state +{ + begin_object, + end_object, + begin_array, + end_array, + name, + item, + done +}; + +struct default_csv_parsing +{ + bool operator()(csv_errc, const ser_context&) noexcept + { + return false; + } +}; + +namespace detail { + + template <class CharT,class TempAllocator> + class parse_event + { + using temp_allocator_type = TempAllocator; + using string_view_type = typename basic_json_visitor<CharT>::string_view_type; + using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>; + using byte_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<uint8_t>; + using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>; + using byte_string_type = basic_byte_string<byte_allocator_type>; + + staj_event_type event_type; + string_type string_value; + byte_string_type byte_string_value; + union + { + bool bool_value; + int64_t int64_value; + uint64_t uint64_value; + double double_value; + }; + semantic_tag tag; + public: + parse_event(staj_event_type event_type, semantic_tag tag, const TempAllocator& alloc) + : event_type(event_type), + string_value(alloc), + byte_string_value(alloc), + tag(tag) + { + } + + parse_event(const string_view_type& value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::string_value), + string_value(value.data(),value.length(),alloc), + byte_string_value(alloc), + tag(tag) + { + } + + parse_event(const byte_string_view& value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::byte_string_value), + string_value(alloc), + byte_string_value(value.data(),value.size(),alloc), + tag(tag) + { + } + + parse_event(bool value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::bool_value), + string_value(alloc), + byte_string_value(alloc), + bool_value(value), + tag(tag) + { + } + + parse_event(int64_t value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::int64_value), + string_value(alloc), + byte_string_value(alloc), + int64_value(value), + tag(tag) + { + } + + parse_event(uint64_t value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::uint64_value), + string_value(alloc), + byte_string_value(alloc), + uint64_value(value), + tag(tag) + { + } + + parse_event(double value, semantic_tag tag, const TempAllocator& alloc) + : event_type(staj_event_type::double_value), + string_value(alloc), + byte_string_value(alloc), + double_value(value), + tag(tag) + { + } + + parse_event(const parse_event&) = default; + parse_event(parse_event&&) = default; + parse_event& operator=(const parse_event&) = default; + parse_event& operator=(parse_event&&) = default; + + bool replay(basic_json_visitor<CharT>& visitor) const + { + switch (event_type) + { + case staj_event_type::begin_array: + return visitor.begin_array(tag, ser_context()); + case staj_event_type::end_array: + return visitor.end_array(ser_context()); + case staj_event_type::string_value: + return visitor.string_value(string_value, tag, ser_context()); + case staj_event_type::byte_string_value: + case staj_event_type::null_value: + return visitor.null_value(tag, ser_context()); + case staj_event_type::bool_value: + return visitor.bool_value(bool_value, tag, ser_context()); + case staj_event_type::int64_value: + return visitor.int64_value(int64_value, tag, ser_context()); + case staj_event_type::uint64_value: + return visitor.uint64_value(uint64_value, tag, ser_context()); + case staj_event_type::double_value: + return visitor.double_value(double_value, tag, ser_context()); + default: + return false; + } + } + }; + + template <class CharT, class TempAllocator> + class m_columns_filter : public basic_json_visitor<CharT> + { + public: + using string_view_type = typename basic_json_visitor<CharT>::string_view_type; + using char_type = CharT; + using temp_allocator_type = TempAllocator; + + using char_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT>; + using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>; + + using string_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type>; + using parse_event_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event<CharT,TempAllocator>>; + using parse_event_vector_type = std::vector<parse_event<CharT,TempAllocator>, parse_event_allocator_type>; + using parse_event_vector_allocator_type = typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<parse_event_vector_type>; + private: + TempAllocator alloc_; + std::size_t name_index_; + int level_; + cached_state state_; + std::size_t column_index_; + std::size_t row_index_; + + std::vector<string_type, string_allocator_type> column_names_; + std::vector<parse_event_vector_type,parse_event_vector_allocator_type> cached_events_; + public: + + m_columns_filter(const TempAllocator& alloc) + : alloc_(alloc), + name_index_(0), + level_(0), + state_(cached_state::begin_object), + column_index_(0), + row_index_(0), + column_names_(alloc), + cached_events_(alloc) + { + } + + void reset() + { + name_index_ = 0; + level_ = 0; + state_ = cached_state::begin_object; + column_index_ = 0; + row_index_ = 0; + column_names_.clear(); + cached_events_.clear(); + } + + bool done() const + { + return state_ == cached_state::done; + } + + void initialize(const std::vector<string_type, string_allocator_type>& column_names) + { + for (const auto& name : column_names) + { + column_names_.push_back(name); + cached_events_.emplace_back(alloc_); + } + name_index_ = 0; + level_ = 0; + column_index_ = 0; + row_index_ = 0; + state_ = cached_state::begin_object; + } + + void skip_column() + { + ++name_index_; + } + + bool replay_parse_events(basic_json_visitor<CharT>& visitor) + { + bool more = true; + while (more) + { + switch (state_) + { + case cached_state::begin_object: + more = visitor.begin_object(semantic_tag::none, ser_context()); + column_index_ = 0; + state_ = cached_state::name; + break; + case cached_state::end_object: + more = visitor.end_object(ser_context()); + state_ = cached_state::done; + break; + case cached_state::name: + if (column_index_ < column_names_.size()) + { + more = visitor.key(column_names_[column_index_], ser_context()); + state_ = cached_state::begin_array; + } + else + { + state_ = cached_state::end_object; + } + break; + case cached_state::begin_array: + more = visitor.begin_array(semantic_tag::none, ser_context()); + row_index_ = 0; + state_ = cached_state::item; + break; + case cached_state::end_array: + more = visitor.end_array(ser_context()); + ++column_index_; + state_ = cached_state::name; + break; + case cached_state::item: + if (row_index_ < cached_events_[column_index_].size()) + { + more = cached_events_[column_index_][row_index_].replay(visitor); + ++row_index_; + } + else + { + state_ = cached_state::end_array; + } + break; + default: + more = false; + break; + } + } + return more; + } + + void visit_flush() override + { + } + + bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override + { + ec = csv_errc::invalid_parse_state; + return false; + } + + bool visit_end_object(const ser_context&, std::error_code& ec) override + { + ec = csv_errc::invalid_parse_state; + return false; + } + + bool visit_begin_array(semantic_tag tag, const ser_context&, std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(staj_event_type::begin_array, tag, alloc_); + + ++level_; + } + return true; + } + + bool visit_end_array(const ser_context&, std::error_code&) override + { + if (level_ > 0) + { + cached_events_[name_index_].emplace_back(staj_event_type::end_array, semantic_tag::none, alloc_); + ++name_index_; + --level_; + } + else + { + name_index_ = 0; + } + return true; + } + + bool visit_key(const string_view_type&, const ser_context&, std::error_code& ec) override + { + ec = csv_errc::invalid_parse_state; + return false; + } + + bool visit_null(semantic_tag tag, const ser_context&, std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(staj_event_type::null_value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_string(const string_view_type& value, semantic_tag tag, const ser_context&, std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_byte_string(const byte_string_view& value, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_double(double value, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_int64(int64_t value, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_uint64(uint64_t value, + semantic_tag tag, + const ser_context&, + std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + + bool visit_bool(bool value, semantic_tag tag, const ser_context&, std::error_code&) override + { + if (name_index_ < column_names_.size()) + { + cached_events_[name_index_].emplace_back(value, tag, alloc_); + if (level_ == 0) + { + ++name_index_; + } + } + return true; + } + }; + +} // namespace detail + +template<class CharT,class TempAllocator=std::allocator<char>> +class basic_csv_parser : public ser_context +{ +public: + using string_view_type = jsoncons::basic_string_view<CharT>; + using char_type = CharT; +private: + struct string_maps_to_double + { + string_view_type s; + + bool operator()(const std::pair<string_view_type,double>& val) const + { + return val.first == s; + } + }; + + using temp_allocator_type = TempAllocator; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type; + using string_type = std::basic_string<CharT,std::char_traits<CharT>,char_allocator_type>; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<string_type> string_allocator_type; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_mode> csv_mode_allocator_type; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_type_info> csv_type_info_allocator_type; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<std::vector<string_type,string_allocator_type>> string_vector_allocator_type; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<csv_parse_state> csv_parse_state_allocator_type; + + static constexpr int default_depth = 3; + + temp_allocator_type alloc_; + csv_parse_state state_; + basic_json_visitor<CharT>* visitor_; + std::function<bool(csv_errc,const ser_context&)> err_handler_; + std::size_t column_; + std::size_t line_; + int depth_; + const basic_csv_decode_options<CharT> options_; + std::size_t column_index_; + std::size_t level_; + std::size_t offset_; + jsoncons::detail::chars_to to_double_; + const CharT* begin_input_; + const CharT* input_end_; + const CharT* input_ptr_; + bool more_; + std::size_t header_line_; + + detail::m_columns_filter<CharT,TempAllocator> m_columns_filter_; + std::vector<csv_mode,csv_mode_allocator_type> stack_; + std::vector<string_type,string_allocator_type> column_names_; + std::vector<csv_type_info,csv_type_info_allocator_type> column_types_; + std::vector<string_type,string_allocator_type> column_defaults_; + std::vector<csv_parse_state,csv_parse_state_allocator_type> state_stack_; + string_type buffer_; + std::vector<std::pair<string_view_type,double>> string_double_map_; + +public: + basic_csv_parser(const TempAllocator& alloc = TempAllocator()) + : basic_csv_parser(basic_csv_decode_options<CharT>(), + default_csv_parsing(), + alloc) + { + } + + basic_csv_parser(const basic_csv_decode_options<CharT>& options, + const TempAllocator& alloc = TempAllocator()) + : basic_csv_parser(options, + default_csv_parsing(), + alloc) + { + } + + basic_csv_parser(std::function<bool(csv_errc,const ser_context&)> err_handler, + const TempAllocator& alloc = TempAllocator()) + : basic_csv_parser(basic_csv_decode_options<CharT>(), + err_handler, + alloc) + { + } + + basic_csv_parser(const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const TempAllocator& alloc = TempAllocator()) + : alloc_(alloc), + state_(csv_parse_state::start), + visitor_(nullptr), + err_handler_(err_handler), + column_(1), + line_(1), + depth_(default_depth), + options_(options), + column_index_(0), + level_(0), + offset_(0), + begin_input_(nullptr), + input_end_(nullptr), + input_ptr_(nullptr), + more_(true), + header_line_(1), + m_columns_filter_(alloc), + stack_(alloc), + column_names_(alloc), + column_types_(alloc), + column_defaults_(alloc), + state_stack_(alloc), + buffer_(alloc) + { + if (options_.enable_str_to_nan()) + { + string_double_map_.emplace_back(options_.nan_to_str(),std::nan("")); + } + if (options_.enable_str_to_inf()) + { + string_double_map_.emplace_back(options_.inf_to_str(),std::numeric_limits<double>::infinity()); + } + if (options_.enable_str_to_neginf()) + { + string_double_map_.emplace_back(options_.neginf_to_str(),-std::numeric_limits<double>::infinity()); + } + + initialize(); + } + + ~basic_csv_parser() noexcept + { + } + + bool done() const + { + return state_ == csv_parse_state::done; + } + + bool accept() const + { + return state_ == csv_parse_state::accept || state_ == csv_parse_state::done; + } + + bool stopped() const + { + return !more_; + } + + bool source_exhausted() const + { + return input_ptr_ == input_end_; + } + + const std::vector<string_type,string_allocator_type>& column_labels() const + { + return column_names_; + } + + void reinitialize() + { + state_ = csv_parse_state::start; + visitor_ = nullptr; + column_ = 1; + line_ = 1; + depth_ = default_depth; + column_index_ = 0; + level_ = 0; + offset_ = 0; + begin_input_ = nullptr; + input_end_ = nullptr; + input_ptr_ = nullptr; + more_ = true; + header_line_ = 1; + m_columns_filter_.reset(); + stack_.clear(); + column_names_.clear(); + column_types_.clear(); + column_defaults_.clear(); + state_stack_.clear(); + buffer_.clear(); + + initialize(); + } + + void restart() + { + more_ = true; + } + + void parse_some(basic_json_visitor<CharT>& visitor) + { + std::error_code ec; + parse_some(visitor,ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,line_,column_)); + } + } + + void parse_some(basic_json_visitor<CharT>& visitor, std::error_code& ec) + { + switch (options_.mapping_kind()) + { + case csv_mapping_kind::m_columns: + visitor_ = &m_columns_filter_; + break; + default: + visitor_ = std::addressof(visitor); + break; + } + + const CharT* local_input_end = input_end_; + + if (input_ptr_ == local_input_end && more_) + { + switch (state_) + { + case csv_parse_state::start: + ec = csv_errc::source_error; + more_ = false; + return; + case csv_parse_state::before_unquoted_field: + case csv_parse_state::before_last_unquoted_field: + end_unquoted_string_value(ec); + state_ = csv_parse_state::before_last_unquoted_field_tail; + break; + case csv_parse_state::before_last_unquoted_field_tail: + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + ++column_index_; + state_ = csv_parse_state::end_record; + break; + case csv_parse_state::before_unquoted_string: + buffer_.clear(); + JSONCONS_FALLTHROUGH; + case csv_parse_state::unquoted_string: + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + if (options_.ignore_empty_values() && buffer_.empty()) + { + state_ = csv_parse_state::end_record; + } + else + { + before_value(ec); + state_ = csv_parse_state::before_unquoted_field; + } + break; + case csv_parse_state::before_last_quoted_field: + end_quoted_string_value(ec); + ++column_index_; + state_ = csv_parse_state::end_record; + break; + case csv_parse_state::escaped_value: + if (options_.quote_escape_char() == options_.quote_char()) + { + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + before_value(ec); + ++column_; + state_ = csv_parse_state::before_last_quoted_field; + } + else + { + state_ = csv_parse_state::end_record; + } + } + else + { + ec = csv_errc::invalid_escaped_char; + more_ = false; + return; + } + break; + case csv_parse_state::end_record: + if (column_index_ > 0) + { + after_record(ec); + } + state_ = csv_parse_state::no_more_records; + break; + case csv_parse_state::no_more_records: + switch (stack_.back()) + { + case csv_mode::header: + stack_.pop_back(); + break; + case csv_mode::data: + stack_.pop_back(); + break; + default: + break; + } + more_ = visitor_->end_array(*this, ec); + if (options_.mapping_kind() == csv_mapping_kind::m_columns) + { + if (!m_columns_filter_.done()) + { + more_ = m_columns_filter_.replay_parse_events(visitor); + } + else + { + state_ = csv_parse_state::accept; + } + } + else + { + state_ = csv_parse_state::accept; + } + break; + case csv_parse_state::accept: + if (!(stack_.size() == 1 && stack_.back() == csv_mode::initial)) + { + err_handler_(csv_errc::unexpected_eof, *this); + ec = csv_errc::unexpected_eof; + more_ = false; + return; + } + stack_.pop_back(); + visitor_->flush(); + state_ = csv_parse_state::done; + more_ = false; + return; + default: + state_ = csv_parse_state::end_record; + break; + } + } + + for (; (input_ptr_ < local_input_end) && more_;) + { + CharT curr_char = *input_ptr_; + + switch (state_) + { + case csv_parse_state::cr: + ++line_; + column_ = 1; + switch (*input_ptr_) + { + case '\n': + ++input_ptr_; + state_ = pop_state(); + break; + default: + state_ = pop_state(); + break; + } + break; + case csv_parse_state::start: + if (options_.mapping_kind() != csv_mapping_kind::m_columns) + { + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + } + if (options_.assume_header() && options_.mapping_kind() == csv_mapping_kind::n_rows && options_.column_names().size() > 0) + { + column_index_ = 0; + state_ = csv_parse_state::column_labels; + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + state_ = csv_parse_state::expect_comment_or_record; + } + else + { + state_ = csv_parse_state::expect_comment_or_record; + } + break; + case csv_parse_state::column_labels: + if (column_index_ < column_names_.size()) + { + more_ = visitor_->string_value(column_names_[column_index_], semantic_tag::none, *this, ec); + ++column_index_; + } + else + { + more_ = visitor_->end_array(*this, ec); + state_ = csv_parse_state::expect_comment_or_record; + //stack_.back() = csv_mode::data; + column_index_ = 0; + } + break; + case csv_parse_state::comment: + switch (curr_char) + { + case '\n': + { + ++line_; + if (stack_.back() == csv_mode::header) + { + ++header_line_; + } + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + break; + } + case '\r': + ++line_; + if (stack_.back() == csv_mode::header) + { + ++header_line_; + } + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + push_state(state_); + state_ = csv_parse_state::cr; + break; + default: + ++column_; + break; + } + ++input_ptr_; + break; + + case csv_parse_state::expect_comment_or_record: + buffer_.clear(); + if (curr_char == options_.comment_starter()) + { + state_ = csv_parse_state::comment; + ++column_; + ++input_ptr_; + } + else + { + state_ = csv_parse_state::expect_record; + } + break; + case csv_parse_state::quoted_string: + { + if (curr_char == options_.quote_escape_char()) + { + state_ = csv_parse_state::escaped_value; + } + else if (curr_char == options_.quote_char()) + { + state_ = csv_parse_state::between_values; + } + else + { + buffer_.push_back(static_cast<CharT>(curr_char)); + } + } + ++column_; + ++input_ptr_; + break; + case csv_parse_state::escaped_value: + { + if (curr_char == options_.quote_char()) + { + buffer_.push_back(static_cast<CharT>(curr_char)); + state_ = csv_parse_state::quoted_string; + ++column_; + ++input_ptr_; + } + else if (options_.quote_escape_char() == options_.quote_char()) + { + state_ = csv_parse_state::between_values; + } + else + { + ec = csv_errc::invalid_escaped_char; + more_ = false; + return; + } + } + break; + case csv_parse_state::between_values: + switch (curr_char) + { + case '\r': + case '\n': + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + before_value(ec); + state_ = csv_parse_state::before_last_quoted_field; + } + else + { + state_ = csv_parse_state::end_record; + } + break; + } + default: + if (curr_char == options_.field_delimiter()) + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + before_value(ec); + state_ = csv_parse_state::before_quoted_field; + } + else if (options_.subfield_delimiter() != char_type() && curr_char == options_.subfield_delimiter()) + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + before_value(ec); + state_ = csv_parse_state::before_quoted_subfield; + } + else if (curr_char == ' ' || curr_char == '\t') + { + ++column_; + ++input_ptr_; + } + else + { + ec = csv_errc::unexpected_char_between_fields; + more_ = false; + return; + } + break; + } + break; + case csv_parse_state::before_unquoted_string: + { + buffer_.clear(); + state_ = csv_parse_state::unquoted_string; + break; + } + case csv_parse_state::before_unquoted_field: + end_unquoted_string_value(ec); + state_ = csv_parse_state::before_unquoted_field_tail; + break; + case csv_parse_state::before_unquoted_field_tail: + { + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + ++column_index_; + state_ = csv_parse_state::before_unquoted_string; + ++column_; + ++input_ptr_; + break; + } + case csv_parse_state::before_unquoted_field_tail1: + { + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + state_ = csv_parse_state::end_record; + ++column_; + ++input_ptr_; + break; + } + + case csv_parse_state::before_last_unquoted_field: + end_unquoted_string_value(ec); + state_ = csv_parse_state::before_last_unquoted_field_tail; + break; + + case csv_parse_state::before_last_unquoted_field_tail: + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + ++column_index_; + state_ = csv_parse_state::end_record; + break; + + case csv_parse_state::before_unquoted_subfield: + if (stack_.back() == csv_mode::data) + { + stack_.push_back(csv_mode::subfields); + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + } + state_ = csv_parse_state::before_unquoted_subfield_tail; + break; + case csv_parse_state::before_unquoted_subfield_tail: + end_unquoted_string_value(ec); + state_ = csv_parse_state::before_unquoted_string; + ++column_; + ++input_ptr_; + break; + case csv_parse_state::before_quoted_field: + end_quoted_string_value(ec); + state_ = csv_parse_state::before_unquoted_field_tail; // return to unquoted + break; + case csv_parse_state::before_quoted_subfield: + if (stack_.back() == csv_mode::data) + { + stack_.push_back(csv_mode::subfields); + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + } + state_ = csv_parse_state::before_quoted_subfield_tail; + break; + case csv_parse_state::before_quoted_subfield_tail: + end_quoted_string_value(ec); + state_ = csv_parse_state::before_unquoted_string; + ++column_; + ++input_ptr_; + break; + case csv_parse_state::before_last_quoted_field: + end_quoted_string_value(ec); + state_ = csv_parse_state::before_last_quoted_field_tail; + break; + case csv_parse_state::before_last_quoted_field_tail: + if (stack_.back() == csv_mode::subfields) + { + stack_.pop_back(); + more_ = visitor_->end_array(*this, ec); + } + ++column_index_; + state_ = csv_parse_state::end_record; + break; + case csv_parse_state::unquoted_string: + { + switch (curr_char) + { + case '\n': + case '\r': + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + before_value(ec); + state_ = csv_parse_state::before_last_unquoted_field; + } + else + { + state_ = csv_parse_state::end_record; + } + break; + } + default: + if (curr_char == options_.field_delimiter()) + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + before_value(ec); + state_ = csv_parse_state::before_unquoted_field; + } + else if (options_.subfield_delimiter() != char_type() && curr_char == options_.subfield_delimiter()) + { + if (options_.trim_leading() || options_.trim_trailing()) + { + trim_string_buffer(options_.trim_leading(),options_.trim_trailing()); + } + before_value(ec); + state_ = csv_parse_state::before_unquoted_subfield; + } + else if (curr_char == options_.quote_char()) + { + buffer_.clear(); + state_ = csv_parse_state::quoted_string; + ++column_; + ++input_ptr_; + } + else + { + buffer_.push_back(static_cast<CharT>(curr_char)); + ++column_; + ++input_ptr_; + } + break; + } + break; + } + case csv_parse_state::expect_record: + { + switch (curr_char) + { + case '\n': + { + if (!options_.ignore_empty_lines()) + { + before_record(ec); + state_ = csv_parse_state::end_record; + } + else + { + ++line_; + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + ++input_ptr_; + } + break; + } + case '\r': + if (!options_.ignore_empty_lines()) + { + before_record(ec); + state_ = csv_parse_state::end_record; + } + else + { + ++line_; + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + ++input_ptr_; + push_state(state_); + state_ = csv_parse_state::cr; + } + break; + case ' ': + case '\t': + if (!options_.trim_leading()) + { + buffer_.push_back(static_cast<CharT>(curr_char)); + before_record(ec); + state_ = csv_parse_state::unquoted_string; + } + ++column_; + ++input_ptr_; + break; + default: + before_record(ec); + if (curr_char == options_.quote_char()) + { + buffer_.clear(); + state_ = csv_parse_state::quoted_string; + ++column_; + ++input_ptr_; + } + else + { + state_ = csv_parse_state::unquoted_string; + } + break; + } + break; + } + case csv_parse_state::end_record: + { + switch (curr_char) + { + case '\n': + { + ++line_; + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + after_record(ec); + ++input_ptr_; + break; + } + case '\r': + ++line_; + column_ = 1; + state_ = csv_parse_state::expect_comment_or_record; + after_record(ec); + push_state(state_); + state_ = csv_parse_state::cr; + ++input_ptr_; + break; + case ' ': + case '\t': + ++column_; + ++input_ptr_; + break; + default: + err_handler_(csv_errc::syntax_error, *this); + ec = csv_errc::syntax_error; + more_ = false; + return; + } + break; + } + default: + err_handler_(csv_errc::invalid_parse_state, *this); + ec = csv_errc::invalid_parse_state; + more_ = false; + return; + } + if (line_ > options_.max_lines()) + { + state_ = csv_parse_state::done; + more_ = false; + } + } + } + + void finish_parse() + { + std::error_code ec; + finish_parse(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,line_,column_)); + } + } + + void finish_parse(std::error_code& ec) + { + while (more_) + { + parse_some(ec); + } + } + + csv_parse_state state() const + { + return state_; + } + + void update(const string_view_type sv) + { + update(sv.data(),sv.length()); + } + + void update(const CharT* data, std::size_t length) + { + begin_input_ = data; + input_end_ = data + length; + input_ptr_ = begin_input_; + } + + std::size_t line() const override + { + return line_; + } + + std::size_t column() const override + { + return column_; + } + +private: + void initialize() + { + jsoncons::csv::detail::parse_column_names(options_.column_names(), column_names_); + jsoncons::csv::detail::parse_column_types(options_.column_types(), column_types_); + jsoncons::csv::detail::parse_column_names(options_.column_defaults(), column_defaults_); + + stack_.reserve(default_depth); + stack_.push_back(csv_mode::initial); + stack_.push_back((options_.header_lines() > 0) ? csv_mode::header + : csv_mode::data); + } + + // name + void before_value(std::error_code& ec) + { + switch (stack_.back()) + { + case csv_mode::header: + if (options_.trim_leading_inside_quotes() || options_.trim_trailing_inside_quotes()) + { + trim_string_buffer(options_.trim_leading_inside_quotes(),options_.trim_trailing_inside_quotes()); + } + if (line_ == header_line_) + { + column_names_.push_back(buffer_); + if (options_.assume_header() && options_.mapping_kind() == csv_mapping_kind::n_rows) + { + more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec); + } + } + break; + case csv_mode::data: + if (options_.mapping_kind() == csv_mapping_kind::n_objects) + { + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + if (column_index_ < column_names_.size() + offset_) + { + more_ = visitor_->key(column_names_[column_index_ - offset_], *this, ec); + } + } + } + break; + default: + break; + } + } + + // begin_array or begin_record + void before_record(std::error_code& ec) + { + offset_ = 0; + + switch (stack_.back()) + { + case csv_mode::header: + if (options_.assume_header() && line_ == header_line_) + { + if (options_.mapping_kind() == csv_mapping_kind::n_rows) + { + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + } + } + break; + case csv_mode::data: + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + break; + case csv_mapping_kind::n_objects: + more_ = visitor_->begin_object(semantic_tag::none, *this, ec); + break; + case csv_mapping_kind::m_columns: + break; + default: + break; + } + break; + default: + break; + } + } + + // end_array, begin_array, string_value (headers) + void after_record(std::error_code& ec) + { + if (column_types_.size() > 0) + { + if (level_ > 0) + { + more_ = visitor_->end_array(*this, ec); + level_ = 0; + } + } + switch (stack_.back()) + { + case csv_mode::header: + if (line_ >= options_.header_lines()) + { + stack_.back() = csv_mode::data; + } + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + if (options_.assume_header()) + { + more_ = visitor_->end_array(*this, ec); + } + break; + case csv_mapping_kind::m_columns: + m_columns_filter_.initialize(column_names_); + break; + default: + break; + } + break; + case csv_mode::data: + case csv_mode::subfields: + { + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + more_ = visitor_->end_array(*this, ec); + break; + case csv_mapping_kind::n_objects: + more_ = visitor_->end_object(*this, ec); + break; + case csv_mapping_kind::m_columns: + more_ = visitor_->end_array(*this, ec); + break; + } + break; + } + default: + break; + } + column_index_ = 0; + } + + void trim_string_buffer(bool trim_leading, bool trim_trailing) + { + std::size_t start = 0; + std::size_t length = buffer_.length(); + if (trim_leading) + { + bool done = false; + while (!done && start < buffer_.length()) + { + if ((buffer_[start] < 256) && std::isspace(buffer_[start])) + { + ++start; + } + else + { + done = true; + } + } + } + if (trim_trailing) + { + bool done = false; + while (!done && length > 0) + { + if ((buffer_[length-1] < 256) && std::isspace(buffer_[length-1])) + { + --length; + } + else + { + done = true; + } + } + } + if (start != 0 || length != buffer_.size()) + { + buffer_ = buffer_.substr(start,length-start); + } + } + + /* + end_array, begin_array, xxx_value (end_value) + */ + void end_unquoted_string_value(std::error_code& ec) + { + switch (stack_.back()) + { + case csv_mode::data: + case csv_mode::subfields: + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(options_.infer_types(), ec); + } + break; + case csv_mapping_kind::n_objects: + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + if (column_index_ < column_names_.size() + offset_) + { + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(options_.infer_types(), ec); + } + } + else if (level_ > 0) + { + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(options_.infer_types(), ec); + } + } + } + break; + case csv_mapping_kind::m_columns: + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + end_value(options_.infer_types(), ec); + } + else + { + m_columns_filter_.skip_column(); + } + break; + } + break; + default: + break; + } + } + + void end_quoted_string_value(std::error_code& ec) + { + switch (stack_.back()) + { + case csv_mode::data: + case csv_mode::subfields: + if (options_.trim_leading_inside_quotes() || options_.trim_trailing_inside_quotes()) + { + trim_string_buffer(options_.trim_leading_inside_quotes(),options_.trim_trailing_inside_quotes()); + } + switch (options_.mapping_kind()) + { + case csv_mapping_kind::n_rows: + end_value(false, ec); + break; + case csv_mapping_kind::n_objects: + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + if (column_index_ < column_names_.size() + offset_) + { + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(false, ec); + } + } + else if (level_ > 0) + { + if (options_.unquoted_empty_value_is_null() && buffer_.length() == 0) + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + else + { + end_value(false, ec); + } + } + } + break; + case csv_mapping_kind::m_columns: + if (!(options_.ignore_empty_values() && buffer_.empty())) + { + end_value(false, ec); + } + else + { + m_columns_filter_.skip_column(); + } + break; + } + break; + default: + break; + } + } + + void end_value(bool infer_types, std::error_code& ec) + { + auto it = std::find_if(string_double_map_.begin(), string_double_map_.end(), string_maps_to_double{ buffer_ }); + if (it != string_double_map_.end()) + { + more_ = visitor_->double_value(it->second, semantic_tag::none, *this, ec); + } + else if (column_index_ < column_types_.size() + offset_) + { + if (column_types_[column_index_ - offset_].col_type == csv_column_type::repeat_t) + { + offset_ = offset_ + column_types_[column_index_ - offset_].rep_count; + if (column_index_ - offset_ + 1 < column_types_.size()) + { + if (column_index_ == offset_ || level_ > column_types_[column_index_-offset_].level) + { + more_ = visitor_->end_array(*this, ec); + } + level_ = column_index_ == offset_ ? 0 : column_types_[column_index_ - offset_].level; + } + } + if (level_ < column_types_[column_index_ - offset_].level) + { + more_ = visitor_->begin_array(semantic_tag::none, *this, ec); + level_ = column_types_[column_index_ - offset_].level; + } + else if (level_ > column_types_[column_index_ - offset_].level) + { + more_ = visitor_->end_array(*this, ec); + level_ = column_types_[column_index_ - offset_].level; + } + switch (column_types_[column_index_ - offset_].col_type) + { + case csv_column_type::integer_t: + { + std::basic_istringstream<CharT,std::char_traits<CharT>,char_allocator_type> iss{buffer_}; + int64_t val; + iss >> val; + if (!iss.fail()) + { + more_ = visitor_->int64_value(val, semantic_tag::none, *this, ec); + } + else + { + if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0) + { + basic_json_parser<CharT,temp_allocator_type> parser(alloc_); + parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length()); + parser.parse_some(*visitor_); + parser.finish_parse(*visitor_); + } + else + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + } + } + break; + case csv_column_type::float_t: + { + if (options_.lossless_number()) + { + more_ = visitor_->string_value(buffer_,semantic_tag::bigdec, *this, ec); + } + else + { + std::basic_istringstream<CharT, std::char_traits<CharT>, char_allocator_type> iss{ buffer_ }; + double val; + iss >> val; + if (!iss.fail()) + { + more_ = visitor_->double_value(val, semantic_tag::none, *this, ec); + } + else + { + if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0) + { + basic_json_parser<CharT,temp_allocator_type> parser(alloc_); + parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length()); + parser.parse_some(*visitor_); + parser.finish_parse(*visitor_); + } + else + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + } + } + } + break; + case csv_column_type::boolean_t: + { + if (buffer_.length() == 1 && buffer_[0] == '0') + { + more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec); + } + else if (buffer_.length() == 1 && buffer_[0] == '1') + { + more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec); + } + else if (buffer_.length() == 5 && ((buffer_[0] == 'f' || buffer_[0] == 'F') && (buffer_[1] == 'a' || buffer_[1] == 'A') && (buffer_[2] == 'l' || buffer_[2] == 'L') && (buffer_[3] == 's' || buffer_[3] == 'S') && (buffer_[4] == 'e' || buffer_[4] == 'E'))) + { + more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec); + } + else if (buffer_.length() == 4 && ((buffer_[0] == 't' || buffer_[0] == 'T') && (buffer_[1] == 'r' || buffer_[1] == 'R') && (buffer_[2] == 'u' || buffer_[2] == 'U') && (buffer_[3] == 'e' || buffer_[3] == 'E'))) + { + more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec); + } + else + { + if (column_index_ - offset_ < column_defaults_.size() && column_defaults_[column_index_ - offset_].length() > 0) + { + basic_json_parser<CharT,temp_allocator_type> parser(alloc_); + parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length()); + parser.parse_some(*visitor_); + parser.finish_parse(*visitor_); + } + else + { + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + } + } + } + break; + default: + if (buffer_.length() > 0) + { + more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec); + } + else + { + if (column_index_ < column_defaults_.size() + offset_ && column_defaults_[column_index_ - offset_].length() > 0) + { + basic_json_parser<CharT,temp_allocator_type> parser(alloc_); + parser.update(column_defaults_[column_index_ - offset_].data(),column_defaults_[column_index_ - offset_].length()); + parser.parse_some(*visitor_); + parser.finish_parse(*visitor_); + } + else + { + more_ = visitor_->string_value(string_view_type(), semantic_tag::none, *this, ec); + } + } + break; + } + } + else + { + if (infer_types) + { + end_value_with_numeric_check(ec); + } + else + { + more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec); + } + } + } + + enum class numeric_check_state + { + initial, + null, + boolean_true, + boolean_false, + minus, + zero, + integer, + fraction1, + fraction, + exp1, + exp, + not_a_number + }; + + /* + xxx_value + */ + void end_value_with_numeric_check(std::error_code& ec) + { + numeric_check_state state = numeric_check_state::initial; + bool is_negative = false; + int precision = 0; + uint8_t decimal_places = 0; + + auto last = buffer_.end(); + + std::string buffer; + for (auto p = buffer_.begin(); state != numeric_check_state::not_a_number && p != last; ++p) + { + switch (state) + { + case numeric_check_state::initial: + { + switch (*p) + { + case 'n':case 'N': + if ((last-p) == 4 && (p[1] == 'u' || p[1] == 'U') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 'l' || p[3] == 'L')) + { + state = numeric_check_state::null; + } + else + { + state = numeric_check_state::not_a_number; + } + break; + case 't':case 'T': + if ((last-p) == 4 && (p[1] == 'r' || p[1] == 'R') && (p[2] == 'u' || p[2] == 'U') && (p[3] == 'e' || p[3] == 'U')) + { + state = numeric_check_state::boolean_true; + } + else + { + state = numeric_check_state::not_a_number; + } + break; + case 'f':case 'F': + if ((last-p) == 5 && (p[1] == 'a' || p[1] == 'A') && (p[2] == 'l' || p[2] == 'L') && (p[3] == 's' || p[3] == 'S') && (p[4] == 'e' || p[4] == 'E')) + { + state = numeric_check_state::boolean_false; + } + else + { + state = numeric_check_state::not_a_number; + } + break; + case '-': + is_negative = true; + buffer.push_back(*p); + state = numeric_check_state::minus; + break; + case '0': + ++precision; + buffer.push_back(*p); + state = numeric_check_state::zero; + break; + case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + buffer.push_back(*p); + state = numeric_check_state::integer; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::zero: + { + switch (*p) + { + case '.': + buffer.push_back(to_double_.get_decimal_point()); + state = numeric_check_state::fraction1; + break; + case 'e':case 'E': + buffer.push_back(*p); + state = numeric_check_state::exp1; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::integer: + { + switch (*p) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + buffer.push_back(*p); + break; + case '.': + buffer.push_back(to_double_.get_decimal_point()); + state = numeric_check_state::fraction1; + break; + case 'e':case 'E': + buffer.push_back(*p); + state = numeric_check_state::exp1; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::minus: + { + switch (*p) + { + case '0': + ++precision; + buffer.push_back(*p); + state = numeric_check_state::zero; + break; + case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + buffer.push_back(*p); + state = numeric_check_state::integer; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::fraction1: + { + switch (*p) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + ++decimal_places; + buffer.push_back(*p); + state = numeric_check_state::fraction; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::fraction: + { + switch (*p) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + ++precision; + ++decimal_places; + buffer.push_back(*p); + break; + case 'e':case 'E': + buffer.push_back(*p); + state = numeric_check_state::exp1; + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::exp1: + { + switch (*p) + { + case '-': + buffer.push_back(*p); + break; + case '+': + break; + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + state = numeric_check_state::exp; + buffer.push_back(*p); + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + case numeric_check_state::exp: + { + switch (*p) + { + case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9': + buffer.push_back(*p); + break; + default: + state = numeric_check_state::not_a_number; + break; + } + break; + } + default: + break; + } + } + + switch (state) + { + case numeric_check_state::null: + more_ = visitor_->null_value(semantic_tag::none, *this, ec); + break; + case numeric_check_state::boolean_true: + more_ = visitor_->bool_value(true, semantic_tag::none, *this, ec); + break; + case numeric_check_state::boolean_false: + more_ = visitor_->bool_value(false, semantic_tag::none, *this, ec); + break; + case numeric_check_state::zero: + case numeric_check_state::integer: + { + if (is_negative) + { + int64_t val{ 0 }; + auto result = jsoncons::detail::to_integer_decimal(buffer_.data(), buffer_.length(), val); + if (result) + { + more_ = visitor_->int64_value(val, semantic_tag::none, *this, ec); + } + else // Must be overflow + { + more_ = visitor_->string_value(buffer_, semantic_tag::bigint, *this, ec); + } + } + else + { + uint64_t val{ 0 }; + auto result = jsoncons::detail::to_integer_decimal(buffer_.data(), buffer_.length(), val); + if (result) + { + more_ = visitor_->uint64_value(val, semantic_tag::none, *this, ec); + } + else if (result.ec == jsoncons::detail::to_integer_errc::overflow) + { + more_ = visitor_->string_value(buffer_, semantic_tag::bigint, *this, ec); + } + else + { + ec = result.ec; + more_ = false; + return; + } + } + break; + } + case numeric_check_state::fraction: + case numeric_check_state::exp: + { + if (options_.lossless_number()) + { + more_ = visitor_->string_value(buffer_,semantic_tag::bigdec, *this, ec); + } + else + { + double d = to_double_(buffer.c_str(), buffer.length()); + more_ = visitor_->double_value(d, semantic_tag::none, *this, ec); + } + break; + } + default: + { + more_ = visitor_->string_value(buffer_, semantic_tag::none, *this, ec); + break; + } + } + } + + void push_state(csv_parse_state state) + { + state_stack_.push_back(state); + } + + csv_parse_state pop_state() + { + JSONCONS_ASSERT(!state_stack_.empty()) + csv_parse_state state = state_stack_.back(); + state_stack_.pop_back(); + return state; + } +}; + +using csv_parser = basic_csv_parser<char>; +using wcsv_parser = basic_csv_parser<wchar_t>; + +}} + +#endif + diff --git a/include/jsoncons_ext/csv/csv_reader.hpp b/include/jsoncons_ext/csv/csv_reader.hpp new file mode 100644 index 0000000..f10211a --- /dev/null +++ b/include/jsoncons_ext/csv/csv_reader.hpp @@ -0,0 +1,348 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_READER_HPP +#define JSONCONS_CSV_CSV_READER_HPP + +#include <string> +#include <vector> +#include <stdexcept> +#include <memory> // std::allocator +#include <utility> // std::move +#include <istream> // std::basic_istream +#include <jsoncons/source.hpp> +#include <jsoncons/json_exception.hpp> +#include <jsoncons/json_visitor.hpp> +#include <jsoncons_ext/csv/csv_error.hpp> +#include <jsoncons_ext/csv/csv_parser.hpp> +#include <jsoncons/json.hpp> +#include <jsoncons/json_reader.hpp> +#include <jsoncons/json_decoder.hpp> +#include <jsoncons/source_adaptor.hpp> +#include <jsoncons_ext/csv/csv_options.hpp> + +namespace jsoncons { namespace csv { + + template<class CharT,class Source=jsoncons::stream_source<CharT>,class Allocator=std::allocator<char>> + class basic_csv_reader + { + struct stack_item + { + stack_item() noexcept + : array_begun_(false) + { + } + + bool array_begun_; + }; + using char_type = CharT; + using temp_allocator_type = Allocator; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type; + + basic_csv_reader(const basic_csv_reader&) = delete; + basic_csv_reader& operator = (const basic_csv_reader&) = delete; + + basic_default_json_visitor<CharT> default_visitor_; + text_source_adaptor<Source> source_; + basic_json_visitor<CharT>& visitor_; + basic_csv_parser<CharT,Allocator> parser_; + + public: + // Structural characters + static constexpr size_t default_max_buffer_size = 16384; + //! Parse an input stream of CSV text into a json object + /*! + \param is The input stream to read from + */ + + template <class Sourceable> + basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const Allocator& alloc = Allocator()) + + : basic_csv_reader(std::forward<Sourceable>(source), + visitor, + basic_csv_decode_options<CharT>(), + default_csv_parsing(), + alloc) + { + } + + template <class Sourceable> + basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + const Allocator& alloc = Allocator()) + + : basic_csv_reader(std::forward<Sourceable>(source), + visitor, + options, + default_csv_parsing(), + alloc) + { + } + + template <class Sourceable> + basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator()) + : basic_csv_reader(std::forward<Sourceable>(source), + visitor, + basic_csv_decode_options<CharT>(), + err_handler, + alloc) + { + } + + template <class Sourceable> + basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator()) + : source_(std::forward<Sourceable>(source)), + visitor_(visitor), + parser_(options, err_handler, alloc) + + { + } + + ~basic_csv_reader() noexcept = default; + + void read() + { + std::error_code ec; + read(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read(std::error_code& ec) + { + read_internal(ec); + } + + std::size_t line() const + { + return parser_.line(); + } + + std::size_t column() const + { + return parser_.column(); + } + + bool eof() const + { + return parser_.source_exhausted() && source_.eof(); + } + + private: + + void read_internal(std::error_code& ec) + { + if (source_.is_error()) + { + ec = csv_errc::source_error; + return; + } + while (!parser_.stopped()) + { + if (parser_.source_exhausted()) + { + auto s = source_.read_buffer(ec); + if (ec) return; + if (s.size() > 0) + { + parser_.update(s.data(),s.size()); + } + } + parser_.parse_some(visitor_, ec); + if (ec) return; + } + } + }; + + template<class CharT,class Source=jsoncons::stream_source<CharT>,class Allocator=std::allocator<char>> + class legacy_basic_csv_reader + { + struct stack_item + { + stack_item() noexcept + : array_begun_(false) + { + } + + bool array_begun_; + }; + using char_type = CharT; + using temp_allocator_type = Allocator; + typedef typename std::allocator_traits<temp_allocator_type>:: template rebind_alloc<CharT> char_allocator_type; + + legacy_basic_csv_reader(const legacy_basic_csv_reader&) = delete; + legacy_basic_csv_reader& operator = (const legacy_basic_csv_reader&) = delete; + + basic_default_json_visitor<CharT> default_visitor_; + text_source_adaptor<Source> source_; + basic_json_visitor<CharT>& visitor_; + basic_csv_parser<CharT,Allocator> parser_; + + public: + // Structural characters + static constexpr size_t default_max_buffer_size = 16384; + //! Parse an input stream of CSV text into a json object + /*! + \param is The input stream to read from + */ + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const Allocator& alloc = Allocator()) + + : legacy_basic_csv_reader(std::forward<Sourceable>(source), + visitor, + basic_csv_decode_options<CharT>(), + default_csv_parsing(), + alloc) + { + } + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + const Allocator& alloc = Allocator()) + + : legacy_basic_csv_reader(std::forward<Sourceable>(source), + visitor, + options, + default_csv_parsing(), + alloc) + { + } + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator()) + : legacy_basic_csv_reader(std::forward<Sourceable>(source), + visitor, + basic_csv_decode_options<CharT>(), + err_handler, + alloc) + { + } + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator(), + typename std::enable_if<!std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(std::forward<Sourceable>(source)), + visitor_(visitor), + parser_(options, err_handler, alloc) + { + } + + template <class Sourceable> + legacy_basic_csv_reader(Sourceable&& source, + basic_json_visitor<CharT>& visitor, + const basic_csv_decode_options<CharT>& options, + std::function<bool(csv_errc,const ser_context&)> err_handler, + const Allocator& alloc = Allocator(), + typename std::enable_if<std::is_constructible<jsoncons::basic_string_view<CharT>,Sourceable>::value>::type* = 0) + : source_(), + visitor_(visitor), + parser_(options, err_handler, alloc) + { + jsoncons::basic_string_view<CharT> sv(std::forward<Sourceable>(source)); + auto r = unicode_traits::detect_encoding_from_bom(sv.data(), sv.size()); + if (!(r.encoding == unicode_traits::encoding_kind::utf8 || r.encoding == unicode_traits::encoding_kind::undetected)) + { + JSONCONS_THROW(ser_error(json_errc::illegal_unicode_character,parser_.line(),parser_.column())); + } + std::size_t offset = (r.ptr - sv.data()); + parser_.update(sv.data()+offset,sv.size()-offset); + } + + ~legacy_basic_csv_reader() noexcept = default; + + void read() + { + std::error_code ec; + read(ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec,parser_.line(),parser_.column())); + } + } + + void read(std::error_code& ec) + { + read_internal(ec); + } + + std::size_t line() const + { + return parser_.line(); + } + + std::size_t column() const + { + return parser_.column(); + } + + bool eof() const + { + return parser_.source_exhausted() && source_.eof(); + } + + private: + + void read_internal(std::error_code& ec) + { + if (source_.is_error()) + { + ec = csv_errc::source_error; + return; + } + while (!parser_.stopped()) + { + if (parser_.source_exhausted()) + { + auto s = source_.read_buffer(ec); + if (ec) return; + if (s.size() > 0) + { + parser_.update(s.data(),s.size()); + } + } + parser_.parse_some(visitor_, ec); + if (ec) return; + } + } + }; + +#if !defined(JSONCONS_NO_DEPRECATED) + using csv_reader = legacy_basic_csv_reader<char>; + using wcsv_reader = legacy_basic_csv_reader<wchar_t>; +#endif + + using csv_string_reader = basic_csv_reader<char,string_source<char>>; + using wcsv_string_reader = basic_csv_reader<wchar_t,string_source<wchar_t>>; + using csv_stream_reader = basic_csv_reader<char,stream_source<char>>; + using wcsv_stream_reader = basic_csv_reader<wchar_t,stream_source<wchar_t>>; + +}} + +#endif diff --git a/include/jsoncons_ext/csv/csv_serializer.hpp b/include/jsoncons_ext/csv/csv_serializer.hpp new file mode 100644 index 0000000..ec73510 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_serializer.hpp @@ -0,0 +1,12 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_SERIALIZER_HPP +#define JSONCONS_CSV_CSV_SERIALIZER_HPP + +#include <jsoncons_ext/csv/csv_encoder.hpp> + +#endif diff --git a/include/jsoncons_ext/csv/decode_csv.hpp b/include/jsoncons_ext/csv/decode_csv.hpp new file mode 100644 index 0000000..b91c58b --- /dev/null +++ b/include/jsoncons_ext/csv/decode_csv.hpp @@ -0,0 +1,208 @@ +/// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_DECODE_CSV_HPP +#define JSONCONS_CSV_DECODE_CSV_HPP + +#include <jsoncons_ext/csv/csv_options.hpp> +#include <jsoncons_ext/csv/csv_reader.hpp> +#include <jsoncons_ext/csv/csv_encoder.hpp> +#include <jsoncons_ext/csv/csv_cursor.hpp> + +namespace jsoncons { +namespace csv { + + template <class T,class Source> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_sequence_of<Source,typename T::char_type>::value,T>::type + decode_csv(const Source& s, const basic_csv_decode_options<typename Source::value_type>& options = basic_csv_decode_options<typename Source::value_type>()) + { + using char_type = typename Source::value_type; + + json_decoder<T> decoder; + + basic_csv_reader<char_type,jsoncons::string_source<char_type>> reader(s,decoder,options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T,class Source> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_char_sequence<Source>::value,T>::type + decode_csv(const Source& s, const basic_csv_decode_options<typename Source::value_type>& options = basic_csv_decode_options<typename Source::value_type>()) + { + using char_type = typename Source::value_type; + + basic_csv_cursor<char_type> cursor(s, options); + jsoncons::json_decoder<basic_json<char_type>> decoder; + + std::error_code ec; + T val = decode_traits<T,char_type>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template <class T,class CharT> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_csv(std::basic_istream<CharT>& is, const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>()) + { + using char_type = CharT; + + json_decoder<T> decoder; + + basic_csv_reader<char_type,jsoncons::stream_source<char_type>> reader(is,decoder,options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T,class CharT> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_csv(std::basic_istream<CharT>& is, const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>()) + { + basic_csv_cursor<CharT> cursor(is, options); + jsoncons::json_decoder<basic_json<CharT>> decoder; + + std::error_code ec; + T val = decode_traits<T,CharT>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template <class T, class InputIt> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_csv(InputIt first, InputIt last, + const basic_csv_decode_options<typename std::iterator_traits<InputIt>::value_type>& options = + basic_csv_decode_options<typename std::iterator_traits<InputIt>::value_type>()) + { + using char_type = typename std::iterator_traits<InputIt>::value_type; + + jsoncons::json_decoder<T> decoder; + basic_csv_reader<char_type, iterator_source<InputIt>> reader(iterator_source<InputIt>(first,last), decoder, options); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T, class InputIt> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_csv(InputIt first, InputIt last, + const basic_csv_decode_options<typename std::iterator_traits<InputIt>::value_type>& options = + basic_csv_decode_options<typename std::iterator_traits<InputIt>::value_type>()) + { + using char_type = typename std::iterator_traits<InputIt>::value_type; + + basic_csv_cursor<char_type,iterator_source<InputIt>> cursor(iterator_source<InputIt>(first, last), options); + jsoncons::json_decoder<basic_json<char_type>> decoder; + std::error_code ec; + T val = decode_traits<T,char_type>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + // With leading allocator parameter + + template <class T,class Source,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_sequence_of<Source,typename T::char_type>::value,T>::type + decode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& s, + const basic_csv_decode_options<typename Source::value_type>& options = basic_csv_decode_options<typename Source::value_type>()) + { + using char_type = typename Source::value_type; + + json_decoder<T,TempAllocator> decoder(temp_alloc); + + basic_csv_reader<char_type,jsoncons::string_source<char_type>,TempAllocator> reader(s,decoder,options,temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T,class Source,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_char_sequence<Source>::value,T>::type + decode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const Source& s, + const basic_csv_decode_options<typename Source::value_type>& options = basic_csv_decode_options<typename Source::value_type>()) + { + using char_type = typename Source::value_type; + + basic_csv_cursor<char_type,stream_source<char_type>,TempAllocator> cursor(s, options, temp_alloc); + json_decoder<basic_json<char_type,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,char_type>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + + template <class T,class CharT,class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,T>::type + decode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::basic_istream<CharT>& is, + const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>()) + { + using char_type = CharT; + + json_decoder<T,TempAllocator> decoder(temp_alloc); + + basic_csv_reader<char_type,jsoncons::string_source<char_type>,TempAllocator> reader(is,decoder,options,temp_alloc); + reader.read(); + if (!decoder.is_valid()) + { + JSONCONS_THROW(ser_error(conv_errc::conversion_failed, reader.line(), reader.column())); + } + return decoder.get_result(); + } + + template <class T,class CharT,class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,T>::type + decode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + std::basic_istream<CharT>& is, + const basic_csv_decode_options<CharT>& options = basic_csv_decode_options<CharT>()) + { + basic_csv_cursor<CharT,stream_source<CharT>,TempAllocator> cursor(is, options, temp_alloc); + json_decoder<basic_json<CharT,sorted_policy,TempAllocator>,TempAllocator> decoder(temp_alloc, temp_alloc); + + std::error_code ec; + T val = decode_traits<T,CharT>::decode(cursor, decoder, ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec, cursor.context().line(), cursor.context().column())); + } + return val; + } + +} // namespace csv +} // namespace jsoncons + +#endif diff --git a/include/jsoncons_ext/csv/encode_csv.hpp b/include/jsoncons_ext/csv/encode_csv.hpp new file mode 100644 index 0000000..d919253 --- /dev/null +++ b/include/jsoncons_ext/csv/encode_csv.hpp @@ -0,0 +1,122 @@ +/// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_ENCODE_CSV_HPP +#define JSONCONS_CSV_ENCODE_CSV_HPP + +#include <jsoncons_ext/csv/csv_options.hpp> +#include <jsoncons_ext/csv/csv_reader.hpp> +#include <jsoncons_ext/csv/csv_encoder.hpp> + +namespace jsoncons { +namespace csv { + + template <class T,class Container> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_char_container<Container>::value>::type + encode_csv(const T& j, Container& s, const basic_csv_encode_options<typename Container::value_type>& options = basic_csv_encode_options<typename Container::value_type>()) + { + using char_type = typename Container::value_type; + basic_csv_encoder<char_type,jsoncons::string_sink<std::basic_string<char_type>>> encoder(s,options); + j.dump(encoder); + } + + template <class T,class Container> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_char_container<Container>::value>::type + encode_csv(const T& val, Container& s, const basic_csv_encode_options<typename Container::value_type>& options = basic_csv_encode_options<typename Container::value_type>()) + { + using char_type = typename Container::value_type; + basic_csv_encoder<char_type,jsoncons::string_sink<std::basic_string<char_type>>> encoder(s,options); + std::error_code ec; + encode_traits<T,char_type>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template <class T, class CharT> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_csv(const T& j, std::basic_ostream<CharT>& os, const basic_csv_encode_options<CharT>& options = basic_csv_encode_options<CharT>()) + { + using char_type = CharT; + basic_csv_encoder<char_type,jsoncons::stream_sink<char_type>> encoder(os,options); + j.dump(encoder); + } + + template <class T, class CharT> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_csv(const T& val, std::basic_ostream<CharT>& os, const basic_csv_encode_options<CharT>& options = basic_csv_encode_options<CharT>()) + { + using char_type = CharT; + basic_csv_encoder<char_type,jsoncons::stream_sink<char_type>> encoder(os,options); + std::error_code ec; + encode_traits<T,CharT>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + // with temp_allocator_arg_t + + template <class T, class Container, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_char_container<Container>::value>::type + encode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, Container& s, const basic_csv_encode_options<typename Container::value_type>& options = basic_csv_encode_options<typename Container::value_type>()) + { + using char_type = typename Container::value_type; + basic_csv_encoder<char_type,jsoncons::string_sink<std::basic_string<char_type>>,TempAllocator> encoder(s, options, temp_alloc); + j.dump(encoder); + } + + template <class T, class Container, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value && + type_traits::is_back_insertable_char_container<Container>::value>::type + encode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, Container& s, const basic_csv_encode_options<typename Container::value_type>& options = basic_csv_encode_options<typename Container::value_type>()) + { + using char_type = typename Container::value_type; + basic_csv_encoder<char_type,jsoncons::string_sink<std::basic_string<char_type>>,TempAllocator> encoder(s, options, temp_alloc); + std::error_code ec; + encode_traits<T,char_type>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + + template <class T, class CharT, class TempAllocator> + typename std::enable_if<type_traits::is_basic_json<T>::value,void>::type + encode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& j, std::basic_ostream<CharT>& os, const basic_csv_encode_options<CharT>& options = basic_csv_encode_options<CharT>()) + { + using char_type = CharT; + basic_csv_encoder<char_type,jsoncons::stream_sink<char_type>,TempAllocator> encoder(os, options, temp_alloc); + j.dump(encoder); + } + + template <class T, class CharT, class TempAllocator> + typename std::enable_if<!type_traits::is_basic_json<T>::value,void>::type + encode_csv(temp_allocator_arg_t, const TempAllocator& temp_alloc, + const T& val, std::basic_ostream<CharT>& os, const basic_csv_encode_options<CharT>& options = basic_csv_encode_options<CharT>()) + { + using char_type = CharT; + basic_csv_encoder<char_type,jsoncons::stream_sink<char_type>,TempAllocator> encoder(os, options, temp_alloc); + std::error_code ec; + encode_traits<T,CharT>::encode(val, encoder, json(), ec); + if (ec) + { + JSONCONS_THROW(ser_error(ec)); + } + } + +} // namespace csv +} // namespace jsoncons + +#endif |