// Copyright 2013 Daniel Parker // Distributed under the Boost license, Version 1.0. // (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // See https://github.com/danielaparker/jsoncons for latest version #ifndef JSONCONS_CSV_CSV_ENCODER_HPP #define JSONCONS_CSV_CSV_ENCODER_HPP #include // std::array #include #include #include #include // std::move #include // std::unordered_map #include // std::allocator #include // std::numeric_limits #include #include #include #include #include namespace jsoncons { namespace csv { template,class Allocator=std::allocator> class basic_csv_encoder final : public basic_json_visitor { public: using char_type = CharT; using typename basic_json_visitor::string_view_type; using sink_type = Sink; using allocator_type = Allocator; using char_allocator_type = typename std::allocator_traits:: template rebind_alloc; using string_type = std::basic_string, char_allocator_type>; using string_allocator_type = typename std::allocator_traits:: template rebind_alloc; using string_string_allocator_type = typename std::allocator_traits:: template rebind_alloc>; private: static jsoncons::basic_string_view null_constant() { static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"null"); return k; } static jsoncons::basic_string_view true_constant() { static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"true"); return k; } static jsoncons::basic_string_view false_constant() { static jsoncons::basic_string_view k = JSONCONS_STRING_VIEW_CONSTANT(CharT,"false"); return k; } enum class stack_item_kind { row_mapping, column_mapping, object, row, column, object_multi_valued_field, row_multi_valued_field, column_multi_valued_field }; struct stack_item { stack_item_kind item_kind_; std::size_t count_; stack_item(stack_item_kind item_kind) noexcept : item_kind_(item_kind), count_(0) { } bool is_object() const { return item_kind_ == stack_item_kind::object; } stack_item_kind item_kind() const { return item_kind_; } }; Sink sink_; const basic_csv_encode_options options_; allocator_type alloc_; std::vector stack_; jsoncons::detail::write_double fp_; std::vector strings_buffer_; std::unordered_map,std::equal_to,string_string_allocator_type> buffered_line_; string_type name_; std::size_t column_index_; std::vector row_counts_; // Noncopyable and nonmoveable basic_csv_encoder(const basic_csv_encoder&) = delete; basic_csv_encoder& operator=(const basic_csv_encoder&) = delete; public: basic_csv_encoder(Sink&& sink, const Allocator& alloc = Allocator()) : basic_csv_encoder(std::forward(sink), basic_csv_encode_options(), alloc) { } basic_csv_encoder(Sink&& sink, const basic_csv_encode_options& options, const Allocator& alloc = Allocator()) : sink_(std::forward(sink)), options_(options), alloc_(alloc), stack_(), fp_(options.float_format(), options.precision()), column_index_(0) { jsoncons::csv::detail::parse_column_names(options.column_names(), strings_buffer_); } ~basic_csv_encoder() noexcept { JSONCONS_TRY { sink_.flush(); } JSONCONS_CATCH(...) { } } void reset() { stack_.clear(); strings_buffer_.clear(); buffered_line_.clear(); name_.clear(); column_index_ = 0; row_counts_.clear(); } void reset(Sink&& sink) { sink_ = std::move(sink); reset(); } private: template void escape_string(const CharT* s, std::size_t length, CharT quote_char, CharT quote_escape_char, AnyWriter& sink) { const CharT* begin = s; const CharT* end = s + length; for (const CharT* it = begin; it != end; ++it) { CharT c = *it; if (c == quote_char) { sink.push_back(quote_escape_char); sink.push_back(quote_char); } else { sink.push_back(c); } } } void visit_flush() override { sink_.flush(); } bool visit_begin_object(semantic_tag, const ser_context&, std::error_code& ec) override { if (stack_.empty()) { stack_.emplace_back(stack_item_kind::column_mapping); return true; } switch (stack_.back().item_kind_) { case stack_item_kind::row_mapping: stack_.emplace_back(stack_item_kind::object); return true; default: // error ec = csv_errc::source_error; return false; } } bool visit_end_object(const ser_context&, std::error_code&) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::object: if (stack_[0].count_ == 0) { for (std::size_t i = 0; i < strings_buffer_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } sink_.append(strings_buffer_[i].data(), strings_buffer_[i].length()); } sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } for (std::size_t i = 0; i < strings_buffer_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } auto it = buffered_line_.find(strings_buffer_[i]); if (it != buffered_line_.end()) { sink_.append(it->second.data(),it->second.length()); it->second.clear(); } } sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); break; case stack_item_kind::column_mapping: { for (const auto& item : strings_buffer_) { sink_.append(item.data(), item.size()); sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } break; } default: break; } stack_.pop_back(); if (!stack_.empty()) { end_value(); } return true; } bool visit_begin_array(semantic_tag, const ser_context&, std::error_code& ec) override { if (stack_.empty()) { stack_.emplace_back(stack_item_kind::row_mapping); return true; } switch (stack_.back().item_kind_) { case stack_item_kind::row_mapping: stack_.emplace_back(stack_item_kind::row); if (stack_[0].count_ == 0) { for (std::size_t i = 0; i < strings_buffer_.size(); ++i) { if (i > 0) { sink_.push_back(options_.field_delimiter()); } sink_.append(strings_buffer_[i].data(),strings_buffer_[i].length()); } if (strings_buffer_.size() > 0) { sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); } } return true; case stack_item_kind::object: stack_.emplace_back(stack_item_kind::object_multi_valued_field); return true; case stack_item_kind::column_mapping: stack_.emplace_back(stack_item_kind::column); row_counts_.push_back(1); if (strings_buffer_.size() <= row_counts_.back()) { strings_buffer_.emplace_back(); } return true; case stack_item_kind::column: { if (strings_buffer_.size() <= row_counts_.back()) { strings_buffer_.emplace_back(); } jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); begin_value(bo); stack_.emplace_back(stack_item_kind::column_multi_valued_field); return true; } case stack_item_kind::row: begin_value(sink_); stack_.emplace_back(stack_item_kind::row_multi_valued_field); return true; default: // error ec = csv_errc::source_error; return false; } } bool visit_end_array(const ser_context&, std::error_code&) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::row: sink_.append(options_.line_delimiter().data(), options_.line_delimiter().length()); break; case stack_item_kind::column: ++column_index_; break; default: break; } stack_.pop_back(); if (!stack_.empty()) { end_value(); } return true; } bool visit_key(const string_view_type& name, const ser_context&, std::error_code&) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::object: { name_ = string_type(name); buffered_line_[string_type(name)] = std::basic_string(); if (stack_[0].count_ == 0 && options_.column_names().size() == 0) { strings_buffer_.emplace_back(name); } break; } case stack_item_kind::column_mapping: { if (strings_buffer_.empty()) { strings_buffer_.emplace_back(name); } else { strings_buffer_[0].push_back(options_.field_delimiter()); strings_buffer_[0].append(string_type(name)); } break; } default: break; } return true; } bool visit_null(semantic_tag, const ser_context&, std::error_code&) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { auto it = buffered_line_.find(name_); if (it != buffered_line_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); write_null_value(bo); bo.flush(); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } it->second.append(s); } break; } case stack_item_kind::row: case stack_item_kind::row_multi_valued_field: write_null_value(sink_); break; case stack_item_kind::column: { if (strings_buffer_.size() <= row_counts_.back()) { strings_buffer_.emplace_back(); } jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_null_value(bo); break; } case stack_item_kind::column_multi_valued_field: { jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_null_value(bo); break; } default: break; } return true; } bool visit_string(const string_view_type& sv, semantic_tag, const ser_context&, std::error_code&) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { auto it = buffered_line_.find(name_); if (it != buffered_line_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); write_string_value(sv,bo); bo.flush(); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } it->second.append(s); } break; } case stack_item_kind::row: case stack_item_kind::row_multi_valued_field: write_string_value(sv,sink_); break; case stack_item_kind::column: { if (strings_buffer_.size() <= row_counts_.back()) { strings_buffer_.emplace_back(); } jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_string_value(sv,bo); break; } case stack_item_kind::column_multi_valued_field: { jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_string_value(sv,bo); break; } default: break; } return true; } bool visit_byte_string(const byte_string_view& b, semantic_tag tag, const ser_context& context, std::error_code& ec) override { byte_string_chars_format encoding_hint; switch (tag) { case semantic_tag::base16: encoding_hint = byte_string_chars_format::base16; break; case semantic_tag::base64: encoding_hint = byte_string_chars_format::base64; break; case semantic_tag::base64url: encoding_hint = byte_string_chars_format::base64url; break; default: encoding_hint = byte_string_chars_format::none; break; } byte_string_chars_format format = jsoncons::detail::resolve_byte_string_chars_format(encoding_hint,byte_string_chars_format::none,byte_string_chars_format::base64url); std::basic_string s; switch (format) { case byte_string_chars_format::base16: { encode_base16(b.begin(),b.end(),s); visit_string(s, semantic_tag::none, context, ec); break; } case byte_string_chars_format::base64: { encode_base64(b.begin(),b.end(),s); visit_string(s, semantic_tag::none, context, ec); break; } case byte_string_chars_format::base64url: { encode_base64url(b.begin(),b.end(),s); visit_string(s, semantic_tag::none, context, ec); break; } default: { JSONCONS_UNREACHABLE(); } } return true; } bool visit_double(double val, semantic_tag, const ser_context& context, std::error_code& ec) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { auto it = buffered_line_.find(name_); if (it != buffered_line_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); write_double_value(val, context, bo, ec); bo.flush(); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } it->second.append(s); } break; } case stack_item_kind::row: case stack_item_kind::row_multi_valued_field: write_double_value(val, context, sink_, ec); break; case stack_item_kind::column: { if (strings_buffer_.size() <= row_counts_.back()) { strings_buffer_.emplace_back(); } jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_double_value(val, context, bo, ec); break; } case stack_item_kind::column_multi_valued_field: { jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_double_value(val, context, bo, ec); break; } default: break; } return true; } bool visit_int64(int64_t val, semantic_tag, const ser_context&, std::error_code&) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { auto it = buffered_line_.find(name_); if (it != buffered_line_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); write_int64_value(val,bo); bo.flush(); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } it->second.append(s); } break; } case stack_item_kind::row: case stack_item_kind::row_multi_valued_field: write_int64_value(val,sink_); break; case stack_item_kind::column: { if (strings_buffer_.size() <= row_counts_.back()) { strings_buffer_.emplace_back(); } jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_int64_value(val, bo); break; } case stack_item_kind::column_multi_valued_field: { jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_int64_value(val, bo); break; } default: break; } return true; } bool visit_uint64(uint64_t val, semantic_tag, const ser_context&, std::error_code&) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { auto it = buffered_line_.find(name_); if (it != buffered_line_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); write_uint64_value(val, bo); bo.flush(); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } it->second.append(s); } break; } case stack_item_kind::row: case stack_item_kind::row_multi_valued_field: write_uint64_value(val,sink_); break; case stack_item_kind::column: { if (strings_buffer_.size() <= row_counts_.back()) { strings_buffer_.emplace_back(); } jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_uint64_value(val, bo); break; } case stack_item_kind::column_multi_valued_field: { jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_uint64_value(val, bo); break; } default: break; } return true; } bool visit_bool(bool val, semantic_tag, const ser_context&, std::error_code&) override { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::object: case stack_item_kind::object_multi_valued_field: { auto it = buffered_line_.find(name_); if (it != buffered_line_.end()) { std::basic_string s; jsoncons::string_sink> bo(s); write_bool_value(val,bo); bo.flush(); if (!it->second.empty() && options_.subfield_delimiter() != char_type()) { it->second.push_back(options_.subfield_delimiter()); } it->second.append(s); } break; } case stack_item_kind::row: case stack_item_kind::row_multi_valued_field: write_bool_value(val,sink_); break; case stack_item_kind::column: { if (strings_buffer_.size() <= row_counts_.back()) { strings_buffer_.emplace_back(); } jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_bool_value(val, bo); break; } case stack_item_kind::column_multi_valued_field: { jsoncons::string_sink> bo(strings_buffer_[row_counts_.back()]); write_bool_value(val, bo); break; } default: break; } return true; } template bool do_string_value(const CharT* s, std::size_t length, AnyWriter& sink) { bool quote = false; if (options_.quote_style() == quote_style_kind::all || options_.quote_style() == quote_style_kind::nonnumeric || (options_.quote_style() == quote_style_kind::minimal && (std::char_traits::find(s, length, options_.field_delimiter()) != nullptr || std::char_traits::find(s, length, options_.quote_char()) != nullptr))) { quote = true; sink.push_back(options_.quote_char()); } escape_string(s, length, options_.quote_char(), options_.quote_escape_char(), sink); if (quote) { sink.push_back(options_.quote_char()); } return true; } template void write_string_value(const string_view_type& value, AnyWriter& sink) { begin_value(sink); do_string_value(value.data(),value.length(),sink); end_value(); } template void write_double_value(double val, const ser_context& context, AnyWriter& sink, std::error_code& ec) { begin_value(sink); if (!std::isfinite(val)) { if ((std::isnan)(val)) { if (options_.enable_nan_to_num()) { sink.append(options_.nan_to_num().data(), options_.nan_to_num().length()); } else if (options_.enable_nan_to_str()) { visit_string(options_.nan_to_str(), semantic_tag::none, context, ec); } else { sink.append(null_constant().data(), null_constant().size()); } } else if (val == std::numeric_limits::infinity()) { if (options_.enable_inf_to_num()) { sink.append(options_.inf_to_num().data(), options_.inf_to_num().length()); } else if (options_.enable_inf_to_str()) { visit_string(options_.inf_to_str(), semantic_tag::none, context, ec); } else { sink.append(null_constant().data(), null_constant().size()); } } else { if (options_.enable_neginf_to_num()) { sink.append(options_.neginf_to_num().data(), options_.neginf_to_num().length()); } else if (options_.enable_neginf_to_str()) { visit_string(options_.neginf_to_str(), semantic_tag::none, context, ec); } else { sink.append(null_constant().data(), null_constant().size()); } } } else { fp_(val, sink); } end_value(); } template void write_int64_value(int64_t val, AnyWriter& sink) { begin_value(sink); jsoncons::detail::from_integer(val,sink); end_value(); } template void write_uint64_value(uint64_t val, AnyWriter& sink) { begin_value(sink); jsoncons::detail::from_integer(val,sink); end_value(); } template void write_bool_value(bool val, AnyWriter& sink) { begin_value(sink); if (val) { sink.append(true_constant().data(), true_constant().size()); } else { sink.append(false_constant().data(), false_constant().size()); } end_value(); } template bool write_null_value(AnyWriter& sink) { begin_value(sink); sink.append(null_constant().data(), null_constant().size()); end_value(); return true; } template void begin_value(AnyWriter& sink) { JSONCONS_ASSERT(!stack_.empty()); switch (stack_.back().item_kind_) { case stack_item_kind::row: if (stack_.back().count_ > 0) { sink.push_back(options_.field_delimiter()); } break; case stack_item_kind::column: { if (row_counts_.size() >= 3) { for (std::size_t i = row_counts_.size()-2; i-- > 0;) { if (row_counts_[i] <= row_counts_.back()) { sink.push_back(options_.field_delimiter()); } else { break; } } } if (column_index_ > 0) { sink.push_back(options_.field_delimiter()); } break; } case stack_item_kind::row_multi_valued_field: case stack_item_kind::column_multi_valued_field: if (stack_.back().count_ > 0 && options_.subfield_delimiter() != char_type()) { sink.push_back(options_.subfield_delimiter()); } break; default: break; } } void end_value() { JSONCONS_ASSERT(!stack_.empty()); switch(stack_.back().item_kind_) { case stack_item_kind::row: { ++stack_.back().count_; break; } case stack_item_kind::column: { ++row_counts_.back(); break; } default: ++stack_.back().count_; break; } } }; using csv_stream_encoder = basic_csv_encoder; using csv_string_encoder = basic_csv_encoder>; using csv_wstream_encoder = basic_csv_encoder; using wcsv_string_encoder = basic_csv_encoder>; #if !defined(JSONCONS_NO_DEPRECATED) template, class Allocator = std::allocator> using basic_csv_serializer = basic_csv_encoder; JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_serializer; JSONCONS_DEPRECATED_MSG("Instead, use csv_string_encoder") typedef csv_string_encoder csv_string_serializer; JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_serializer; JSONCONS_DEPRECATED_MSG("Instead, use csv_string_encoder") typedef csv_string_encoder csv_string_serializer; JSONCONS_DEPRECATED_MSG("Instead, use csv_stream_encoder") typedef csv_stream_encoder csv_encoder; JSONCONS_DEPRECATED_MSG("Instead, use wcsv_stream_encoder") typedef csv_stream_encoder wcsv_encoder; #endif }} #endif