From 1d055261b4144dbf86b2658437015b15d4dd9bff Mon Sep 17 00:00:00 2001 From: Richard Date: Sun, 4 Sep 2022 00:32:56 +0100 Subject: initial --- include/jsoncons_ext/csv/csv_options.hpp | 973 +++++++++++++++++++++++++++++++ 1 file changed, 973 insertions(+) create mode 100644 include/jsoncons_ext/csv/csv_options.hpp (limited to 'include/jsoncons_ext/csv/csv_options.hpp') diff --git a/include/jsoncons_ext/csv/csv_options.hpp b/include/jsoncons_ext/csv/csv_options.hpp new file mode 100644 index 0000000..8bd2e22 --- /dev/null +++ b/include/jsoncons_ext/csv/csv_options.hpp @@ -0,0 +1,973 @@ +// Copyright 2013 Daniel Parker +// Distributed under the Boost license, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// See https://github.com/danielaparker/jsoncons for latest version + +#ifndef JSONCONS_CSV_CSV_OPTIONS_HPP +#define JSONCONS_CSV_CSV_OPTIONS_HPP + +#include +#include +#include // std::pair +#include // std::unordered_map +#include +#include // std::numeric_limits +#include +#include + +namespace jsoncons { namespace csv { + +enum class csv_column_type : uint8_t +{ + string_t,integer_t,float_t,boolean_t,repeat_t +}; + +enum class quote_style_kind : uint8_t +{ + minimal,all,nonnumeric,none +}; + +enum class csv_mapping_kind : uint8_t +{ + n_rows = 1, + n_objects, + m_columns +}; + +#if !defined(JSONCONS_NO_DEPRECATED) +using mapping_kind = csv_mapping_kind; +JSONCONS_DEPRECATED_MSG("Instead, use quote_style_kind") typedef quote_style_kind quote_styles; +JSONCONS_DEPRECATED_MSG("Instead, use quote_style_kind") typedef quote_style_kind quote_style_type; +JSONCONS_DEPRECATED_MSG("Instead, use csv_mapping_kind") typedef csv_mapping_kind mapping_type; +#endif + +enum class column_state {sequence,label}; + +struct csv_type_info +{ + csv_type_info() = default; + csv_type_info(const csv_type_info&) = default; + csv_type_info(csv_type_info&&) = default; + + csv_type_info(csv_column_type ctype, std::size_t lev, std::size_t repcount = 0) noexcept + { + col_type = ctype; + level = lev; + rep_count = repcount; + } + + csv_column_type col_type; + std::size_t level; + std::size_t rep_count; +}; + +namespace detail { + +template +void parse_column_names(const std::basic_string& names, + Container& cont) +{ + column_state state = column_state::sequence; + typename Container::value_type buffer(cont.get_allocator()); + + auto p = names.begin(); + while (p != names.end()) + { + switch (state) + { + case column_state::sequence: + { + switch (*p) + { + case ' ': case '\t':case '\r': case '\n': + ++p; + break; + default: + buffer.clear(); + state = column_state::label; + break; + } + break; + } + case column_state::label: + { + switch (*p) + { + case ',': + cont.push_back(buffer); + buffer.clear(); + ++p; + state = column_state::sequence; + break; + default: + buffer.push_back(*p); + ++p; + break; + } + break; + } + } + } + if (state == column_state::label) + { + cont.push_back(buffer); + buffer.clear(); + } +} + +template +void parse_column_types(const std::basic_string& types, + Container& column_types) +{ + const std::map,csv_column_type> type_dictionary = + { + + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"string"),csv_column_type::string_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"integer"),csv_column_type::integer_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"float"),csv_column_type::float_t}, + {JSONCONS_STRING_VIEW_CONSTANT(CharT,"boolean"),csv_column_type::boolean_t} + }; + + column_state state = column_state::sequence; + int depth = 0; + std::basic_string buffer; + + auto p = types.begin(); + while (p != types.end()) + { + switch (state) + { + case column_state::sequence: + { + switch (*p) + { + case ' ': case '\t':case '\r': case '\n': + ++p; + break; + case '[': + ++depth; + ++p; + break; + case ']': + JSONCONS_ASSERT(depth > 0); + --depth; + ++p; + break; + case '*': + { + JSONCONS_ASSERT(column_types.size() != 0); + std::size_t offset = 0; + std::size_t level = column_types.size() > 0 ? column_types.back().level: 0; + if (level > 0) + { + for (auto it = column_types.rbegin(); + it != column_types.rend() && level == it->level; + ++it) + { + ++offset; + } + } + else + { + offset = 1; + } + column_types.emplace_back(csv_column_type::repeat_t,depth,offset); + ++p; + break; + } + default: + buffer.clear(); + state = column_state::label; + break; + } + break; + } + case column_state::label: + { + switch (*p) + { + case '*': + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + state = column_state::sequence; + break; + } + case ',': + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + ++p; + state = column_state::sequence; + break; + } + case ']': + { + JSONCONS_ASSERT(depth > 0); + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + --depth; + ++p; + state = column_state::sequence; + break; + } + default: + { + buffer.push_back(*p); + ++p; + break; + } + } + break; + } + } + } + if (state == column_state::label) + { + auto it = type_dictionary.find(buffer); + if (it != type_dictionary.end()) + { + column_types.emplace_back(it->second,depth); + buffer.clear(); + } + else + { + JSONCONS_ASSERT(false); + } + } +} + +} // detail + +template +class basic_csv_options; + +template +class basic_csv_options_common +{ + friend class basic_csv_options; +public: + using char_type = CharT; + using string_type = std::basic_string; +private: + char_type field_delimiter_; + char_type quote_char_; + char_type quote_escape_char_; + char_type subfield_delimiter_; + + bool enable_nan_to_num_:1; + bool enable_inf_to_num_:1; + bool enable_neginf_to_num_:1; + bool enable_nan_to_str_:1; + bool enable_inf_to_str_:1; + bool enable_neginf_to_str_:1; + bool enable_str_to_nan_:1; + bool enable_str_to_inf_:1; + bool enable_str_to_neginf_:1; + + string_type nan_to_num_; + string_type inf_to_num_; + string_type neginf_to_num_; + string_type nan_to_str_; + string_type inf_to_str_; + string_type neginf_to_str_; + string_type column_names_; + +protected: + basic_csv_options_common() + : field_delimiter_(','), + quote_char_('\"'), + quote_escape_char_('\"'), + subfield_delimiter_(char_type()), + enable_nan_to_num_(false), + enable_inf_to_num_(false), + enable_neginf_to_num_(false), + enable_nan_to_str_(false), + enable_inf_to_str_(false), + enable_neginf_to_str_(false), + enable_str_to_nan_(false), + enable_str_to_inf_(false), + enable_str_to_neginf_(false) + { + } + + basic_csv_options_common(const basic_csv_options_common&) = default; + basic_csv_options_common& operator=(const basic_csv_options_common&) = default; + + virtual ~basic_csv_options_common() noexcept = default; +public: + + char_type field_delimiter() const + { + return field_delimiter_; + } + + const char_type subfield_delimiter() const + { + return subfield_delimiter_; + } + + char_type quote_char() const + { + return quote_char_; + } + + char_type quote_escape_char() const + { + return quote_escape_char_; + } + + string_type column_names() const + { + return column_names_; + } + + bool enable_nan_to_num() const + { + return enable_nan_to_num_; + } + + bool enable_inf_to_num() const + { + return enable_inf_to_num_; + } + + bool enable_neginf_to_num() const + { + return enable_neginf_to_num_ || enable_inf_to_num_; + } + + bool enable_nan_to_str() const + { + return enable_nan_to_str_; + } + + bool enable_str_to_nan() const + { + return enable_str_to_nan_; + } + + bool enable_inf_to_str() const + { + return enable_inf_to_str_; + } + + bool enable_str_to_inf() const + { + return enable_str_to_inf_; + } + + bool enable_neginf_to_str() const + { + return enable_neginf_to_str_ || enable_inf_to_str_; + } + + bool enable_str_to_neginf() const + { + return enable_str_to_neginf_ || enable_str_to_inf_; + } + + string_type nan_to_num() const + { + return nan_to_num_; + } + + string_type inf_to_num() const + { + return inf_to_num_; + } + + string_type neginf_to_num() const + { + if (enable_neginf_to_num_) + { + return neginf_to_num_; + } + else if (enable_inf_to_num_) + { + string_type s; + s.push_back('-'); + s.append(inf_to_num_); + return s; + } + else + { + return neginf_to_num_; + } + } + + string_type nan_to_str() const + { + return nan_to_str_; + } + + string_type inf_to_str() const + { + return inf_to_str_; + } + + string_type neginf_to_str() const + { + if (enable_neginf_to_str_) + { + return neginf_to_str_; + } + else if (enable_inf_to_str_) + { + string_type s; + s.push_back('-'); + s.append(inf_to_str_); + return s; + } + else + { + return neginf_to_str_; // empty string + } + } +}; + +template +class basic_csv_decode_options : public virtual basic_csv_options_common +{ + friend class basic_csv_options; + using super_type = basic_csv_options_common; +public: + using typename super_type::char_type; + using typename super_type::string_type; + +private: + bool assume_header_:1; + bool ignore_empty_values_:1; + bool ignore_empty_lines_:1; + bool trim_leading_:1; + bool trim_trailing_:1; + bool trim_leading_inside_quotes_:1; + bool trim_trailing_inside_quotes_:1; + bool unquoted_empty_value_is_null_:1; + bool infer_types_:1; + bool lossless_number_:1; + char_type comment_starter_; + csv_mapping_kind mapping_; + std::size_t header_lines_; + std::size_t max_lines_; + string_type column_types_; + string_type column_defaults_; +public: + basic_csv_decode_options() + : assume_header_(false), + ignore_empty_values_(false), + ignore_empty_lines_(true), + trim_leading_(false), + trim_trailing_(false), + trim_leading_inside_quotes_(false), + trim_trailing_inside_quotes_(false), + unquoted_empty_value_is_null_(false), + infer_types_(true), + lossless_number_(false), + comment_starter_('\0'), + mapping_(), + header_lines_(0), + max_lines_((std::numeric_limits::max)()) + {} + + basic_csv_decode_options(const basic_csv_decode_options& other) = default; + + basic_csv_decode_options(basic_csv_decode_options&& other) + : super_type(std::forward(other)), + assume_header_(other.assume_header_), + ignore_empty_values_(other.ignore_empty_values_), + ignore_empty_lines_(other.ignore_empty_lines_), + trim_leading_(other.trim_leading_), + trim_trailing_(other.trim_trailing_), + trim_leading_inside_quotes_(other.trim_leading_inside_quotes_), + trim_trailing_inside_quotes_(other.trim_trailing_inside_quotes_), + unquoted_empty_value_is_null_(other.unquoted_empty_value_is_null_), + infer_types_(other.infer_types_), + lossless_number_(other.lossless_number_), + comment_starter_(other.comment_starter_), + mapping_(other.mapping_), + header_lines_(other.header_lines_), + max_lines_(other.max_lines_), + column_types_(std::move(other.column_types_)), + column_defaults_(std::move(other.column_defaults_)) + {} + + std::size_t header_lines() const + { + return (assume_header_ && header_lines_ <= 1) ? 1 : header_lines_; + } + + bool assume_header() const + { + return assume_header_; + } + + bool ignore_empty_values() const + { + return ignore_empty_values_; + } + + bool ignore_empty_lines() const + { + return ignore_empty_lines_; + } + + bool trim_leading() const + { + return trim_leading_; + } + + bool trim_trailing() const + { + return trim_trailing_; + } + + bool trim_leading_inside_quotes() const + { + return trim_leading_inside_quotes_; + } + + bool trim_trailing_inside_quotes() const + { + return trim_trailing_inside_quotes_; + } + + bool trim() const + { + return trim_leading_ && trim_trailing_; + } + + bool trim_inside_quotes() const + { + return trim_leading_inside_quotes_ && trim_trailing_inside_quotes_; + } + + bool unquoted_empty_value_is_null() const + { + return unquoted_empty_value_is_null_; + } + + bool infer_types() const + { + return infer_types_; + } + + bool lossless_number() const + { + return lossless_number_; + } + + char_type comment_starter() const + { + return comment_starter_; + } + + csv_mapping_kind mapping_kind() const + { + return mapping_ != csv_mapping_kind() ? mapping_ : (assume_header() || this->column_names().size() > 0 ? csv_mapping_kind::n_objects : csv_mapping_kind::n_rows); + } + +#if !defined(JSONCONS_NO_DEPRECATED) + csv_mapping_kind mapping() const + { + return mapping_kind(); + } +#endif + + std::size_t max_lines() const + { + return max_lines_; + } + + string_type column_types() const + { + return column_types_; + } + + string_type column_defaults() const + { + return column_defaults_; + } +}; + +template +class basic_csv_encode_options : public virtual basic_csv_options_common +{ + friend class basic_csv_options; + using super_type = basic_csv_options_common; +public: + using typename super_type::char_type; + using typename super_type::string_type; +private: + quote_style_kind quote_style_; + float_chars_format float_format_; + int8_t precision_; + string_type line_delimiter_; +public: + basic_csv_encode_options() + : quote_style_(quote_style_kind::minimal), + float_format_(float_chars_format::general), + precision_(0) + { + line_delimiter_.push_back('\n'); + } + + basic_csv_encode_options(const basic_csv_encode_options& other) = default; + + basic_csv_encode_options(basic_csv_encode_options&& other) + : super_type(std::forward(other)), + quote_style_(other.quote_style_), + float_format_(other.float_format_), + precision_(other.precision_), + line_delimiter_(std::move(other.line_delimiter_)) + { + } + + quote_style_kind quote_style() const + { + return quote_style_; + } + + float_chars_format float_format() const + { + return float_format_; + } + + int8_t precision() const + { + return precision_; + } + + string_type line_delimiter() const + { + return line_delimiter_; + } +}; + +template +class basic_csv_options final : public basic_csv_decode_options, public basic_csv_encode_options +{ + using char_type = CharT; + using string_type = std::basic_string; + +public: + using basic_csv_decode_options::enable_str_to_nan; + using basic_csv_decode_options::enable_str_to_inf; + using basic_csv_decode_options::enable_str_to_neginf; + using basic_csv_decode_options::nan_to_str; + using basic_csv_decode_options::inf_to_str; + using basic_csv_decode_options::neginf_to_str; + using basic_csv_decode_options::nan_to_num; + using basic_csv_decode_options::inf_to_num; + using basic_csv_decode_options::neginf_to_num; + using basic_csv_decode_options::field_delimiter; + using basic_csv_decode_options::subfield_delimiter; + using basic_csv_decode_options::quote_char; + using basic_csv_decode_options::quote_escape_char; + using basic_csv_decode_options::column_names; + using basic_csv_decode_options::header_lines; + using basic_csv_decode_options::assume_header; + using basic_csv_decode_options::ignore_empty_values; + using basic_csv_decode_options::ignore_empty_lines; + using basic_csv_decode_options::trim_leading; + using basic_csv_decode_options::trim_trailing; + using basic_csv_decode_options::trim_leading_inside_quotes; + using basic_csv_decode_options::trim_trailing_inside_quotes; + using basic_csv_decode_options::trim; + using basic_csv_decode_options::trim_inside_quotes; + using basic_csv_decode_options::unquoted_empty_value_is_null; + using basic_csv_decode_options::infer_types; + using basic_csv_decode_options::lossless_number; + using basic_csv_decode_options::comment_starter; + using basic_csv_decode_options::mapping; + using basic_csv_decode_options::max_lines; + using basic_csv_decode_options::column_types; + using basic_csv_decode_options::column_defaults; + using basic_csv_encode_options::float_format; + using basic_csv_encode_options::precision; + using basic_csv_encode_options::line_delimiter; + using basic_csv_encode_options::quote_style; + + static constexpr size_t default_indent = 4; + +// Constructors + + basic_csv_options() = default; + basic_csv_options(const basic_csv_options&) = default; + basic_csv_options(basic_csv_options&&) = default; + basic_csv_options& operator=(const basic_csv_options&) = default; + basic_csv_options& operator=(basic_csv_options&&) = default; + + basic_csv_options& float_format(float_chars_format value) + { + this->float_format_ = value; + return *this; + } + + basic_csv_options& precision(int8_t value) + { + this->precision_ = value; + return *this; + } + + basic_csv_options& header_lines(std::size_t value) + { + this->header_lines_ = value; + return *this; + } + + basic_csv_options& assume_header(bool value) + { + this->assume_header_ = value; + return *this; + } + + basic_csv_options& ignore_empty_values(bool value) + { + this->ignore_empty_values_ = value; + return *this; + } + + basic_csv_options& ignore_empty_lines(bool value) + { + this->ignore_empty_lines_ = value; + return *this; + } + + basic_csv_options& trim_leading(bool value) + { + this->trim_leading_ = value; + return *this; + } + + basic_csv_options& trim_trailing(bool value) + { + this->trim_trailing_ = value; + return *this; + } + + basic_csv_options& trim_leading_inside_quotes(bool value) + { + this->trim_leading_inside_quotes_ = value; + return *this; + } + + basic_csv_options& trim_trailing_inside_quotes(bool value) + { + this->trim_trailing_inside_quotes_ = value; + return *this; + } + + basic_csv_options& trim(bool value) + { + this->trim_leading_ = value; + this->trim_trailing_ = value; + return *this; + } + + basic_csv_options& trim_inside_quotes(bool value) + { + this->trim_leading_inside_quotes_ = value; + this->trim_trailing_inside_quotes_ = value; + return *this; + } + + basic_csv_options& unquoted_empty_value_is_null(bool value) + { + this->unquoted_empty_value_is_null_ = value; + return *this; + } + + basic_csv_options& column_names(const string_type& value) + { + this->column_names_ = value; + return *this; + } + + basic_csv_options& column_types(const string_type& value) + { + this->column_types_ = value; + return *this; + } + + basic_csv_options& column_defaults(const string_type& value) + { + this->column_defaults_ = value; + return *this; + } + + basic_csv_options& field_delimiter(char_type value) + { + this->field_delimiter_ = value; + return *this; + } + + basic_csv_options& subfield_delimiter(char_type value) + { + this->subfield_delimiter_ = value; + return *this; + } + + basic_csv_options& line_delimiter(const string_type& value) + { + this->line_delimiter_ = value; + return *this; + } + + basic_csv_options& quote_char(char_type value) + { + this->quote_char_ = value; + return *this; + } + + basic_csv_options& infer_types(bool value) + { + this->infer_types_ = value; + return *this; + } + + basic_csv_options& lossless_number(bool value) + { + this->lossless_number_ = value; + return *this; + } + + basic_csv_options& quote_escape_char(char_type value) + { + this->quote_escape_char_ = value; + return *this; + } + + basic_csv_options& comment_starter(char_type value) + { + this->comment_starter_ = value; + return *this; + } + + basic_csv_options& quote_style(quote_style_kind value) + { + this->quote_style_ = value; + return *this; + } + +//#if !defined(JSONCONS_NO_DEPRECATED) + basic_csv_options& mapping(csv_mapping_kind value) + { + this->mapping_ = value; + return *this; + } +//#endif + + basic_csv_options& mapping_kind(csv_mapping_kind value) + { + this->mapping_ = value; + return *this; + } + + basic_csv_options& max_lines(std::size_t value) + { + this->max_lines_ = value; + return *this; + } + + basic_csv_options& nan_to_num(const string_type& value) + { + this->enable_nan_to_num_ = true; + this->nan_to_str_.clear(); + this->nan_to_num_ = value; + return *this; + } + + basic_csv_options& inf_to_num(const string_type& value) + { + this->enable_inf_to_num_ = true; + this->inf_to_str_.clear(); + this->inf_to_num_ = value; + return *this; + } + + basic_csv_options& neginf_to_num(const string_type& value) + { + this->enable_neginf_to_num_ = true; + this->neginf_to_str_.clear(); + this->neginf_to_num_ = value; + return *this; + } + + basic_csv_options& nan_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_nan_to_str_ = true; + this->enable_str_to_nan_ = enable_inverse; + this->nan_to_num_.clear(); + this->nan_to_str_ = value; + return *this; + } + + basic_csv_options& inf_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_inf_to_str_ = true; + this->enable_inf_to_str_ = enable_inverse; + this->inf_to_num_.clear(); + this->inf_to_str_ = value; + return *this; + } + + basic_csv_options& neginf_to_str(const string_type& value, bool enable_inverse = true) + { + this->enable_neginf_to_str_ = true; + this->enable_neginf_to_str_ = enable_inverse; + this->neginf_to_num_.clear(); + this->neginf_to_str_ = value; + return *this; + } + +#if !defined(JSONCONS_NO_DEPRECATED) + + JSONCONS_DEPRECATED_MSG("Instead, use float_format(float_chars_format)") + basic_csv_options& floating_point_format(float_chars_format value) + { + this->float_format_ = value; + return *this; + } +#endif + +}; + +using csv_options = basic_csv_options; +using wcsv_options = basic_csv_options; + +#if !defined(JSONCONS_NO_DEPRECATED) +JSONCONS_DEPRECATED_MSG("Instead, use csv_options") typedef csv_options csv_parameters; +JSONCONS_DEPRECATED_MSG("Instead, use wcsv_options") typedef wcsv_options wcsv_parameters; +JSONCONS_DEPRECATED_MSG("Instead, use csv_options") typedef csv_options csv_serializing_options; +JSONCONS_DEPRECATED_MSG("Instead, use wcsv_options") typedef wcsv_options wcsv_serializing_options; +#endif + + +}} +#endif -- cgit v1.2.3