aboutsummaryrefslogtreecommitdiff
path: root/include/jsoncons_ext/jsonschema/format_validator.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'include/jsoncons_ext/jsonschema/format_validator.hpp')
-rw-r--r--include/jsoncons_ext/jsonschema/format_validator.hpp968
1 files changed, 968 insertions, 0 deletions
diff --git a/include/jsoncons_ext/jsonschema/format_validator.hpp b/include/jsoncons_ext/jsonschema/format_validator.hpp
new file mode 100644
index 0000000..312bf41
--- /dev/null
+++ b/include/jsoncons_ext/jsonschema/format_validator.hpp
@@ -0,0 +1,968 @@
+// Copyright 2020 Daniel Parker
+// Distributed under the Boost license, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+// See https://github.com/danielaparker/jsoncons for latest version
+
+#ifndef JSONCONS_JSONSCHEMA_FORMAT_VALIDATOR_HPP
+#define JSONCONS_JSONSCHEMA_FORMAT_VALIDATOR_HPP
+
+#include <jsoncons/config/jsoncons_config.hpp>
+#include <jsoncons/uri.hpp>
+#include <jsoncons/json.hpp>
+#include <jsoncons_ext/jsonpointer/jsonpointer.hpp>
+#include <jsoncons_ext/jsonschema/subschema.hpp>
+#include <cassert>
+#include <set>
+#include <sstream>
+#include <iostream>
+#include <cassert>
+#if defined(JSONCONS_HAS_STD_REGEX)
+#include <regex>
+#endif
+
+namespace jsoncons {
+namespace jsonschema {
+
+ inline
+ bool is_atext( char c)
+ {
+ switch (c)
+ {
+ case '!':
+ case '#':
+ case '$':
+ case '%':
+ case '&':
+ case '\'':
+ case '*':
+ case '+':
+ case '-':
+ case '/':
+ case '=':
+ case '?':
+ case '^':
+ case '_':
+ case '`':
+ case '{':
+ case '|':
+ case '}':
+ case '~':
+ return true;
+ default:
+ return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+ }
+ }
+
+ inline
+ bool is_dtext( char c)
+ {
+ return (c >= 33 && c <= 90) || (c >= 94 && c <= 126);
+ }
+
+ // RFC 5322, section 3.4.1
+ inline
+ bool validate_email_rfc5322(const std::string& s)
+ {
+ enum class state_t {local_part,atom,dot_atom,quoted_string,amp,domain};
+
+ state_t state = state_t::local_part;
+ std::size_t part_length = 0;
+
+ for (char c : s)
+ {
+ switch (state)
+ {
+ case state_t::local_part:
+ {
+ if (is_atext(c))
+ {
+ state = state_t::atom;
+ }
+ else if (c == '"')
+ {
+ state = state_t::quoted_string;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::dot_atom:
+ {
+ if (is_atext(c))
+ {
+ ++part_length;
+ state = state_t::atom;
+ }
+ else
+ return false;
+ break;
+ }
+ case state_t::atom:
+ {
+ switch (c)
+ {
+ case '@':
+ state = state_t::domain;
+ part_length = 0;
+ break;
+ case '.':
+ state = state_t::dot_atom;
+ ++part_length;
+ break;
+ default:
+ if (is_atext(c))
+ ++part_length;
+ else
+ return false;
+ break;
+ }
+ break;
+ }
+ case state_t::quoted_string:
+ {
+ if (c == '\"')
+ {
+ state = state_t::amp;
+ }
+ else
+ {
+ ++part_length;
+ }
+ break;
+ }
+ case state_t::amp:
+ {
+ if (c == '@')
+ {
+ state = state_t::domain;
+ part_length = 0;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::domain:
+ {
+ if (is_dtext(c))
+ {
+ ++part_length;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ }
+ }
+
+ return state == state_t::domain && part_length > 0;
+ }
+
+ // RFC 2673, Section 3.2
+
+ inline
+ bool validate_ipv6_rfc2373(const std::string& s)
+ {
+ enum class state_t{start,expect_hexdig_or_unspecified,
+ hexdig, decdig,expect_unspecified, unspecified};
+
+ state_t state = state_t::start;
+
+ std::size_t digit_count = 0;
+ std::size_t piece_count = 0;
+ std::size_t piece_count2 = 0;
+ bool has_unspecified = false;
+ std::size_t dec_value = 0;
+
+ for (std::size_t i = 0; i < s.length(); ++i)
+ {
+ char c = s[i];
+ switch (state)
+ {
+ case state_t::start:
+ {
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':
+ case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':
+ case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':
+ state = state_t::hexdig;
+ ++digit_count;
+ piece_count = 0;
+ break;
+ case ':':
+ if (!has_unspecified)
+ {
+ state = state_t::expect_unspecified;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ case state_t::expect_hexdig_or_unspecified:
+ {
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':
+ dec_value = dec_value*10 + static_cast<std::size_t>(c - '0'); // just in case this piece is followed by a dot
+ state = state_t::hexdig;
+ ++digit_count;
+ break;
+ case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':
+ case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':
+ state = state_t::hexdig;
+ ++digit_count;
+ break;
+ case ':':
+ if (!has_unspecified)
+ {
+ has_unspecified = true;
+ state = state_t::unspecified;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ case state_t::expect_unspecified:
+ {
+ if (c == ':')
+ {
+ has_unspecified = true;
+ state = state_t::unspecified;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::hexdig:
+ {
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':
+ case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':
+ case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':
+ ++digit_count;
+ break;
+ case ':':
+ if (digit_count <= 4)
+ {
+ ++piece_count;
+ digit_count = 0;
+ dec_value = 0;
+ state = state_t::expect_hexdig_or_unspecified;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ case '.':
+ if (piece_count == 6 || has_unspecified)
+ {
+ ++piece_count2;
+ state = state_t::decdig;
+ dec_value = 0;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ case state_t::decdig:
+ {
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':
+ dec_value = dec_value*10 + static_cast<std::size_t>(c - '0');
+ ++digit_count;
+ break;
+ case '.':
+ if (dec_value > 0xff)
+ {
+ return false;
+ }
+ digit_count = 0;
+ dec_value = 0;
+ ++piece_count2;
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ case state_t::unspecified:
+ {
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':
+ case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':
+ case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':
+ state = state_t::hexdig;
+ ++digit_count;
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ default:
+ return false;
+ }
+ }
+
+ switch (state)
+ {
+ case state_t::unspecified:
+ return piece_count <= 8;
+ case state_t::hexdig:
+ if (digit_count <= 4)
+ {
+ ++piece_count;
+ return digit_count > 0 && (piece_count == 8 || (has_unspecified && piece_count <= 8));
+ }
+ else
+ {
+ return false;
+ }
+ case state_t::decdig:
+ ++piece_count2;
+ if (dec_value > 0xff)
+ {
+ return false;
+ }
+ return digit_count > 0 && piece_count2 == 4;
+ default:
+ return false;
+ }
+ }
+
+ // RFC 2673, Section 3.2
+
+ inline
+ bool validate_ipv4_rfc2673(const std::string& s)
+ {
+ enum class state_t {expect_indicator_or_dotted_quad,decbyte,
+ bindig, octdig, hexdig};
+
+ state_t state = state_t::expect_indicator_or_dotted_quad;
+
+ std::size_t digit_count = 0;
+ std::size_t decbyte_count = 0;
+ std::size_t value = 0;
+
+ for (std::size_t i = 0; i < s.length(); ++i)
+ {
+ char c = s[i];
+ switch (state)
+ {
+ case state_t::expect_indicator_or_dotted_quad:
+ {
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':
+ state = state_t::decbyte;
+ decbyte_count = 0;
+ digit_count = 1;
+ value = 0;
+ break;
+ case 'b':
+ state = state_t::bindig;
+ digit_count = 0;
+ break;
+ case 'o':
+ state = state_t::octdig;
+ digit_count = 0;
+ break;
+ case 'x':
+ state = state_t::hexdig;
+ digit_count = 0;
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ case state_t::bindig:
+ {
+ if (digit_count >= 256)
+ {
+ return false;
+ }
+ switch (c)
+ {
+ case '0':case '1':
+ ++digit_count;
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ case state_t::octdig:
+ {
+ if (digit_count >= 86)
+ {
+ return false;
+ }
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':
+ ++digit_count;
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ case state_t::hexdig:
+ {
+ if (digit_count >= 64)
+ {
+ return false;
+ }
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':
+ case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':
+ case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':
+ ++digit_count;
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ case state_t::decbyte:
+ {
+ if (decbyte_count >= 4)
+ {
+ return false;
+ }
+ switch (c)
+ {
+ case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8': case '9':
+ {
+ if (digit_count >= 3)
+ {
+ return false;
+ }
+ ++digit_count;
+ value = value*10 + static_cast<std::size_t>(c - '0');
+ if (value > 255)
+ {
+ return false;
+ }
+ break;
+ }
+ case '.':
+ if (decbyte_count > 3)
+ {
+ return false;
+ }
+ ++decbyte_count;
+ digit_count = 0;
+ value = 0;
+ break;
+ default:
+ return false;
+ }
+ break;
+ }
+ default:
+ return false;
+ }
+ }
+
+ switch (state)
+ {
+ case state_t::decbyte:
+ if (digit_count > 0)
+ {
+ ++decbyte_count;
+ }
+ else
+ {
+ return false;
+ }
+ return (decbyte_count == 4) ? true : false;
+ case state_t::bindig:
+ return digit_count > 0 ? true : false;
+ case state_t::octdig:
+ return digit_count > 0 ? true : false;
+ case state_t::hexdig:
+ return digit_count > 0 ? true : false;
+ default:
+ return false;
+ }
+ }
+
+ // RFC 1034, Section 3.1
+ inline
+ bool validate_hostname_rfc1034(const std::string& hostname)
+ {
+ enum class state_t {start_label,expect_letter_or_digit_or_hyphen_or_dot};
+
+ state_t state = state_t::start_label;
+ std::size_t length = hostname.length() - 1;
+ std::size_t label_length = 0;
+
+ for (std::size_t i = 0; i < length; ++i)
+ {
+ char c = hostname[i];
+ switch (state)
+ {
+ case state_t::start_label:
+ {
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+ {
+ ++label_length;
+ state = state_t::expect_letter_or_digit_or_hyphen_or_dot;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::expect_letter_or_digit_or_hyphen_or_dot:
+ {
+ if (c == '.')
+ {
+ label_length = 0;
+ state = state_t::start_label;
+ }
+ else if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c < '9') || c == '-'))
+ {
+ return false;
+ }
+ if (++label_length > 63)
+ {
+ return false;
+ }
+ break;
+ }
+ }
+ }
+
+ char last = hostname.back();
+ if (!((last >= 'a' && last <= 'z') || (last >= 'A' && last <= 'Z') || (last >= '0' && last < '9')))
+ {
+ return false;
+ }
+ return true;
+ }
+
+ inline
+ bool is_leap_year(std::size_t year)
+ {
+ return (year % 4 == 0 && (year % 100 != 0 || year % 400 == 0));
+ }
+
+ inline
+ std::size_t days_in_month(std::size_t year, std::size_t month)
+ {
+ switch (month)
+ {
+ case 1: return 31;
+ case 2: return is_leap_year(year) ? 29 : 28;
+ case 3: return 31;
+ case 4: return 30;
+ case 5: return 31;
+ case 6: return 30;
+ case 7: return 31;
+ case 8: return 31;
+ case 9: return 30;
+ case 10: return 31;
+ case 11: return 30;
+ case 12: return 31;
+ default:
+ JSONCONS_UNREACHABLE();
+ break;
+ }
+ }
+
+ enum class date_time_type {date_time,date,time};
+ // RFC 3339, Section 5.6
+ inline
+ bool validate_date_time_rfc3339(const std::string& s, date_time_type type)
+ {
+ enum class state_t {fullyear,month,mday,hour,minute,second,secfrac,z,offset_hour,offset_minute};
+
+ std::size_t piece_length = 0;
+ std::size_t year = 0;
+ std::size_t month = 0;
+ std::size_t mday = 0;
+ std::size_t value = 0;
+ state_t state = (type == date_time_type::time) ? state_t::hour : state_t::fullyear;
+
+ for (char c : s)
+ {
+ switch (state)
+ {
+ case state_t::fullyear:
+ {
+ if (piece_length < 4 && (c >= '0' && c <= '9'))
+ {
+ piece_length++;
+ year = year*10 + static_cast<std::size_t>(c - '0');
+ }
+ else if (c == '-' && piece_length == 4)
+ {
+ state = state_t::month;
+ piece_length = 0;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::month:
+ {
+ if (piece_length < 2 && (c >= '0' && c <= '9'))
+ {
+ piece_length++;
+ month = month*10 + static_cast<std::size_t>(c - '0');
+ }
+ else if (c == '-' && piece_length == 2 && (month >=1 && month <= 12))
+ {
+ state = state_t::mday;
+ piece_length = 0;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::mday:
+ {
+ if (piece_length < 2 && (c >= '0' && c <= '9'))
+ {
+ piece_length++;
+ mday = mday *10 + static_cast<std::size_t>(c - '0');
+ }
+ else if ((c == 'T' || c == 't') && piece_length == 2 && (mday <= days_in_month(year, month)))
+ {
+ piece_length = 0;
+ state = state_t::hour;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::hour:
+ {
+ if (piece_length < 2 && (c >= '0' && c <= '9'))
+ {
+ piece_length++;
+ value = value*10 + static_cast<std::size_t>(c - '0');
+ }
+ else if (c == ':' && piece_length == 2 && (/*value >=0 && */ value <= 23))
+ {
+ state = state_t::minute;
+ value = 0;
+ piece_length = 0;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::minute:
+ {
+ if (piece_length < 2 && (c >= '0' && c <= '9'))
+ {
+ piece_length++;
+ value = value*10 + static_cast<std::size_t>(c - '0');
+ }
+ else if (c == ':' && piece_length == 2 && (/*value >=0 && */value <= 59))
+ {
+ state = state_t::second;
+ value = 0;
+ piece_length = 0;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::second:
+ {
+ if (piece_length < 2 && (c >= '0' && c <= '9'))
+ {
+ piece_length++;
+ value = value*10 + static_cast<std::size_t>(c - '0');
+ }
+ else if (piece_length == 2 && (/*value >=0 && */value <= 60)) // 00-58, 00-59, 00-60 based on leap second rules
+ {
+ switch (c)
+ {
+ case '.':
+ value = 0;
+ state = state_t::secfrac;
+ break;
+ case '+':
+ case '-':
+ value = 0;
+ piece_length = 0;
+ state = state_t::offset_hour;
+ break;
+ case 'Z':
+ case 'z':
+ state = state_t::z;
+ break;
+ default:
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::secfrac:
+ {
+ if (c >= '0' && c <= '9')
+ {
+ value = value*10 + static_cast<std::size_t>(c - '0');
+ }
+ else
+ {
+ switch (c)
+ {
+ case '+':
+ case '-':
+ value = 0;
+ piece_length = 0;
+ state = state_t::offset_hour;
+ break;
+ case 'Z':
+ case 'z':
+ state = state_t::z;
+ break;
+ default:
+ return false;
+ }
+ }
+ break;
+ }
+ case state_t::offset_hour:
+ {
+ if (piece_length < 2 && (c >= '0' && c <= '9'))
+ {
+ piece_length++;
+ value = value*10 + static_cast<std::size_t>(c - '0');
+ }
+ else if (c == ':' && piece_length == 2 && (/*value >=0 && */value <= 23))
+ {
+ value = 0;
+ piece_length = 0;
+ state = state_t::offset_minute;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::offset_minute:
+ {
+ if (piece_length < 2 && (c >= '0' && c <= '9'))
+ {
+ piece_length++;
+ value = value*10 + static_cast<std::size_t>(c - '0');
+ }
+ else if (c == ':' && piece_length == 2 && (/*value >=0 && */value <= 59))
+ {
+ value = 0;
+ piece_length = 0;
+ }
+ else
+ {
+ return false;
+ }
+ break;
+ }
+ case state_t::z:
+ return false;
+ }
+ }
+
+ if (type == date_time_type::date)
+ {
+ return state == state_t::mday && piece_length == 2 && (mday >= 1 && mday <= days_in_month(year, month));
+ }
+ else
+ {
+ return state == state_t::offset_minute || state == state_t::z || state == state_t::secfrac;
+ }
+ }
+
+ // format checkers
+ using format_checker = std::function<void(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string&,
+ error_reporter& reporter)>;
+
+ inline
+ void rfc3339_date_check(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string& value,
+ error_reporter& reporter)
+ {
+ if (!validate_date_time_rfc3339(value,date_time_type::date))
+ {
+ reporter.error(validation_output("date",
+ absolute_keyword_location,
+ instance_location.to_uri_fragment(),
+ "\"" + value + "\" is not a RFC 3339 date string"));
+ }
+ }
+
+ inline
+ void rfc3339_time_check(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string &value,
+ error_reporter& reporter)
+ {
+ if (!validate_date_time_rfc3339(value, date_time_type::time))
+ {
+ reporter.error(validation_output("time",
+ absolute_keyword_location,
+ instance_location.to_uri_fragment(),
+ "\"" + value + "\" is not a RFC 3339 time string"));
+ }
+ }
+
+ inline
+ void rfc3339_date_time_check(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string &value,
+ error_reporter& reporter)
+ {
+ if (!validate_date_time_rfc3339(value, date_time_type::date_time))
+ {
+ reporter.error(validation_output("date-time",
+ absolute_keyword_location,
+ instance_location.to_uri_fragment(),
+ "\"" + value + "\" is not a RFC 3339 date-time string"));
+ }
+ }
+
+ inline
+ void email_check(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string& value,
+ error_reporter& reporter)
+ {
+ if (!validate_email_rfc5322(value))
+ {
+ reporter.error(validation_output("email",
+ absolute_keyword_location,
+ instance_location.to_uri_fragment(),
+ "\"" + value + "\" is not a valid email address as defined by RFC 5322"));
+ }
+ }
+
+ inline
+ void hostname_check(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string& value,
+ error_reporter& reporter)
+ {
+ if (!validate_hostname_rfc1034(value))
+ {
+ reporter.error(validation_output("hostname",
+ absolute_keyword_location,
+ instance_location.to_uri_fragment(),
+ "\"" + value + "\" is not a valid hostname as defined by RFC 3986 Appendix A"));
+ }
+ }
+
+ inline
+ void ipv4_check(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string& value,
+ error_reporter& reporter)
+ {
+ if (!validate_ipv4_rfc2673(value))
+ {
+ reporter.error(validation_output("ipv4",
+ absolute_keyword_location,
+ instance_location.to_uri_fragment(),
+ "\"" + value + "\" is not a valid IPv4 address as defined by RFC 2673"));
+ }
+ }
+
+ inline
+ void ipv6_check(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string& value,
+ error_reporter& reporter)
+ {
+ if (!validate_ipv6_rfc2373(value))
+ {
+ reporter.error(validation_output("ipv6",
+ absolute_keyword_location,
+ instance_location.to_uri_fragment(),
+ "\"" + value + "\" is not a valid IPv6 address as defined by RFC 2373"));
+ }
+ }
+
+ inline
+ void regex_check(const std::string& absolute_keyword_location,
+ const jsonpointer::json_pointer& instance_location,
+ const std::string& value,
+ error_reporter& reporter)
+ {
+#if defined(JSONCONS_HAS_STD_REGEX)
+ try
+ {
+ std::regex re(value, std::regex::ECMAScript);
+ }
+ catch (const std::exception& e)
+ {
+ reporter.error(validation_output("pattern",
+ absolute_keyword_location,
+ instance_location.to_uri_fragment(),
+ "\"" + value + "\" is not a valid ECMAScript regular expression. " + e.what()));
+ }
+#endif
+ }
+
+} // namespace jsonschema
+} // namespace jsoncons
+
+#endif // JSONCONS_JSONSCHEMA_FORMAT_CHECKERS_HPP