diff --git a/DEPENDENCIES b/DEPENDENCIES index 529ffd5..4122375 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,4 +1,4 @@ vendorpull https://github.com/sourcemeta/vendorpull dea311b5bfb53b6926a4140267959ae334d3ecf4 noa https://github.com/sourcemeta/noa 7e26abce7a4e31e86a16ef2851702a56773ca527 -jsontoolkit https://github.com/sourcemeta/jsontoolkit a3765c8038ba4271e55318a677f6366bdaa7b805 +jsontoolkit https://github.com/sourcemeta/jsontoolkit e4e585af3d7451f0d97008e09057c84154e2d372 hydra https://github.com/sourcemeta/hydra 3c53d3fdef79e9ba603d48470a508cc45472a0dc diff --git a/src/command_identify.cc b/src/command_identify.cc index 507b08b..1d827aa 100644 --- a/src/command_identify.cc +++ b/src/command_identify.cc @@ -52,7 +52,7 @@ auto intelligence::jsonschema::cli::identify( std::optional identifier; try { - identifier = sourcemeta::jsontoolkit::id( + identifier = sourcemeta::jsontoolkit::identify( schema, sourcemeta::jsontoolkit::official_resolver, sourcemeta::jsontoolkit::IdentificationStrategy::Loose) .get(); diff --git a/vendor/jsontoolkit/src/jsonschema/bundle.cc b/vendor/jsontoolkit/src/jsonschema/bundle.cc index 6aa16e6..91b1a8b 100644 --- a/vendor/jsontoolkit/src/jsonschema/bundle.cc +++ b/vendor/jsontoolkit/src/jsonschema/bundle.cc @@ -28,61 +28,6 @@ auto definitions_keyword(const std::map &vocabularies) "Cannot determine how to bundle on this dialect"); } -auto id_keyword(const std::map &vocabularies) - -> std::string { - if (vocabularies.contains( - "https://json-schema.org/draft/2020-12/vocab/core") || - vocabularies.contains( - "https://json-schema.org/draft/2019-09/vocab/core") || - vocabularies.contains("http://json-schema.org/draft-07/schema#") || - vocabularies.contains("http://json-schema.org/draft-06/schema#")) { - return "$id"; - } else if (vocabularies.contains("http://json-schema.org/draft-04/schema#") || - vocabularies.contains("http://json-schema.org/draft-03/schema#") || - vocabularies.contains("http://json-schema.org/draft-02/schema#") || - vocabularies.contains("http://json-schema.org/draft-01/schema#") || - vocabularies.contains("http://json-schema.org/draft-00/schema#")) { - return "id"; - } else { - throw sourcemeta::jsontoolkit::SchemaError( - "Cannot determine how to bundle on this dialect"); - } -} - -// TODO: Turn it into an official function to set a schema identifier -auto upsert_id(sourcemeta::jsontoolkit::JSON &target, - const std::string &identifier, - const sourcemeta::jsontoolkit::SchemaResolver &resolver, - const std::optional &default_dialect) -> void { - if (!sourcemeta::jsontoolkit::is_schema(target)) { - throw sourcemeta::jsontoolkit::SchemaResolutionError( - identifier, "The JSON document is not a valid JSON Schema"); - } - - const auto dialect{sourcemeta::jsontoolkit::dialect(target, default_dialect)}; - if (!dialect.has_value()) { - throw sourcemeta::jsontoolkit::SchemaResolutionError( - identifier, "The JSON document is not a valid JSON Schema"); - } - - const auto base_dialect{ - sourcemeta::jsontoolkit::base_dialect(target, resolver, dialect).get()}; - const auto vocabularies{sourcemeta::jsontoolkit::vocabularies( - resolver, base_dialect.value(), dialect.value()) - .get()}; - assert(!vocabularies.empty()); - - // Always insert an identifier, as a schema might refer to another schema - // using another URI (i.e. due to relying on HTTP re-directions, etc) - - if (target.is_object()) { - target.assign(id_keyword(vocabularies), - sourcemeta::jsontoolkit::JSON{identifier}); - } - - assert(sourcemeta::jsontoolkit::id(target, base_dialect.value()).has_value()); -} - auto embed_schema(sourcemeta::jsontoolkit::JSON &definitions, const std::string &identifier, const sourcemeta::jsontoolkit::JSON &target) -> void { @@ -159,7 +104,24 @@ auto bundle_schema(sourcemeta::jsontoolkit::JSON &root, // Otherwise, if the target schema does not declare an inline identifier, // references to that identifier from the outer schema won't resolve. sourcemeta::jsontoolkit::JSON copy{remote.value()}; - upsert_id(copy, identifier, resolver, default_dialect); + + if (!sourcemeta::jsontoolkit::is_schema(copy)) { + throw sourcemeta::jsontoolkit::SchemaResolutionError( + identifier, "The JSON document is not a valid JSON Schema"); + } + + const auto dialect{sourcemeta::jsontoolkit::dialect(copy, default_dialect)}; + if (!dialect.has_value()) { + throw sourcemeta::jsontoolkit::SchemaResolutionError( + identifier, "The JSON document is not a valid JSON Schema"); + } + + if (copy.is_object()) { + // Always insert an identifier, as a schema might refer to another schema + // using another URI (i.e. due to relying on HTTP re-directions, etc) + sourcemeta::jsontoolkit::reidentify(copy, identifier, resolver, + default_dialect); + } embed_schema(root.at(container), identifier, copy); bundle_schema(root, container, copy, frame, walker, resolver, @@ -187,7 +149,8 @@ auto remove_identifiers(sourcemeta::jsontoolkit::JSON &schema, continue; } - subschema.erase(id_keyword(entry.vocabularies)); + assert(entry.base_dialect.has_value()); + sourcemeta::jsontoolkit::anonymize(subschema, entry.base_dialect.value()); if (entry.vocabularies.contains( "https://json-schema.org/draft/2020-12/vocab/core")) { diff --git a/vendor/jsontoolkit/src/jsonschema/compile.cc b/vendor/jsontoolkit/src/jsonschema/compile.cc index d2c11ee..a9a633c 100644 --- a/vendor/jsontoolkit/src/jsonschema/compile.cc +++ b/vendor/jsontoolkit/src/jsonschema/compile.cc @@ -80,7 +80,7 @@ auto compile(const JSON &schema, const SchemaWalker &walker, .wait(); const std::string base{ - URI{sourcemeta::jsontoolkit::id( + URI{sourcemeta::jsontoolkit::identify( schema, resolver, sourcemeta::jsontoolkit::IdentificationStrategy::Strict, default_dialect) diff --git a/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema.h b/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema.h index f0ccace..bd29849 100644 --- a/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema.h +++ b/vendor/jsontoolkit/src/jsonschema/include/sourcemeta/jsontoolkit/jsonschema.h @@ -95,7 +95,7 @@ enum class IdentificationStrategy { /// "$id": "https://sourcemeta.com/example-schema" /// })JSON"); /// -/// std::optional id{sourcemeta::jsontoolkit::id( +/// std::optional id{sourcemeta::jsontoolkit::identify( /// document, sourcemeta::jsontoolkit::official_resolver).get()}; /// assert(id.has_value()); /// assert(id.value() == "https://sourcemeta.com/example-schema"); @@ -105,21 +105,89 @@ enum class IdentificationStrategy { /// guessing game. Often useful if you have a schema without a dialect and you /// want to at least try to get something. SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT -auto id(const JSON &schema, const SchemaResolver &resolver, - const IdentificationStrategy strategy = IdentificationStrategy::Strict, - const std::optional &default_dialect = std::nullopt, - const std::optional &default_id = std::nullopt) +auto identify( + const JSON &schema, const SchemaResolver &resolver, + const IdentificationStrategy strategy = IdentificationStrategy::Strict, + const std::optional &default_dialect = std::nullopt, + const std::optional &default_id = std::nullopt) -> std::future>; /// @ingroup jsonschema /// -/// A shortcut to sourcemeta::jsontoolkit::id if you know the base dialect of -/// the schema. +/// A shortcut to sourcemeta::jsontoolkit::identify if you know the base dialect +/// of the schema. SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT -auto id(const JSON &schema, const std::string &base_dialect, - const std::optional &default_id = std::nullopt) +auto identify(const JSON &schema, const std::string &base_dialect, + const std::optional &default_id = std::nullopt) -> std::optional; +/// @ingroup jsonschema +/// +/// This function removes the top-level URI identifier of the given schema, if +/// any, given you know its base dialect. It is the caller responsibility to +/// ensure the schema doesn't perform relative references that might have +/// depended on such top-level identifier. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// sourcemeta::jsontoolkit::JSON document = +/// sourcemeta::jsontoolkit::parse(R"JSON({ +/// "$schema": "https://json-schema.org/draft/2020-12/schema", +/// "$id": "https://sourcemeta.com/example-schema" +/// })JSON"); +/// +/// sourcemeta::jsontoolkit::anonymize(document, +/// "https://json-schema.org/draft/2020-12/schema"); +/// +/// std::optional id{sourcemeta::jsontoolkit::identify( +/// document, sourcemeta::jsontoolkit::official_resolver).get()}; +/// assert(!id.has_value()); +/// ``` +SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT +auto anonymize(JSON &schema, const std::string &base_dialect) -> void; + +/// @ingroup jsonschema +/// +/// This function sets the identifier of a schema, replacing the existing one, +/// if any. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// sourcemeta::jsontoolkit::JSON document = +/// sourcemeta::jsontoolkit::parse(R"JSON({ +/// "$schema": "https://json-schema.org/draft/2020-12/schema", +/// "$id": "https://sourcemeta.com/example-schema" +/// })JSON"); +/// +/// sourcemeta::jsontoolkit::reidentify(document, +/// "https://example.com/my-new-id", +/// sourcemeta::jsontoolkit::official_resolver); +/// +/// std::optional id{sourcemeta::jsontoolkit::identify( +/// document, sourcemeta::jsontoolkit::official_resolver).get()}; +/// assert(id.has_value()); +/// assert(id.value() == "https://example.com/my-new-id"); +/// ``` +SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT +auto reidentify( + JSON &schema, const std::string &new_identifier, + const SchemaResolver &resolver, + const std::optional &default_dialect = std::nullopt) -> void; + +/// @ingroup jsonschema +/// +/// A shortcut to sourcemeta::jsontoolkit::reidentify if you know the base +/// dialect of the schema. +SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT +auto reidentify(JSON &schema, const std::string &new_identifier, + const std::string &base_dialect) -> void; + /// @ingroup jsonschema /// /// Get the dialect URI that corresponds to a JSON Schema instance. diff --git a/vendor/jsontoolkit/src/jsonschema/jsonschema.cc b/vendor/jsontoolkit/src/jsonschema/jsonschema.cc index 9b5bb8d..ac80581 100644 --- a/vendor/jsontoolkit/src/jsonschema/jsonschema.cc +++ b/vendor/jsontoolkit/src/jsonschema/jsonschema.cc @@ -15,22 +15,49 @@ auto sourcemeta::jsontoolkit::is_schema( return schema.is_object() || schema.is_boolean(); } -static auto guess_identifier(const sourcemeta::jsontoolkit::JSON &schema) +static auto id_keyword_guess(const sourcemeta::jsontoolkit::JSON &schema) -> std::optional { if (schema.defines("$id") && schema.at("$id").is_string()) { if (!schema.defines("id") || (schema.defines("id") && (!schema.at("id").is_string() || schema.at("$id") == schema.at("id")))) { - return schema.at("$id").to_string(); + return "$id"; } } else if (schema.defines("id") && schema.at("id").is_string()) { - return schema.at("id").to_string(); + return "id"; } return std::nullopt; } -auto sourcemeta::jsontoolkit::id( +static auto id_keyword(const std::string &base_dialect) -> std::string { + if (base_dialect == "https://json-schema.org/draft/2020-12/schema" || + base_dialect == "https://json-schema.org/draft/2020-12/hyper-schema" || + base_dialect == "https://json-schema.org/draft/2019-09/schema" || + base_dialect == "https://json-schema.org/draft/2019-09/hyper-schema" || + base_dialect == "http://json-schema.org/draft-07/schema#" || + base_dialect == "http://json-schema.org/draft-07/hyper-schema#" || + base_dialect == "http://json-schema.org/draft-06/schema#" || + base_dialect == "http://json-schema.org/draft-06/hyper-schema#") { + return "$id"; + } + + if (base_dialect == "http://json-schema.org/draft-04/schema#" || + base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || + base_dialect == "http://json-schema.org/draft-03/schema#" || + base_dialect == "http://json-schema.org/draft-03/hyper-schema#" || + base_dialect == "http://json-schema.org/draft-02/hyper-schema#" || + base_dialect == "http://json-schema.org/draft-01/hyper-schema#" || + base_dialect == "http://json-schema.org/draft-00/hyper-schema#") { + return "id"; + } + + std::ostringstream error; + error << "Unrecognized base dialect: " << base_dialect; + throw sourcemeta::jsontoolkit::SchemaError(error.str()); +} + +auto sourcemeta::jsontoolkit::identify( const sourcemeta::jsontoolkit::JSON &schema, const SchemaResolver &resolver, const IdentificationStrategy strategy, const std::optional &default_dialect, @@ -47,8 +74,15 @@ auto sourcemeta::jsontoolkit::id( } catch (const SchemaResolutionError &) { // Attempt to play a heuristic guessing game before giving up if (strategy == IdentificationStrategy::Loose && schema.is_object()) { + const auto keyword{id_keyword_guess(schema)}; std::promise> promise; - promise.set_value(guess_identifier(schema)); + + if (keyword.has_value()) { + promise.set_value(schema.at(keyword.value()).to_string()); + } else { + promise.set_value(std::nullopt); + } + return promise.get_future(); } @@ -59,8 +93,15 @@ auto sourcemeta::jsontoolkit::id( // Attempt to play a heuristic guessing game before giving up if (strategy == IdentificationStrategy::Loose && schema.is_object()) { + const auto keyword{id_keyword_guess(schema)}; std::promise> promise; - promise.set_value(guess_identifier(schema)); + + if (keyword.has_value()) { + promise.set_value(schema.at(keyword.value()).to_string()); + } else { + promise.set_value(std::nullopt); + } + return promise.get_future(); } @@ -70,46 +111,61 @@ auto sourcemeta::jsontoolkit::id( } std::promise> promise; - promise.set_value(id(schema, maybe_base_dialect.value(), default_id)); + promise.set_value(identify(schema, maybe_base_dialect.value(), default_id)); return promise.get_future(); } -SOURCEMETA_JSONTOOLKIT_JSONSCHEMA_EXPORT -auto sourcemeta::jsontoolkit::id(const JSON &schema, - const std::string &base_dialect, - const std::optional &default_id) +auto sourcemeta::jsontoolkit::identify( + const JSON &schema, const std::string &base_dialect, + const std::optional &default_id) -> std::optional { - if (base_dialect == "http://json-schema.org/draft-00/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-01/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-02/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-03/schema#" || - base_dialect == "http://json-schema.org/draft-04/hyper-schema#" || - base_dialect == "http://json-schema.org/draft-04/schema#") { - if (schema.is_object() && schema.defines("id")) { - const sourcemeta::jsontoolkit::JSON &id{schema.at("id")}; - if (!id.is_string() || id.empty()) { - throw sourcemeta::jsontoolkit::SchemaError( - "The value of the id property is not valid"); - } + if (!schema.is_object()) { + return default_id; + } - return id.to_string(); - } else { - return default_id; - } + const auto keyword{id_keyword(base_dialect)}; + if (!schema.defines(keyword)) { + return default_id; } - if (schema.is_object() && schema.defines("$id")) { - const sourcemeta::jsontoolkit::JSON &id{schema.at("$id")}; - if (!id.is_string() || id.empty()) { - throw sourcemeta::jsontoolkit::SchemaError( - "The value of the $id property is not valid"); - } + const auto &identifier{schema.at(keyword)}; + if (!identifier.is_string() || identifier.empty()) { + std::ostringstream error; + error << "The value of the " << keyword << " property is not valid"; + throw sourcemeta::jsontoolkit::SchemaError(error.str()); + } + + return identifier.to_string(); +} - return id.to_string(); +auto sourcemeta::jsontoolkit::anonymize( + JSON &schema, const std::string &base_dialect) -> void { + if (schema.is_object()) { + schema.erase(id_keyword(base_dialect)); } +} + +auto sourcemeta::jsontoolkit::reidentify( + JSON &schema, const std::string &new_identifier, + const SchemaResolver &resolver, + const std::optional &default_dialect) -> void { + const auto base_dialect{ + sourcemeta::jsontoolkit::base_dialect(schema, resolver, default_dialect) + .get()}; + if (!base_dialect.has_value()) { + throw sourcemeta::jsontoolkit::SchemaError("Cannot determine base dialect"); + } + + reidentify(schema, new_identifier, base_dialect.value()); +} - return default_id; +auto sourcemeta::jsontoolkit::reidentify( + JSON &schema, const std::string &new_identifier, + const std::string &base_dialect) -> void { + assert(is_schema(schema)); + assert(schema.is_object()); + schema.assign(id_keyword(base_dialect), JSON{new_identifier}); + assert(identify(schema, base_dialect).has_value()); } auto sourcemeta::jsontoolkit::dialect( diff --git a/vendor/jsontoolkit/src/jsonschema/reference.cc b/vendor/jsontoolkit/src/jsonschema/reference.cc index 16faf65..7b7e32e 100644 --- a/vendor/jsontoolkit/src/jsonschema/reference.cc +++ b/vendor/jsontoolkit/src/jsonschema/reference.cc @@ -140,7 +140,7 @@ auto sourcemeta::jsontoolkit::frame( .get()}; assert(root_base_dialect.has_value()); - const std::optional root_id{sourcemeta::jsontoolkit::id( + const std::optional root_id{sourcemeta::jsontoolkit::identify( schema, root_base_dialect.value(), default_id)}; const std::optional root_dialect{ sourcemeta::jsontoolkit::dialect(schema, default_dialect)}; @@ -171,7 +171,7 @@ auto sourcemeta::jsontoolkit::frame( assert(entry.base_dialect.has_value()); // Schema identifier - std::optional id{sourcemeta::jsontoolkit::id( + std::optional id{sourcemeta::jsontoolkit::identify( entry.value, entry.base_dialect.value(), entry.pointer.empty() ? default_id : std::nullopt)};