diff --git a/libcove/lib/common.py b/libcove/lib/common.py index ba206c2..71b1483 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -89,6 +89,8 @@ def unique_ids(validator, ui, instance, schema, id_names=["id"]): if ui and validator.is_type(instance, "array"): non_unique_ids = set() all_ids = set() + run_uniq = False + for item in instance: try: item_ids = tuple(item.get(id_name) for id_name in id_names) @@ -96,21 +98,21 @@ def unique_ids(validator, ui, instance, schema, id_names=["id"]): # if item is not a dict item_ids = None if item_ids and all( - item_id is not None - and not isinstance(item_id, list) - and not isinstance(item_id, dict) + item_id is not None and not isinstance(item_id, (dict, list)) for item_id in item_ids ): if item_ids in all_ids: non_unique_ids.add(item_ids) all_ids.add(item_ids) else: - if not uniq(instance): - msg = "Array has non-unique elements" - err = ValidationError(msg, instance=instance) - err.error_id = "uniqueItems_no_ids" - yield err - return + run_uniq = True + + if run_uniq and not uniq(instance): + msg = "Array has non-unique elements" + err = ValidationError(msg, instance=instance) + err.error_id = "uniqueItems_no_ids" + yield err + return for non_unique_id in sorted(non_unique_ids): if len(id_names) == 1: diff --git a/refactor.patch b/refactor.patch new file mode 100644 index 0000000..939add8 --- /dev/null +++ b/refactor.patch @@ -0,0 +1,124 @@ +diff --git a/libcove/lib/common.py b/libcove/lib/common.py +index e7055a1..0f350f5 100644 +--- a/libcove/lib/common.py ++++ b/libcove/lib/common.py +@@ -418,8 +418,10 @@ def common_checks_context( + + schema_fields = schema_obj.get_pkg_schema_fields() + ++ fields_present, json_data_gen_paths = get_fields_present_and_generic_paths(json_data, {}, {}) ++ + additional_fields_all = get_additional_fields_info( +- json_data, schema_fields, context, fields_regex=fields_regex ++ json_data, schema_fields, context, fields_regex=fields_regex, fields_present=fields_present + ) + + additional_fields = sorted( +@@ -503,7 +505,6 @@ def common_checks_context( + } + ) + +- json_data_gen_paths = get_json_data_generic_paths(json_data, generic_paths={}) + context["deprecated_fields"] = get_json_data_deprecated_fields( + json_data_gen_paths, schema_obj + ) +@@ -588,8 +589,9 @@ def get_additional_codelist_values(schema_obj, json_data): + return additional_codelist_values + + +-def get_additional_fields_info(json_data, schema_fields, context, fields_regex=False): +- fields_present = get_fields_present_with_examples(json_data) ++def get_additional_fields_info(json_data, schema_fields, context, fields_regex=False, fields_present=None): ++ if not fields_present: ++ fields_present, _ = get_fields_present_and_generic_paths(json_data, {}, {}) + + additional_fields = {} + root_additional_fields = set() +@@ -831,7 +833,7 @@ def get_schema_validation_errors( + return dict(validation_errors) + + +-def get_json_data_generic_paths(json_data, generic_paths, path=(), generic_key=()): ++def get_fields_present_and_generic_paths(json_data, counter, generic_paths, prefix="", path=(), generic_key=()): + """Transform json data into a dictionary with keys made of json paths. + + Key are json paths (as tuples). Values are dictionaries with keys including specific +@@ -875,8 +877,16 @@ def get_json_data_generic_paths(json_data, generic_paths, path=(), generic_key=( + + for key, value in iterable: + new_path = path + (key,) ++ + if is_dict: ++ new_key = prefix + "/" + key ++ if new_key not in counter: ++ counter[new_key] = {"count": 1, "examples": []} ++ else: ++ counter[new_key]["count"] += 1 + new_generic_key = generic_key + (key,) ++ else: ++ new_key = prefix + + if new_generic_key in generic_paths: + generic_paths[new_generic_key][new_path] = value +@@ -884,9 +894,11 @@ def get_json_data_generic_paths(json_data, generic_paths, path=(), generic_key=( + generic_paths[new_generic_key] = {new_path: value} + + if isinstance(value, (dict, list)): +- get_json_data_generic_paths(value, generic_paths, new_path, new_generic_key) ++ get_fields_present_and_generic_paths(value, counter, generic_paths, new_key, new_path, new_generic_key) ++ elif len(counter[new_key]["examples"]) < 3: ++ counter[new_key]["examples"].append(value) + +- return generic_paths ++ return counter, generic_paths + + + def get_json_data_deprecated_fields(json_data_paths, schema_obj): +@@ -975,24 +987,11 @@ def _generate_data_path(json_data, path=()): + yield path + (key,), value + + +-def get_fields_present_with_examples(*args, **kwargs): +- counter = {} +- for key, value in fields_present_generator(*args, **kwargs): +- if key not in counter: +- counter[key] = {"count": 1, "examples": []} +- else: +- counter[key]["count"] += 1 +- if len(counter[key]["examples"]) < 3: +- if not isinstance(value, (list, dict)): +- counter[key]["examples"].append(value) +- +- return counter +- +- +-def get_fields_present(*args, **kwargs): ++def get_fields_present(json_data): ++ fields_present, _ = get_fields_present_and_generic_paths(json_data, {}, {}) + return { + key: value["count"] +- for key, value in get_fields_present_with_examples(*args, **kwargs).items() ++ for key, value in fields_present.items() + } + + +@@ -1151,19 +1150,6 @@ def _get_schema_non_required_ids( + return id_paths + + +-def fields_present_generator(json_data, prefix=""): +- if isinstance(json_data, dict): +- for key, value in json_data.items(): +- new_key = prefix + "/" + key +- yield new_key, value +- if isinstance(value, (dict, list)): +- yield from fields_present_generator(value, new_key) +- elif isinstance(json_data, list): +- for item in json_data: +- if isinstance(item, dict): +- yield from fields_present_generator(item, prefix) +- +- + def add_is_codelist(obj): + """This is needed so that we can detect enums that are arrays as the jsonschema library does not + give you any parent information and the codelist property is on the parent in this case. Only applies to