From 12c472d91844a19bfbd801cab6743ee0fba35d4f Mon Sep 17 00:00:00 2001
From: Benjamin Dornel <62495124+benjamin-awd@users.noreply.github.com>
Date: Tue, 1 Aug 2023 22:05:40 +0800
Subject: [PATCH] fix(ingest/json-schema): convert non-string enums to strings (#8479)

---
 .../datahub/ingestion/extractor/json_schema_util.py | 4 +++-
 .../tests/unit/schema/test_json_schema_util.py | 13 +++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
index cfa1b7609c0b1..8e313e92cbf84 100644
--- a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
+++ b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py
@@ -254,11 +254,13 @@ def _field_from_primitive(
                 isPartOfKey=field_path.is_key_schema,
             )
         elif datahub_field_type in [EnumTypeClass]:
+            # Convert enums to string representation
+            schema_enums = list(map(json.dumps, schema["enum"]))
             yield SchemaField(
                 fieldPath=field_path.expand_type("enum", schema).as_string(),
                 type=type_override or SchemaFieldDataTypeClass(type=EnumTypeClass()),
                 nativeDataType="Enum",
-                description=f"one of {','.join(schema['enum'])}",
+                description=f"One of: {', '.join(schema_enums)}",
                 nullable=nullable,
                 jsonProps=JsonSchemaTranslator._get_jsonprops_for_any_schema(
                     schema, required=required
diff --git a/metadata-ingestion/tests/unit/schema/test_json_schema_util.py b/metadata-ingestion/tests/unit/schema/test_json_schema_util.py
index 731ee336621fd..2635363ed8d2e 100644
--- a/metadata-ingestion/tests/unit/schema/test_json_schema_util.py
+++ b/metadata-ingestion/tests/unit/schema/test_json_schema_util.py
@@ -712,6 +712,19 @@ def test_required_field():
     assert json.loads(fields[0].jsonProps or "{}")["required"] is False
 
 
+def test_non_str_enums():
+    schema = {
+        "$id": "test",
+        "$schema": "http://json-schema.org/draft-06/schema#",
+        "properties": {"bar": {"description": "Mixed enum", "enum": ["baz", 1, None]}},
+    }
+
+    fields = list(JsonSchemaTranslator.get_fields_from_schema(schema))
+    expected_field_paths: List[str] = ["[version=2.0].[type=object].[type=enum].bar"]
+    assert_field_paths_match(fields, expected_field_paths)
+    assert fields[0].description == 'One of: "baz", 1, null'
+
+
 def test_anyof_with_properties():
     # We expect the event / timestamp fields to be included in both branches of the anyOf.