From 745e12470b6960af6407321017a971a24b963fee Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 27 Oct 2023 17:51:22 -0700 Subject: [PATCH] fix StrEnum --- .../api/entities/dataprocess/dataprocess_instance.py | 4 ++-- .../src/datahub/configuration/time_window_config.py | 3 ++- .../datahub/ingestion/glossary/datahub_classifier.py | 4 ++-- .../src/datahub/ingestion/graph/client.py | 4 ++-- .../src/datahub/ingestion/source/common/subtypes.py | 10 +++++----- .../src/datahub/ingestion/source/kafka.py | 4 ++-- .../ingestion/source/looker/looker_query_model.py | 6 +----- .../datahub/ingestion/source/snowflake/constants.py | 8 ++++---- .../ingestion/source/snowflake/snowflake_config.py | 4 ++-- .../src/datahub/ingestion/source/sql/presto_on_hive.py | 4 ++-- metadata-ingestion/tests/performance/data_model.py | 5 +++-- 11 files changed, 27 insertions(+), 29 deletions(-) diff --git a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py index cf6080c7072e6..fd55430dde706 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py +++ b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py @@ -1,6 +1,5 @@ import time from dataclasses import dataclass, field -from enum import Enum from typing import Callable, Dict, Iterable, List, Optional, Union, cast from datahub.api.entities.datajob import DataFlow, DataJob @@ -22,6 +21,7 @@ DataProcessTypeClass, StatusClass, ) +from datahub.utilities.str_enum import StrEnum from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn @@ -34,7 +34,7 @@ class DataProcessInstanceKey(DatahubKey): id: str -class InstanceRunResult(str, Enum): +class InstanceRunResult(StrEnum): SUCCESS = RunResultType.SUCCESS SKIPPED = RunResultType.SKIPPED FAILURE = RunResultType.FAILURE diff --git a/metadata-ingestion/src/datahub/configuration/time_window_config.py b/metadata-ingestion/src/datahub/configuration/time_window_config.py index 15de7470e4d82..d621be79522ad 100644 --- a/metadata-ingestion/src/datahub/configuration/time_window_config.py +++ b/metadata-ingestion/src/datahub/configuration/time_window_config.py @@ -9,10 +9,11 @@ from datahub.configuration.common import ConfigModel from datahub.configuration.datetimes import parse_absolute_time, parse_relative_timespan from datahub.metadata.schema_classes import CalendarIntervalClass +from datahub.utilities.str_enum import StrEnum @enum.unique -class BucketDuration(str, enum.Enum): +class BucketDuration(StrEnum): DAY = CalendarIntervalClass.DAY HOUR = CalendarIntervalClass.HOUR diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py index 1f2b7f5689ea3..9314da909f055 100644 --- a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py +++ b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py @@ -1,4 +1,3 @@ -from enum import Enum from typing import Any, Dict, List, Optional from datahub_classify.helper_classes import ColumnInfo @@ -9,6 +8,7 @@ from datahub.configuration.common import ConfigModel from datahub.ingestion.glossary.classifier import Classifier +from datahub.utilities.str_enum import StrEnum class NameFactorConfig(ConfigModel): @@ -32,7 +32,7 @@ class DataTypeFactorConfig(ConfigModel): ) -class ValuePredictionType(str, Enum): +class ValuePredictionType(StrEnum): REGEX = "regex" LIBRARY = "library" diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index ccff677c3a471..3ee8d52651c20 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -1,4 +1,3 @@ -import enum import functools import json import logging @@ -44,6 +43,7 @@ TelemetryClientIdClass, ) from datahub.utilities.perf_timer import PerfTimer +from datahub.utilities.str_enum import StrEnum from datahub.utilities.urns.urn import Urn, guess_entity_type if TYPE_CHECKING: @@ -802,7 +802,7 @@ def execute_graphql(self, query: str, variables: Optional[Dict] = None) -> Dict: return result["data"] - class RelationshipDirection(str, enum.Enum): + class RelationshipDirection(StrEnum): # FIXME: Upgrade to enum.StrEnum when we drop support for Python 3.10 INCOMING = "INCOMING" diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 741b4789bef21..0244069c1f978 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -1,7 +1,7 @@ -from enum import Enum +from datahub.utilities.str_enum import StrEnum -class DatasetSubTypes(str, Enum): +class DatasetSubTypes(StrEnum): # Generic SubTypes TABLE = "Table" VIEW = "View" @@ -20,7 +20,7 @@ class DatasetSubTypes(str, Enum): NOTEBOOK = "Notebook" -class DatasetContainerSubTypes(str, Enum): +class DatasetContainerSubTypes(StrEnum): # Generic SubTypes DATABASE = "Database" SCHEMA = "Schema" @@ -34,13 +34,13 @@ class DatasetContainerSubTypes(str, Enum): GCS_BUCKET = "GCS bucket" -class BIContainerSubTypes(str, Enum): +class BIContainerSubTypes(StrEnum): LOOKER_FOLDER = "Folder" TABLEAU_WORKBOOK = "Workbook" POWERBI_WORKSPACE = "Workspace" POWERBI_DATASET = "PowerBI Dataset" -class BIAssetSubTypes(str, Enum): +class BIAssetSubTypes(StrEnum): # Generic SubTypes REPORT = "Report" diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 23770ff3cf812..2b8226a67bee3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -2,7 +2,6 @@ import json import logging from dataclasses import dataclass, field -from enum import Enum from typing import Any, Dict, Iterable, List, Optional, Type, cast import avro.schema @@ -66,11 +65,12 @@ ) from datahub.utilities.mapping import Constants, OperationProcessor from datahub.utilities.registries.domain_registry import DomainRegistry +from datahub.utilities.str_enum import StrEnum logger = logging.getLogger(__name__) -class KafkaTopicConfigKeys(str, Enum): +class KafkaTopicConfigKeys(StrEnum): MIN_INSYNC_REPLICAS_CONFIG = "min.insync.replicas" RETENTION_SIZE_CONFIG = "retention.bytes" RETENTION_TIME_CONFIG = "retention.ms" diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py index b3002828ceeff..7ed46c8f7084c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py @@ -1,13 +1,9 @@ from dataclasses import dataclass, field -from enum import Enum from typing import Dict, List, cast from looker_sdk.sdk.api40.models import WriteQuery - -# Enum whose value is string and compatible with dictionary having string value as key -class StrEnum(str, Enum): - pass +from datahub.utilities.str_enum import StrEnum class LookerModel(StrEnum): diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py index 6f5e22e39d0c8..9ffe89d8f1c27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py @@ -1,7 +1,7 @@ -from enum import Enum +from datahub.utilities.str_enum import StrEnum -class SnowflakeCloudProvider(str, Enum): +class SnowflakeCloudProvider(StrEnum): AWS = "aws" GCP = "gcp" AZURE = "azure" @@ -10,7 +10,7 @@ class SnowflakeCloudProvider(str, Enum): SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS -class SnowflakeEdition(str, Enum): +class SnowflakeEdition(StrEnum): STANDARD = "Standard" # We use this to represent Enterprise Edition or higher @@ -44,7 +44,7 @@ class SnowflakeEdition(str, Enum): # We will always compare with lowercase # Complete list for objectDomain - https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html -class SnowflakeObjectDomain(str, Enum): +class SnowflakeObjectDomain(StrEnum): TABLE = "table" EXTERNAL_TABLE = "external table" VIEW = "view" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 032bdef178fdf..b820d8239283f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -1,7 +1,6 @@ import logging from collections import defaultdict from dataclasses import dataclass -from enum import Enum from typing import Dict, List, Optional, Set, cast from pydantic import Field, SecretStr, root_validator, validator @@ -24,6 +23,7 @@ ) from datahub.ingestion.source_config.usage.snowflake_usage import SnowflakeUsageConfig from datahub.utilities.global_warning_util import add_global_warning +from datahub.utilities.str_enum import StrEnum logger = logging.Logger(__name__) @@ -40,7 +40,7 @@ ] -class TagOption(str, Enum): +class TagOption(StrEnum): with_lineage = "with_lineage" without_lineage = "without_lineage" skip = "skip" diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py index ceb9ecacb25d2..fe916cae6df1d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py @@ -2,7 +2,6 @@ import json import logging from collections import namedtuple -from enum import Enum from itertools import groupby from typing import Any, Dict, Iterable, List, Optional, Tuple, Union @@ -61,13 +60,14 @@ ViewPropertiesClass, ) from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column +from datahub.utilities.str_enum import StrEnum logger: logging.Logger = logging.getLogger(__name__) TableKey = namedtuple("TableKey", ["schema", "table"]) -class PrestoOnHiveConfigMode(str, Enum): +class PrestoOnHiveConfigMode(StrEnum): hive: str = "hive" # noqa: F811 presto: str = "presto" presto_on_hive: str = "presto-on-hive" diff --git a/metadata-ingestion/tests/performance/data_model.py b/metadata-ingestion/tests/performance/data_model.py index 9425fa827070e..2a1da6a7917a2 100644 --- a/metadata-ingestion/tests/performance/data_model.py +++ b/metadata-ingestion/tests/performance/data_model.py @@ -1,10 +1,11 @@ from dataclasses import dataclass from datetime import datetime -from enum import Enum from typing import Dict, List, Optional from typing_extensions import Literal +from datahub.utilities.str_enum import StrEnum + StatementType = Literal[ # SELECT + values from OperationTypeClass "SELECT", "INSERT", @@ -24,7 +25,7 @@ class Container: parent: Optional["Container"] = None -class ColumnType(str, Enum): +class ColumnType(StrEnum): # Can add types that take parameters in the future INTEGER = "INTEGER"