From 9dc9816e2044531338bcb302e5faa99a8720aa7a Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 25 Aug 2023 20:00:18 +0530 Subject: [PATCH 1/7] tmp ownerhsip from tags --- metadata-ingestion/setup.py | 1 + .../extract_ownership_from_tags.py | 43 +++++++++++++++++++ .../tests/unit/test_transform_dataset.py | 28 ++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 32e1cf926cc685..ffbfc854604792 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -630,6 +630,7 @@ def get_long_description(): "simple_add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:SimpleAddDatasetProperties", "pattern_add_dataset_schema_terms = datahub.ingestion.transformer.add_dataset_schema_terms:PatternAddDatasetSchemaTerms", "pattern_add_dataset_schema_tags = datahub.ingestion.transformer.add_dataset_schema_tags:PatternAddDatasetSchemaTags", + "extract_owners_from_tags = datahub.ingestion.transformer.extract_ownership_from_tags:ExtractOwnersFromTagsTransformer", ], "datahub.ingestion.sink.plugins": [ "file = datahub.ingestion.sink.file:FileSink", diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py new file mode 100644 index 00000000000000..57e49ad64cf6a3 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -0,0 +1,43 @@ +from typing import List, Optional, cast + +import pydantic + +from datahub.configuration.common import ( + TransformerSemantics, + TransformerSemanticsConfigModel, +) +from datahub.emitter.mce_builder import Aspect +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.transformer.dataset_transformer import DatasetTagsTransformer +from datahub.metadata.schema_classes import OwnershipClass + + +class ExtractOwnersFromTagsConfig(TransformerSemanticsConfigModel): + tag_prefix: str + + +class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer): + """Transformer that can be used to set extract ownership from entity tags (currently does not support column level tags)""" + + ctx: PipelineContext + config: ExtractOwnersFromTagsConfig + + def __init__(self, config: ExtractOwnersFromTagsConfig, ctx: PipelineContext): + super().__init__() + self.ctx = ctx + self.config = config + + @classmethod + def create( + cls, config_dict: dict, ctx: PipelineContext + ) -> "ExtractOwnersFromTagsConfig": + config = ExtractOwnersFromTagsConfig.parse_obj(config_dict) + return cls(config, ctx) + + def transform_aspect( + self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] + ) -> Optional[Aspect]: + + owner_aspect = OwnershipClass(owners=[]) + return cast(Aspect, owner_aspect) diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index 8b2535eea1fe9d..6b4bd4208c1a5e 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -62,6 +62,9 @@ ) from datahub.ingestion.transformer.dataset_transformer import DatasetTransformer from datahub.ingestion.transformer.extract_dataset_tags import ExtractDatasetTags +from datahub.ingestion.transformer.extract_ownership_from_tags import ( + ExtractOwnersFromTagsTransformer, +) from datahub.ingestion.transformer.mark_dataset_status import MarkDatasetStatus from datahub.ingestion.transformer.remove_dataset_ownership import ( SimpleRemoveDatasetOwnership, @@ -586,6 +589,31 @@ def test_mark_status_dataset(tmp_path): ) +def test_extract_owners_from_tags(): + # TODO Implement this + dataset = make_generic_dataset( + aspects=[models.GlobalTagsClass(tags=["urn:li:tag:owner:foo"])] + ) + transformer = ExtractOwnersFromTagsTransformer.create( + { + "tag_prefix": "owner:", + }, + PipelineContext(run_id="test"), + ) + transformed = list( + transformer.transform( + [ + RecordEnvelope(dataset, metadata={}), + RecordEnvelope(EndOfStream(), metadata={}), + ] + ) + ) + owners_aspect = transformed[1].record.aspect + assert owners_aspect + print(owners_aspect) + assert owners_aspect is None + + def test_add_dataset_browse_paths(): dataset = make_generic_dataset() From 9aced13654d2823a024812eed588c9c366d76dbb Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 25 Aug 2023 20:28:03 +0530 Subject: [PATCH 2/7] wip --- metadata-ingestion/tests/unit/test_transform_dataset.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index 6b4bd4208c1a5e..43c760e9ca96c5 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -590,7 +590,6 @@ def test_mark_status_dataset(tmp_path): def test_extract_owners_from_tags(): - # TODO Implement this dataset = make_generic_dataset( aspects=[models.GlobalTagsClass(tags=["urn:li:tag:owner:foo"])] ) @@ -604,11 +603,13 @@ def test_extract_owners_from_tags(): transformer.transform( [ RecordEnvelope(dataset, metadata={}), - RecordEnvelope(EndOfStream(), metadata={}), ] ) ) - owners_aspect = transformed[1].record.aspect + for item in transformed: + print(item) + # TODO Implement this after figuring what is the correct approach currently + owners_aspect = transformed[0].record.proposedSnapshot.aspects[0] assert owners_aspect print(owners_aspect) assert owners_aspect is None From a3766fa712929325d0503ddcfdece5f02bad5843 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 29 Aug 2023 22:33:58 +0530 Subject: [PATCH 3/7] feat(transfomer): add transformer to get ownership from tags --- .../extract_ownership_from_tags.py | 23 ++++++++++++++----- .../tests/unit/test_transform_dataset.py | 4 +--- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index 57e49ad64cf6a3..4097ed73532813 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -1,20 +1,18 @@ from typing import List, Optional, cast -import pydantic - from datahub.configuration.common import ( - TransformerSemantics, TransformerSemanticsConfigModel, ) from datahub.emitter.mce_builder import Aspect from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.transformer.dataset_transformer import DatasetTagsTransformer -from datahub.metadata.schema_classes import OwnershipClass +from datahub.metadata.schema_classes import OwnershipClass, GlobalTagsClass, OwnerClass, OwnershipTypeClass +from datahub.utilities.urns.tag_urn import TagUrn class ExtractOwnersFromTagsConfig(TransformerSemanticsConfigModel): tag_prefix: str + # TODO Add type as an option here and use that instead of hard-coding type of owner class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer): @@ -39,5 +37,18 @@ def transform_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] ) -> Optional[Aspect]: - owner_aspect = OwnershipClass(owners=[]) + in_tags_aspect: Optional[GlobalTagsClass] = cast(GlobalTagsClass, aspect) + if in_tags_aspect is None: + return None + tags_str = in_tags_aspect.tags + owners: List[OwnerClass] = [] + for tag in tags_str: + tag_urn = TagUrn.create_from_string(tag) + tag_str = tag_urn.get_entity_id()[0] + if tag_str.startswith(self.config.tag_prefix): + + owner = OwnerClass(owner=tag_str[len(self.config.tag_prefix) :], type=OwnershipTypeClass.TECHNICAL_OWNER) + owners.append(owner) + + owner_aspect = OwnershipClass(owners=owners) return cast(Aspect, owner_aspect) diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index 43c760e9ca96c5..a6ba2fd29025e7 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -606,11 +606,9 @@ def test_extract_owners_from_tags(): ] ) ) - for item in transformed: - print(item) - # TODO Implement this after figuring what is the correct approach currently owners_aspect = transformed[0].record.proposedSnapshot.aspects[0] assert owners_aspect + # TODO implement this test properly print(owners_aspect) assert owners_aspect is None From 4219a7396c1ab7ac895627bce9042ac1eb7608e8 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 4 Sep 2023 17:27:56 +0530 Subject: [PATCH 4/7] more implementation --- .../extract_ownership_from_tags.py | 49 +++++++++-- .../tests/unit/test_transform_dataset.py | 81 +++++++++++++++---- 2 files changed, 106 insertions(+), 24 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index 4097ed73532813..743a434001bfb1 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -1,18 +1,35 @@ +from functools import lru_cache from typing import List, Optional, cast -from datahub.configuration.common import ( - TransformerSemanticsConfigModel, -) +from datahub.configuration.common import TransformerSemanticsConfigModel from datahub.emitter.mce_builder import Aspect from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.transformer.dataset_transformer import DatasetTagsTransformer -from datahub.metadata.schema_classes import OwnershipClass, GlobalTagsClass, OwnerClass, OwnershipTypeClass +from datahub.metadata.schema_classes import ( + GlobalTagsClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, +) +from datahub.utilities.urns.corp_group_urn import CorpGroupUrn +from datahub.utilities.urns.corpuser_urn import CorpuserUrn from datahub.utilities.urns.tag_urn import TagUrn class ExtractOwnersFromTagsConfig(TransformerSemanticsConfigModel): tag_prefix: str - # TODO Add type as an option here and use that instead of hard-coding type of owner + is_user: bool = True + email_domain: Optional[str] = None + owner_type: str = "TECHNICAL_OWNER" + owner_type_urn: Optional[str] = None + + +@lru_cache(maxsize=10) +def get_owner_type(owner_type_str: str) -> Optional[OwnershipTypeClass]: + for item in dir(OwnershipTypeClass): + if str(item) == owner_type_str: + return item + return OwnershipTypeClass.CUSTOM class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer): @@ -36,7 +53,6 @@ def create( def transform_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] ) -> Optional[Aspect]: - in_tags_aspect: Optional[GlobalTagsClass] = cast(GlobalTagsClass, aspect) if in_tags_aspect is None: return None @@ -46,8 +62,25 @@ def transform_aspect( tag_urn = TagUrn.create_from_string(tag) tag_str = tag_urn.get_entity_id()[0] if tag_str.startswith(self.config.tag_prefix): - - owner = OwnerClass(owner=tag_str[len(self.config.tag_prefix) :], type=OwnershipTypeClass.TECHNICAL_OWNER) + owner_str = tag_str[len(self.config.tag_prefix) :] + if self.config.email_domain is not None: + owner_urn_str = owner_str + "@" + self.config.email_domain + else: + owner_urn_str = owner_str + if self.config.is_user: + owner_urn = CorpuserUrn.create_from_id(owner_urn_str) + else: + owner_urn = CorpGroupUrn.create_from_id(owner_urn_str) + owner_type = get_owner_type(self.config.owner_type) + if owner_type == OwnershipTypeClass.CUSTOM: + assert ( + self.config.owner_type_urn is not None + ), "owner_type_urn must be set if owner_type is CUSTOM" + owner = OwnerClass( + owner=str(owner_urn), + type=owner_type, + typeUrn=self.config.owner_type_urn, + ) owners.append(owner) owner_aspect = OwnershipClass(owners=owners) diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index a6ba2fd29025e7..04f1983e035a64 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -75,6 +75,7 @@ GlobalTagsClass, MetadataChangeEventClass, OwnershipClass, + OwnershipTypeClass, StatusClass, TagAssociationClass, ) @@ -590,27 +591,75 @@ def test_mark_status_dataset(tmp_path): def test_extract_owners_from_tags(): - dataset = make_generic_dataset( - aspects=[models.GlobalTagsClass(tags=["urn:li:tag:owner:foo"])] + def _test_owner( + tag: str, + config: Dict, + expected_owner: str, + expected_owner_type: Optional[OwnershipTypeClass] = None, + ) -> None: + dataset = make_generic_dataset(aspects=[models.GlobalTagsClass(tags=[tag])]) + transformer = ExtractOwnersFromTagsTransformer.create( + config, + PipelineContext(run_id="test"), + ) + transformed = list( + transformer.transform( + [ + RecordEnvelope(dataset, metadata={}), + ] + ) + ) + owners_aspect = transformed[0].record.proposedSnapshot.aspects[0] + owners = owners_aspect.owners + owner = owners[0] + if expected_owner_type is not None: + assert owner.type == expected_owner_type + assert owner.owner == expected_owner + + _test_owner( + tag="urn:li:tag:owner:foo", + config={ + "tag_prefix": "owner:", + }, + expected_owner="urn:li:corpuser:foo", ) - transformer = ExtractOwnersFromTagsTransformer.create( - { + _test_owner( + tag="urn:li:tag:owner:foo", + config={ "tag_prefix": "owner:", + "is_user": False, }, - PipelineContext(run_id="test"), + expected_owner="urn:li:corpGroup:foo", ) - transformed = list( - transformer.transform( - [ - RecordEnvelope(dataset, metadata={}), - ] - ) + _test_owner( + tag="urn:li:tag:owner:foo", + config={ + "tag_prefix": "owner:", + "email_domain": "example.com", + }, + expected_owner="urn:li:corpuser:foo@example.com", + ) + _test_owner( + tag="urn:li:tag:owner:foo", + config={ + "tag_prefix": "owner:", + "email_domain": "example.com", + "owner_type": "TECHNICAL_OWNER", + }, + expected_owner="urn:li:corpuser:foo@example.com", + expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER, + ) + _test_owner( + tag="urn:li:tag:owner:foo", + config={ + "tag_prefix": "owner:", + "email_domain": "example.com", + "owner_type": "AUTHOR", + "owner_type_urn": "urn:li:ownershipType:ad8557d6-dcb9-4d2a-83fc-b7d0d54f3e0f", + }, + expected_owner="urn:li:corpuser:foo@example.com", + expected_owner_type=OwnershipTypeClass.CUSTOM, ) - owners_aspect = transformed[0].record.proposedSnapshot.aspects[0] - assert owners_aspect - # TODO implement this test properly - print(owners_aspect) - assert owners_aspect is None def test_add_dataset_browse_paths(): From a91413d44a52e59208a146de16e3a36f63cd70c4 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 4 Sep 2023 18:16:43 +0530 Subject: [PATCH 5/7] minor changes + change deprecated fields in examples --- docs/how/add-custom-data-platform.md | 2 +- docs/how/add-user-data.md | 2 +- docs/ownership/ownership-types.md | 2 +- .../transformer/extract_ownership_from_tags.py | 16 +++++++++------- .../tests/unit/test_transform_dataset.py | 18 ++++++++++++------ 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/docs/how/add-custom-data-platform.md b/docs/how/add-custom-data-platform.md index a4ea32af455c11..5dcd423e775698 100644 --- a/docs/how/add-custom-data-platform.md +++ b/docs/how/add-custom-data-platform.md @@ -77,7 +77,7 @@ datahub put platform --name MyCustomDataPlatform --display_name "My Custom Data source: type: "file" config: - filename: "./my-custom-data-platform.json" + path: "./my-custom-data-platform.json" # see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation sink: diff --git a/docs/how/add-user-data.md b/docs/how/add-user-data.md index ea76c97163ddda..035821ab75879d 100644 --- a/docs/how/add-user-data.md +++ b/docs/how/add-user-data.md @@ -57,7 +57,7 @@ Define an [ingestion recipe](https://datahubproject.io/docs/metadata-ingestion/# source: type: "file" config: - filename: "./my-user.json" + path: "./my-user.json" # see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation sink: diff --git a/docs/ownership/ownership-types.md b/docs/ownership/ownership-types.md index 2dbefaa488140d..ae3a13ae1035c3 100644 --- a/docs/ownership/ownership-types.md +++ b/docs/ownership/ownership-types.md @@ -85,7 +85,7 @@ source: type: "file" config: # path to json file - filename: "metadata-ingestion/examples/ownership/ownership_type.json" + path: "metadata-ingestion/examples/ownership/ownership_type.json" # see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation sink: diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index 743a434001bfb1..f1a57e88a8fd49 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -50,23 +50,25 @@ def create( config = ExtractOwnersFromTagsConfig.parse_obj(config_dict) return cls(config, ctx) + def get_owner_urn(self, owner_str: str): + if self.config.email_domain is not None: + return owner_str + "@" + self.config.email_domain + return owner_str + def transform_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] ) -> Optional[Aspect]: in_tags_aspect: Optional[GlobalTagsClass] = cast(GlobalTagsClass, aspect) if in_tags_aspect is None: return None - tags_str = in_tags_aspect.tags + tags = in_tags_aspect.tags owners: List[OwnerClass] = [] - for tag in tags_str: - tag_urn = TagUrn.create_from_string(tag) + for tag_class in tags: + tag_urn = TagUrn.create_from_string(tag_class.tag) tag_str = tag_urn.get_entity_id()[0] if tag_str.startswith(self.config.tag_prefix): owner_str = tag_str[len(self.config.tag_prefix) :] - if self.config.email_domain is not None: - owner_urn_str = owner_str + "@" + self.config.email_domain - else: - owner_urn_str = owner_str + owner_urn_str = self.get_owner_urn(owner_str) if self.config.is_user: owner_urn = CorpuserUrn.create_from_id(owner_urn_str) else: diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index 04f1983e035a64..ec5f49bc7f9ceb 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -597,7 +597,13 @@ def _test_owner( expected_owner: str, expected_owner_type: Optional[OwnershipTypeClass] = None, ) -> None: - dataset = make_generic_dataset(aspects=[models.GlobalTagsClass(tags=[tag])]) + dataset = make_generic_dataset( + aspects=[ + models.GlobalTagsClass( + tags=[TagAssociationClass(tag=builder.make_tag_urn(tag))] + ) + ] + ) transformer = ExtractOwnersFromTagsTransformer.create( config, PipelineContext(run_id="test"), @@ -617,14 +623,14 @@ def _test_owner( assert owner.owner == expected_owner _test_owner( - tag="urn:li:tag:owner:foo", + tag="owner:foo", config={ "tag_prefix": "owner:", }, expected_owner="urn:li:corpuser:foo", ) _test_owner( - tag="urn:li:tag:owner:foo", + tag="owner:foo", config={ "tag_prefix": "owner:", "is_user": False, @@ -632,7 +638,7 @@ def _test_owner( expected_owner="urn:li:corpGroup:foo", ) _test_owner( - tag="urn:li:tag:owner:foo", + tag="owner:foo", config={ "tag_prefix": "owner:", "email_domain": "example.com", @@ -640,7 +646,7 @@ def _test_owner( expected_owner="urn:li:corpuser:foo@example.com", ) _test_owner( - tag="urn:li:tag:owner:foo", + tag="owner:foo", config={ "tag_prefix": "owner:", "email_domain": "example.com", @@ -650,7 +656,7 @@ def _test_owner( expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER, ) _test_owner( - tag="urn:li:tag:owner:foo", + tag="owner:foo", config={ "tag_prefix": "owner:", "email_domain": "example.com", From c21d66a1317a4f6e2e0ad3bb2866b0f3062bbe62 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 4 Sep 2023 18:20:42 +0530 Subject: [PATCH 6/7] fix lint --- .../ingestion/transformer/extract_ownership_from_tags.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index f1a57e88a8fd49..23819a2d14b1dd 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -28,7 +28,7 @@ class ExtractOwnersFromTagsConfig(TransformerSemanticsConfigModel): def get_owner_type(owner_type_str: str) -> Optional[OwnershipTypeClass]: for item in dir(OwnershipTypeClass): if str(item) == owner_type_str: - return item + return getattr(OwnershipTypeClass, item) return OwnershipTypeClass.CUSTOM @@ -46,7 +46,7 @@ def __init__(self, config: ExtractOwnersFromTagsConfig, ctx: PipelineContext): @classmethod def create( cls, config_dict: dict, ctx: PipelineContext - ) -> "ExtractOwnersFromTagsConfig": + ) -> "ExtractOwnersFromTagsTransformer": config = ExtractOwnersFromTagsConfig.parse_obj(config_dict) return cls(config, ctx) From a488e2eb320c8ab931f4e87dcd2d92c3666365c7 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 4 Sep 2023 18:26:11 +0530 Subject: [PATCH 7/7] fix more lint --- .../transformer/extract_ownership_from_tags.py | 12 ++++++------ .../tests/unit/test_transform_dataset.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index 23819a2d14b1dd..ee8d15c2ece0fc 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -25,10 +25,10 @@ class ExtractOwnersFromTagsConfig(TransformerSemanticsConfigModel): @lru_cache(maxsize=10) -def get_owner_type(owner_type_str: str) -> Optional[OwnershipTypeClass]: +def get_owner_type(owner_type_str: str) -> str: for item in dir(OwnershipTypeClass): if str(item) == owner_type_str: - return getattr(OwnershipTypeClass, item) + return item return OwnershipTypeClass.CUSTOM @@ -50,7 +50,7 @@ def create( config = ExtractOwnersFromTagsConfig.parse_obj(config_dict) return cls(config, ctx) - def get_owner_urn(self, owner_str: str): + def get_owner_urn(self, owner_str: str) -> str: if self.config.email_domain is not None: return owner_str + "@" + self.config.email_domain return owner_str @@ -70,16 +70,16 @@ def transform_aspect( owner_str = tag_str[len(self.config.tag_prefix) :] owner_urn_str = self.get_owner_urn(owner_str) if self.config.is_user: - owner_urn = CorpuserUrn.create_from_id(owner_urn_str) + owner_urn = str(CorpuserUrn.create_from_id(owner_urn_str)) else: - owner_urn = CorpGroupUrn.create_from_id(owner_urn_str) + owner_urn = str(CorpGroupUrn.create_from_id(owner_urn_str)) owner_type = get_owner_type(self.config.owner_type) if owner_type == OwnershipTypeClass.CUSTOM: assert ( self.config.owner_type_urn is not None ), "owner_type_urn must be set if owner_type is CUSTOM" owner = OwnerClass( - owner=str(owner_urn), + owner=owner_urn, type=owner_type, typeUrn=self.config.owner_type_urn, ) diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index ec5f49bc7f9ceb..6939916c7d2bf5 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -595,7 +595,7 @@ def _test_owner( tag: str, config: Dict, expected_owner: str, - expected_owner_type: Optional[OwnershipTypeClass] = None, + expected_owner_type: Optional[str] = None, ) -> None: dataset = make_generic_dataset( aspects=[