-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(ingestion): extend feast plugin to ingest tags and owners #11784
base: master
Are you sure you want to change the base?
Changes from 9 commits
ef24dc4
0494431
429f663
94fc26a
c14f94e
f471598
33535bb
cfdccec
6a2cc71
4e8916a
7620c98
adb4809
f21d1a0
c066e6a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
#!/bin/bash -e | ||
#!/bin/bash -e | ||
|
||
#From https://stackoverflow.com/questions/4023830/how-to-compare-two-strings-in-dot-separated-version-format-in-bash | ||
verlte() { | ||
|
@@ -45,7 +45,7 @@ arm64_darwin_preflight() { | |
pip3 install --no-use-pep517 scipy | ||
fi | ||
|
||
brew_install "openssl@1.1" | ||
brew_install "openssl@3.0.14" | ||
brew install "postgresql@14" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Quick question - Why this change? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I needed to upgrade the dependency version because the 1.1 version was deprecated. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I reverted to the previous version. |
||
|
||
# postgresql installs libs in a strange way | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,11 @@ | |
) | ||
from datahub.ingestion.api.source import Source, SourceReport | ||
from datahub.ingestion.api.workunit import MetadataWorkUnit | ||
from datahub.metadata._schema_classes import ( | ||
OwnerClass, | ||
OwnershipClass, | ||
OwnershipTypeClass, | ||
) | ||
from datahub.metadata.com.linkedin.pegasus2avro.common import MLFeatureDataType | ||
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import ( | ||
MLFeatureSnapshot, | ||
|
@@ -42,10 +47,12 @@ | |
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent | ||
from datahub.metadata.schema_classes import ( | ||
BrowsePathsClass, | ||
GlobalTagsClass, | ||
MLFeaturePropertiesClass, | ||
MLFeatureTablePropertiesClass, | ||
MLPrimaryKeyPropertiesClass, | ||
StatusClass, | ||
TagAssociationClass, | ||
) | ||
|
||
# FIXME: ValueType module cannot be used as a type | ||
|
@@ -216,9 +223,26 @@ def _get_entity_workunit( | |
|
||
feature_view_name = f"{self.feature_store.project}.{feature_view.name}" | ||
|
||
aspects = [StatusClass(removed=False)] | ||
|
||
if entity.tags.get("name"): | ||
tag: str = entity.tags.get("name") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Minor - Any way to extract the owner and tags logic into a reusable method? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Already created a reusable method for tags and owner logic. Only thing missing is regarding the
the issue here is having to specify in Datahub repo the specific use cases that each team might have (for example, in our case we will have an owner "MLOps") which might not be applicable to other teams. |
||
tag_association = TagAssociationClass(tag=builder.make_tag_urn(tag)) | ||
global_tags_aspect = GlobalTagsClass(tags=[tag_association]) | ||
aspects.append(global_tags_aspect) | ||
|
||
if entity.owner: | ||
owner = entity.owner | ||
owner_association = OwnerClass( | ||
owner=builder.make_owner_urn(owner, owner_type=builder.OwnerType.USER), | ||
type=OwnershipTypeClass.TECHNICAL_OWNER, | ||
) | ||
owners_aspect = OwnershipClass(owners=[owner_association]) | ||
aspects.append(owners_aspect) | ||
|
||
entity_snapshot = MLPrimaryKeySnapshot( | ||
urn=builder.make_ml_primary_key_urn(feature_view_name, entity.name), | ||
aspects=[StatusClass(removed=False)], | ||
aspects=aspects, | ||
) | ||
|
||
entity_snapshot.aspects.append( | ||
|
@@ -243,10 +267,20 @@ def _get_feature_workunit( | |
Generate an MLFeature work unit for a Feast feature. | ||
""" | ||
feature_view_name = f"{self.feature_store.project}.{feature_view.name}" | ||
global_tags_aspect = None | ||
|
||
if field.tags.get("name"): | ||
tag_name = field.tags.get("name") | ||
tag_association = TagAssociationClass(tag=builder.make_tag_urn(tag_name)) | ||
global_tags_aspect = GlobalTagsClass(tags=[tag_association]) | ||
|
||
aspects = [StatusClass(removed=False)] | ||
if global_tags_aspect is not None: | ||
aspects.append(global_tags_aspect) | ||
|
||
feature_snapshot = MLFeatureSnapshot( | ||
urn=builder.make_ml_feature_urn(feature_view_name, field.name), | ||
aspects=[StatusClass(removed=False)], | ||
aspects=aspects, | ||
) | ||
|
||
feature_sources = [] | ||
|
@@ -296,12 +330,29 @@ def _get_feature_view_workunit(self, feature_view: FeatureView) -> MetadataWorkU | |
|
||
feature_view_name = f"{self.feature_store.project}.{feature_view.name}" | ||
|
||
aspects = [ | ||
BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]), | ||
StatusClass(removed=False), | ||
] | ||
|
||
if feature_view.tags.get("name"): | ||
tag = feature_view.tags.get("name") | ||
tag_association = TagAssociationClass(tag=builder.make_tag_urn(tag)) | ||
global_tags_aspect = GlobalTagsClass(tags=[tag_association]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just note: If you've attached tags in DataHub, this will replace them by default :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The goal is to have Feast as our source of truth, so the fields must be defined in the Feast repo. |
||
aspects.append(global_tags_aspect) | ||
|
||
if feature_view.owner: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just note: If you've attached owners in DataHub UI, this will replace them by default :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See comment above. |
||
owner = feature_view.owner | ||
owner_association = OwnerClass( | ||
owner=builder.make_owner_urn(owner, owner_type=builder.OwnerType.USER), | ||
type=OwnershipTypeClass.TECHNICAL_OWNER, | ||
) | ||
owners_aspect = OwnershipClass(owners=[owner_association]) | ||
aspects.append(owners_aspect) | ||
|
||
feature_view_snapshot = MLFeatureTableSnapshot( | ||
urn=builder.make_ml_feature_table_urn("feast", feature_view_name), | ||
aspects=[ | ||
BrowsePathsClass(paths=[f"/feast/{self.feature_store.project}"]), | ||
StatusClass(removed=False), | ||
], | ||
aspects=aspects, | ||
) | ||
|
||
feature_view_snapshot.aspects.append( | ||
|
@@ -366,6 +417,7 @@ def create(cls, config_dict, ctx): | |
return cls(config, ctx) | ||
|
||
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: | ||
|
||
for feature_view in self.feature_store.list_feature_views(): | ||
for entity_name in feature_view.entities: | ||
entity = self.feature_store.get_entity(entity_name) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why this space change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed