-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(ingest): support view lineage for all sqlalchemy sources #9039
Changes from 7 commits
75f63a2
93b7ba1
7c18697
bf4ecb5
b74e794
337ca34
e60a7f4
1fe4b02
c5ca9b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -130,10 +130,13 @@ def auto_incremental_lineage( | |
if len(wu.metadata.proposedSnapshot.aspects) > 0: | ||
yield wu | ||
|
||
yield _lineage_wu_via_read_modify_write( | ||
graph, urn, lineage_aspect, wu.metadata.systemMetadata | ||
) if lineage_aspect.fineGrainedLineages else _convert_upstream_lineage_to_patch( | ||
urn, lineage_aspect, wu.metadata.systemMetadata | ||
) | ||
if lineage_aspect.fineGrainedLineages: | ||
yield _lineage_wu_via_read_modify_write( | ||
graph, urn, lineage_aspect, wu.metadata.systemMetadata | ||
) | ||
elif lineage_aspect.upstreams: | ||
yield _convert_upstream_lineage_to_patch( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there is a table level upstream only aspect with empty upstreams, we ignore it, as part of incremental lineage. |
||
urn, lineage_aspect, wu.metadata.systemMetadata | ||
) | ||
else: | ||
yield wu |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -136,7 +136,6 @@ def get_workunits( | |
return | ||
|
||
self._populate_external_lineage_map(discovered_tables) | ||
|
||
if self.config.include_view_lineage: | ||
if len(discovered_views) > 0: | ||
yield from self.get_view_upstream_workunits( | ||
|
@@ -200,14 +199,15 @@ def _gen_workunit_from_sql_parsing_result( | |
self, | ||
dataset_identifier: str, | ||
result: SqlParsingResult, | ||
) -> MetadataWorkUnit: | ||
) -> Iterable[MetadataWorkUnit]: | ||
upstreams, fine_upstreams = self.get_upstreams_from_sql_parsing_result( | ||
self.dataset_urn_builder(dataset_identifier), result | ||
) | ||
self.report.num_views_with_upstreams += 1 | ||
return self._create_upstream_lineage_workunit( | ||
dataset_identifier, upstreams, fine_upstreams | ||
) | ||
if upstreams: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should not emit upstream lineage if no upstreams are found. |
||
self.report.num_views_with_upstreams += 1 | ||
yield self._create_upstream_lineage_workunit( | ||
dataset_identifier, upstreams, fine_upstreams | ||
) | ||
|
||
def _gen_workunits_from_query_result( | ||
self, | ||
|
@@ -251,7 +251,7 @@ def get_view_upstream_workunits( | |
) | ||
if result: | ||
views_processed.add(view_identifier) | ||
yield self._gen_workunit_from_sql_parsing_result( | ||
yield from self._gen_workunit_from_sql_parsing_result( | ||
view_identifier, result | ||
) | ||
self.report.view_lineage_parse_secs = timer.elapsed_seconds() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Incremental lineage requires presence of DataHubGraph, which is available by default only when using DataHub rest sink. We plan to keep this default enabled in managed ingestion.