Skip to content

Commit

Permalink
Add fine-grained lineage for tables and datasets used by apps
Browse files (browse the repository at this point in the history)
  • Loading branch information
shubhamjagtap639 committed Feb 8, 2024
1 parent 6c48007 commit 191f0ff
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,17 @@ def _get_sheet(
response = websocket_connection.websocket_send_request(method="GetLayout")
sheet_dict = response[Constant.QLAYOUT]
sheet = Sheet.parse_obj(sheet_dict[Constant.QMETA])
i = 1
for chart_dict in sheet_dict[Constant.QCHILDLIST][Constant.QITEMS]:
for i, chart_dict in enumerate(
sheet_dict[Constant.QCHILDLIST][Constant.QITEMS]
):
chart = self._get_chart(
websocket_connection,
chart_dict[Constant.QINFO][Constant.QID],
sheet_id,
)
if chart:
if not chart.title:
chart.title = f"Object {i}"
i += 1
chart.title = f"Object {i+1} of Sheet '{sheet.title}'"
sheet.charts.append(chart)
websocket_connection.handle.pop()
return sheet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
DatasetLineageType,
DatasetProperties,
FineGrainedLineage,
FineGrainedLineageDownstreamType,
FineGrainedLineageUpstreamType,
Upstream,
UpstreamLineage,
)
Expand Down Expand Up @@ -244,7 +247,9 @@ def _gen_dashboard_info_workunit(
entityUrn=dashboard_urn, aspect=dashboard_info_cls
).as_workunit()

def _gen_charts_workunit(self, charts: List[Chart]) -> Iterable[MetadataWorkUnit]:
def _gen_charts_workunit(
self, charts: List[Chart], app_id: str
) -> Iterable[MetadataWorkUnit]:
"""
Map Qlik Chart to Datahub Chart
"""
Expand Down Expand Up @@ -272,6 +277,12 @@ def _gen_charts_workunit(self, charts: List[Chart]) -> Iterable[MetadataWorkUnit
),
).as_workunit()

yield from add_entity_to_container(
container_key=self._gen_app_key(app_id),
entity_type="chart",
entity_urn=chart_urn,
)

def _gen_sheets_workunit(
self, sheets: List[Sheet], app_id: str
) -> Iterable[MetadataWorkUnit]:
Expand Down Expand Up @@ -299,7 +310,7 @@ def _gen_sheets_workunit(
if self.config.ingest_owner and owner_username:
yield self._gen_entity_owner_aspect(dashboard_urn, owner_username)

yield from self._gen_charts_workunit(sheet.charts)
yield from self._gen_charts_workunit(sheet.charts, app_id)

def _gen_app_table_upstream_lineage(
self, dataset_urn: str, table: QlikTable
Expand All @@ -316,7 +327,7 @@ def _gen_app_table_upstream_lineage(
)
)
upstream_dataset_urn = builder.make_dataset_urn_with_platform_instance(
name=f"{table.databaseName}.{table.schemaName}.{table.tableName}",
name=f"{table.databaseName}.{table.schemaName}.{table.tableName}".lower(),
platform=KNOWN_DATA_PLATFORM_MAPPING.get(
table.dataconnectorPlatform, table.dataconnectorPlatform
),
Expand All @@ -325,17 +336,33 @@ def _gen_app_table_upstream_lineage(
)
elif table.type == BoxType.LOADFILE:
upstream_dataset_urn = self._gen_qlik_dataset_urn(
f"{table.spaceId}.{table.databaseName}"
f"{table.spaceId}.{table.databaseName}".lower()
)

if upstream_dataset_urn:
# Generate finegrained lineage
fine_grained_lineages = [
FineGrainedLineage(
upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
upstreams=[
builder.make_schema_field_urn(upstream_dataset_urn, field.name)
],
downstreamType=FineGrainedLineageDownstreamType.FIELD,
downstreams=[
builder.make_schema_field_urn(dataset_urn, field.name)
],
)
for field in table.datasetSchema
]
return MetadataChangeProposalWrapper(
entityUrn=dataset_urn,
aspect=UpstreamLineage(
upstreams=[
Upstream(
dataset=upstream_dataset_urn, type=DatasetLineageType.COPY
)
]
],
fineGrainedLineages=fine_grained_lineages,
),
).as_workunit()
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,51 @@
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(qlik-sense,qlik_sense_platform.QYUUb)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
"container": "urn:li:container:784b26286a16989c6329d372ccc2f97e"
}
},
"systemMetadata": {
"lastObserved": 1707393018679,
"runId": "qlik-sense-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(qlik-sense,qlik_sense_platform.QYUUb)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform)",
"urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform)"
},
{
"id": "urn:li:container:c2b8d174a817b41fbbd501fd4a84248f",
"urn": "urn:li:container:c2b8d174a817b41fbbd501fd4a84248f"
},
{
"id": "urn:li:container:784b26286a16989c6329d372ccc2f97e",
"urn": "urn:li:container:784b26286a16989c6329d372ccc2f97e"
}
]
}
},
"systemMetadata": {
"lastObserved": 1707393018680,
"runId": "qlik-sense-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#jokg8u7cvizvgxwrfsyxru0ykr2rl2wfd5djph9bj5q#rrg6-1cerbo4ews9o--qup3toxhm5molizgy6_wcxje,PROD)",
Expand Down Expand Up @@ -713,11 +758,24 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,google-cloud.harshal-playground-306419.test_dataset.test_table,DEV)",
"type": "COPY"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,google-cloud.harshal-playground-306419.test_dataset.test_table,DEV),name)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#jokg8u7cvizvgxwrfsyxru0ykr2rl2wfd5djph9bj5q#rrg6-1cerbo4ews9o--qup3toxhm5molizgy6_wcxje,PROD),name)"
],
"confidenceScore": 1.0
}
]
}
},
"systemMetadata": {
"lastObserved": 1707327546144,
"lastObserved": 1707393018687,
"runId": "qlik-sense-test",
"lastRunId": "no-run-id-provided"
}
Expand Down Expand Up @@ -912,14 +970,38 @@
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.659d0e41d1b0ecce6eebc9b1.IPL_Matches_2022.csv,PROD)",
"dataset": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD)",
"type": "COPY"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD),City)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#_s2ws5rvxaarzazlmghwjhngq8znjagxptal6jlzoaw#fcj-h2tvmayi--l6fn0vqgpthf8kb2rj7sj0_ysrhgc,PROD),City)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD),Date)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#_s2ws5rvxaarzazlmghwjhngq8znjagxptal6jlzoaw#fcj-h2tvmayi--l6fn0vqgpthf8kb2rj7sj0_ysrhgc,PROD),Date)"
],
"confidenceScore": 1.0
}
]
}
},
"systemMetadata": {
"lastObserved": 1707327546152,
"lastObserved": 1707393018696,
"runId": "qlik-sense-test",
"lastRunId": "no-run-id-provided"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,47 @@
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(qlik-sense,QYUUb)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"json": {
"container": "urn:li:container:43defedfcb41b246659c449a6f3ea8ac"
}
},
"systemMetadata": {
"lastObserved": 1707393018435,
"runId": "qlik-sense-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(qlik-sense,QYUUb)",
"changeType": "UPSERT",
"aspectName": "browsePathsV2",
"aspect": {
"json": {
"path": [
{
"id": "urn:li:container:88cf1accecf63ec7669dc1ec7cb28704",
"urn": "urn:li:container:88cf1accecf63ec7669dc1ec7cb28704"
},
{
"id": "urn:li:container:43defedfcb41b246659c449a6f3ea8ac",
"urn": "urn:li:container:43defedfcb41b246659c449a6f3ea8ac"
}
]
}
},
"systemMetadata": {
"lastObserved": 1707393018436,
"runId": "qlik-sense-test",
"lastRunId": "no-run-id-provided"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#jokg8u7cvizvgxwrfsyxru0ykr2rl2wfd5djph9bj5q#rrg6-1cerbo4ews9o--qup3toxhm5molizgy6_wcxje,PROD)",
Expand Down Expand Up @@ -655,11 +696,24 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_dataset.test_table,PROD)",
"type": "COPY"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_dataset.test_table,PROD),name)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#jokg8u7cvizvgxwrfsyxru0ykr2rl2wfd5djph9bj5q#rrg6-1cerbo4ews9o--qup3toxhm5molizgy6_wcxje,PROD),name)"
],
"confidenceScore": 1.0
}
]
}
},
"systemMetadata": {
"lastObserved": 1707327443034,
"lastObserved": 1707393018442,
"runId": "qlik-sense-test",
"lastRunId": "no-run-id-provided"
}
Expand Down Expand Up @@ -833,14 +887,38 @@
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"dataset": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,659d0e41d1b0ecce6eebc9b1.IPL_Matches_2022.csv,PROD)",
"dataset": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD)",
"type": "COPY"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD),City)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#_s2ws5rvxaarzazlmghwjhngq8znjagxptal6jlzoaw#fcj-h2tvmayi--l6fn0vqgpthf8kb2rj7sj0_ysrhgc,PROD),City)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD),Date)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#_s2ws5rvxaarzazlmghwjhngq8znjagxptal6jlzoaw#fcj-h2tvmayi--l6fn0vqgpthf8kb2rj7sj0_ysrhgc,PROD),Date)"
],
"confidenceScore": 1.0
}
]
}
},
"systemMetadata": {
"lastObserved": 1707327443043,
"lastObserved": 1707393018451,
"runId": "qlik-sense-test",
"lastRunId": "no-run-id-provided"
}
Expand Down

0 comments on commit 191f0ff

Please sign in to comment.