From 191f0ff9e8cfa60a7049ac134d0f65e055d486f2 Mon Sep 17 00:00:00 2001 From: shubhamjagtap639 Date: Thu, 8 Feb 2024 17:22:28 +0530 Subject: [PATCH] Add fineGrained Lineage for app used tables and datasets --- .../ingestion/source/qlik_sense/qlik_api.py | 8 +- .../ingestion/source/qlik_sense/qlik_sense.py | 37 ++++++-- .../golden_test_platform_instance_ingest.json | 88 ++++++++++++++++++- .../golden_test_qlik_sense_ingest.json | 84 +++++++++++++++++- 4 files changed, 202 insertions(+), 15 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py index d5d559144ef16..76a7d4ce60af3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py @@ -125,8 +125,9 @@ def _get_sheet( response = websocket_connection.websocket_send_request(method="GetLayout") sheet_dict = response[Constant.QLAYOUT] sheet = Sheet.parse_obj(sheet_dict[Constant.QMETA]) - i = 1 - for chart_dict in sheet_dict[Constant.QCHILDLIST][Constant.QITEMS]: + for i, chart_dict in enumerate( + sheet_dict[Constant.QCHILDLIST][Constant.QITEMS] + ): chart = self._get_chart( websocket_connection, chart_dict[Constant.QINFO][Constant.QID], @@ -134,8 +135,7 @@ def _get_sheet( ) if chart: if not chart.title: - chart.title = f"Object {i}" - i += 1 + chart.title = f"Object {i+1} of Sheet '{sheet.title}'" sheet.charts.append(chart) websocket_connection.handle.pop() return sheet diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py index 3cdc5c846dd79..83f564a13f819 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py +++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py @@ -61,6 +61,9 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageType, DatasetProperties, + FineGrainedLineage, + FineGrainedLineageDownstreamType, + FineGrainedLineageUpstreamType, Upstream, UpstreamLineage, ) @@ -244,7 +247,9 @@ def _gen_dashboard_info_workunit( entityUrn=dashboard_urn, aspect=dashboard_info_cls ).as_workunit() - def _gen_charts_workunit(self, charts: List[Chart]) -> Iterable[MetadataWorkUnit]: + def _gen_charts_workunit( + self, charts: List[Chart], app_id: str + ) -> Iterable[MetadataWorkUnit]: """ Map Qlik Chart to Datahub Chart """ @@ -272,6 +277,12 @@ def _gen_charts_workunit(self, charts: List[Chart]) -> Iterable[MetadataWorkUnit ), ).as_workunit() + yield from add_entity_to_container( + container_key=self._gen_app_key(app_id), + entity_type="chart", + entity_urn=chart_urn, + ) + def _gen_sheets_workunit( self, sheets: List[Sheet], app_id: str ) -> Iterable[MetadataWorkUnit]: @@ -299,7 +310,7 @@ def _gen_sheets_workunit( if self.config.ingest_owner and owner_username: yield self._gen_entity_owner_aspect(dashboard_urn, owner_username) - yield from self._gen_charts_workunit(sheet.charts) + yield from self._gen_charts_workunit(sheet.charts, app_id) def _gen_app_table_upstream_lineage( self, dataset_urn: str, table: QlikTable @@ -316,7 +327,7 @@ def _gen_app_table_upstream_lineage( ) ) upstream_dataset_urn = builder.make_dataset_urn_with_platform_instance( - name=f"{table.databaseName}.{table.schemaName}.{table.tableName}", + name=f"{table.databaseName}.{table.schemaName}.{table.tableName}".lower(), platform=KNOWN_DATA_PLATFORM_MAPPING.get( table.dataconnectorPlatform, table.dataconnectorPlatform ), @@ -325,9 +336,24 @@ def _gen_app_table_upstream_lineage( ) elif table.type == BoxType.LOADFILE: upstream_dataset_urn = self._gen_qlik_dataset_urn( - f"{table.spaceId}.{table.databaseName}" + f"{table.spaceId}.{table.databaseName}".lower() ) + if upstream_dataset_urn: + # Generate finegrained lineage + fine_grained_lineages = [ + FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=[ + builder.make_schema_field_urn(upstream_dataset_urn, field.name) + ], + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=[ + builder.make_schema_field_urn(dataset_urn, field.name) + ], + ) + for field in table.datasetSchema + ] return MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=UpstreamLineage( @@ -335,7 +361,8 @@ def _gen_app_table_upstream_lineage( Upstream( dataset=upstream_dataset_urn, type=DatasetLineageType.COPY ) - ] + ], + fineGrainedLineages=fine_grained_lineages, ), ).as_workunit() else: diff --git a/metadata-ingestion/tests/integration/qlik_sense/golden_test_platform_instance_ingest.json b/metadata-ingestion/tests/integration/qlik_sense/golden_test_platform_instance_ingest.json index 7cfe225dddbd7..7558d08b9e3f5 100644 --- a/metadata-ingestion/tests/integration/qlik_sense/golden_test_platform_instance_ingest.json +++ b/metadata-ingestion/tests/integration/qlik_sense/golden_test_platform_instance_ingest.json @@ -561,6 +561,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(qlik-sense,qlik_sense_platform.QYUUb)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:784b26286a16989c6329d372ccc2f97e" + } + }, + "systemMetadata": { + "lastObserved": 1707393018679, + "runId": "qlik-sense-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(qlik-sense,qlik_sense_platform.QYUUb)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform)" + }, + { + "id": "urn:li:container:c2b8d174a817b41fbbd501fd4a84248f", + "urn": "urn:li:container:c2b8d174a817b41fbbd501fd4a84248f" + }, + { + "id": "urn:li:container:784b26286a16989c6329d372ccc2f97e", + "urn": "urn:li:container:784b26286a16989c6329d372ccc2f97e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1707393018680, + "runId": "qlik-sense-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#jokg8u7cvizvgxwrfsyxru0ykr2rl2wfd5djph9bj5q#rrg6-1cerbo4ews9o--qup3toxhm5molizgy6_wcxje,PROD)", @@ -713,11 +758,24 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,google-cloud.harshal-playground-306419.test_dataset.test_table,DEV)", "type": "COPY" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,google-cloud.harshal-playground-306419.test_dataset.test_table,DEV),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#jokg8u7cvizvgxwrfsyxru0ykr2rl2wfd5djph9bj5q#rrg6-1cerbo4ews9o--qup3toxhm5molizgy6_wcxje,PROD),name)" + ], + "confidenceScore": 1.0 + } ] } }, "systemMetadata": { - "lastObserved": 1707327546144, + "lastObserved": 1707393018687, "runId": "qlik-sense-test", "lastRunId": "no-run-id-provided" } @@ -912,14 +970,38 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.659d0e41d1b0ecce6eebc9b1.IPL_Matches_2022.csv,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD)", "type": "COPY" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD),City)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#_s2ws5rvxaarzazlmghwjhngq8znjagxptal6jlzoaw#fcj-h2tvmayi--l6fn0vqgpthf8kb2rj7sj0_ysrhgc,PROD),City)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD),Date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qlik_sense_platform.qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#_s2ws5rvxaarzazlmghwjhngq8znjagxptal6jlzoaw#fcj-h2tvmayi--l6fn0vqgpthf8kb2rj7sj0_ysrhgc,PROD),Date)" + ], + "confidenceScore": 1.0 + } ] } }, "systemMetadata": { - "lastObserved": 1707327546152, + "lastObserved": 1707393018696, "runId": "qlik-sense-test", "lastRunId": "no-run-id-provided" } diff --git a/metadata-ingestion/tests/integration/qlik_sense/golden_test_qlik_sense_ingest.json b/metadata-ingestion/tests/integration/qlik_sense/golden_test_qlik_sense_ingest.json index 32af5d2191de4..18542a1e4c4ef 100644 --- a/metadata-ingestion/tests/integration/qlik_sense/golden_test_qlik_sense_ingest.json +++ b/metadata-ingestion/tests/integration/qlik_sense/golden_test_qlik_sense_ingest.json @@ -520,6 +520,47 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(qlik-sense,QYUUb)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:43defedfcb41b246659c449a6f3ea8ac" + } + }, + "systemMetadata": { + "lastObserved": 1707393018435, + "runId": "qlik-sense-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(qlik-sense,QYUUb)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:88cf1accecf63ec7669dc1ec7cb28704", + "urn": "urn:li:container:88cf1accecf63ec7669dc1ec7cb28704" + }, + { + "id": "urn:li:container:43defedfcb41b246659c449a6f3ea8ac", + "urn": "urn:li:container:43defedfcb41b246659c449a6f3ea8ac" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1707393018436, + "runId": "qlik-sense-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#jokg8u7cvizvgxwrfsyxru0ykr2rl2wfd5djph9bj5q#rrg6-1cerbo4ews9o--qup3toxhm5molizgy6_wcxje,PROD)", @@ -655,11 +696,24 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_dataset.test_table,PROD)", "type": "COPY" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_dataset.test_table,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#jokg8u7cvizvgxwrfsyxru0ykr2rl2wfd5djph9bj5q#rrg6-1cerbo4ews9o--qup3toxhm5molizgy6_wcxje,PROD),name)" + ], + "confidenceScore": 1.0 + } ] } }, "systemMetadata": { - "lastObserved": 1707327443034, + "lastObserved": 1707393018442, "runId": "qlik-sense-test", "lastRunId": "no-run-id-provided" } @@ -833,14 +887,38 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,659d0e41d1b0ecce6eebc9b1.IPL_Matches_2022.csv,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:qlik-sense,659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD)", "type": "COPY" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD),City)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#_s2ws5rvxaarzazlmghwjhngq8znjagxptal6jlzoaw#fcj-h2tvmayi--l6fn0vqgpthf8kb2rj7sj0_ysrhgc,PROD),City)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,659d0e41d1b0ecce6eebc9b1.ipl_matches_2022.csv,PROD),Date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:qlik-sense,qri:qdf:space://ebw1euduywmui8p2bm7cor5oathzuyxvt0bircc2iru#_s2ws5rvxaarzazlmghwjhngq8znjagxptal6jlzoaw#fcj-h2tvmayi--l6fn0vqgpthf8kb2rj7sj0_ysrhgc,PROD),Date)" + ], + "confidenceScore": 1.0 + } ] } }, "systemMetadata": { - "lastObserved": 1707327443043, + "lastObserved": 1707393018451, "runId": "qlik-sense-test", "lastRunId": "no-run-id-provided" }