From 35d7770462e74cd95cf80b6df986fec715b7ce2c Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Fri, 20 Oct 2023 01:40:23 -0400 Subject: [PATCH 01/80] test(ingest/delta-lake): Fix integration tests (#9056) --- .../delta_lake_minio_mces_golden.json | 48 ++-- .../local/golden_mces_allow_table.json | 261 ++++++++++++------ .../local/golden_mces_inner_table.json | 261 ++++++++++++------ .../local/golden_mces_relative_path.json | 62 +++-- .../local/golden_mces_single_table.json | 102 ++++--- 5 files changed, 480 insertions(+), 254 deletions(-) diff --git a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json index 52e92d27549f0..ed65d74037796 100644 --- a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json +++ b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json @@ -136,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -156,7 +157,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -171,7 +173,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -186,7 +189,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -203,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -218,7 +223,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -238,7 +244,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -253,7 +260,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -268,7 +276,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -285,7 +294,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -300,7 +310,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -320,7 +331,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -335,7 +347,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -355,14 +368,16 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "version": "0" }, "lastUpdatedTimestamp": 1655664815399 } }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } }, { @@ -386,7 +401,8 @@ }, "systemMetadata": { "lastObserved": 1672531200000, - "runId": "delta-lake-test" + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json index 4dcdf71ce0095..6ec6eb2809a10 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json @@ -94,7 +94,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -115,7 +116,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -130,7 +132,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +167,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -183,7 +188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -204,7 +210,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -219,7 +226,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -235,7 +243,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -252,7 +261,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -267,7 +277,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -291,7 +302,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -312,7 +324,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -327,7 +340,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -343,7 +357,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -360,7 +375,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -375,7 +391,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -403,7 +420,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -424,7 +442,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -439,7 +458,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -455,7 +475,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +493,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +509,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +542,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -540,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -555,7 +580,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -571,7 +597,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -588,7 +615,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -603,7 +631,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -639,7 +668,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -654,7 +684,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -674,14 +705,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831476907 + "lastUpdatedTimestamp": 1655831477768 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -702,14 +736,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831477701 + "lastUpdatedTimestamp": 1655831477745 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -730,14 +766,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831477726 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -758,14 +796,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831477745 + "lastUpdatedTimestamp": 1655831477701 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -786,14 +826,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831477768 + "lastUpdatedTimestamp": 1655831476907 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -833,7 +874,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -973,7 +1015,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -988,7 +1031,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1008,14 +1052,16 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "version": "0" }, "lastUpdatedTimestamp": 1655664815399 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1055,7 +1101,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1199,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1167,7 +1215,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,14 +1236,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831649166 + "lastUpdatedTimestamp": 1655831649788 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1215,14 +1267,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831649715 + "lastUpdatedTimestamp": 1655831649754 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1243,14 +1297,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831649731 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1271,14 +1327,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831649754 + "lastUpdatedTimestamp": 1655831649715 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1299,14 +1357,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831649788 + "lastUpdatedTimestamp": 1655831649166 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1346,7 +1405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1444,7 +1504,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1465,7 +1526,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1480,7 +1542,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1496,7 +1559,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1513,7 +1577,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1528,7 +1593,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1568,7 +1634,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1583,7 +1650,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1603,14 +1671,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831865396 + "lastUpdatedTimestamp": 1655831866541 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1631,14 +1702,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831866337 + "lastUpdatedTimestamp": 1655831866447 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1659,14 +1732,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831866398 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1687,14 +1762,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831866447 + "lastUpdatedTimestamp": 1655831866337 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1715,14 +1792,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831866541 + "lastUpdatedTimestamp": 1655831865396 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1766,7 +1844,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "allow_table.json" + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json index 901e4c1262d3f..715beebfe22fb 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json @@ -94,7 +94,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +115,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -144,7 +147,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +165,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -176,7 +181,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -196,7 +202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -211,7 +218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -226,7 +234,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -243,7 +252,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -258,7 +268,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -278,7 +289,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -298,7 +310,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +326,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +342,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -345,7 +360,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -360,7 +376,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +401,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -404,7 +422,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -419,7 +438,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -434,7 +454,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +472,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -466,7 +488,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -494,7 +517,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -514,7 +538,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -529,7 +554,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -544,7 +570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -561,7 +588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -576,7 +604,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -608,7 +637,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -623,7 +653,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -643,14 +674,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831476907 + "lastUpdatedTimestamp": 1655831477768 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -671,14 +705,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831477701 + "lastUpdatedTimestamp": 1655831477745 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -699,14 +735,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831477726 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -727,14 +765,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831477745 + "lastUpdatedTimestamp": 1655831477701 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -755,14 +795,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831477768 + "lastUpdatedTimestamp": 1655831476907 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -798,7 +839,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -938,7 +980,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -953,7 +996,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -973,14 +1017,16 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "version": "0" }, "lastUpdatedTimestamp": 1655664815399 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1016,7 +1062,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1113,7 +1160,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1128,7 +1176,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1148,14 +1197,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831649166 + "lastUpdatedTimestamp": 1655831649788 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1176,14 +1228,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831649715 + "lastUpdatedTimestamp": 1655831649754 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1204,14 +1258,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831649731 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1232,14 +1288,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831649754 + "lastUpdatedTimestamp": 1655831649715 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1260,14 +1318,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831649788 + "lastUpdatedTimestamp": 1655831649166 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1303,7 +1362,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1401,7 +1461,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1421,7 +1482,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1436,7 +1498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1451,7 +1514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1468,7 +1532,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1483,7 +1548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1519,7 +1585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1534,7 +1601,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1554,14 +1622,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831865396 + "lastUpdatedTimestamp": 1655831866541 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1582,14 +1653,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831866337 + "lastUpdatedTimestamp": 1655831866447 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1610,14 +1683,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831866398 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1638,14 +1713,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831866447 + "lastUpdatedTimestamp": 1655831866337 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1666,14 +1743,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831866541 + "lastUpdatedTimestamp": 1655831865396 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -1713,7 +1791,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "inner_table.json" + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json index 18474e819334e..2076ec4096f68 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json @@ -94,7 +94,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +115,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -144,7 +147,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +165,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -176,7 +181,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -191,7 +197,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -211,14 +218,17 @@ "customProperties": { "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", - "isolationLevel": "Serializable" + "isolationLevel": "Serializable", + "readVersion": "3", + "version": "4" }, - "lastUpdatedTimestamp": 1655831476907 + "lastUpdatedTimestamp": 1655831477768 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -239,14 +249,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "0" + "readVersion": "2", + "version": "3" }, - "lastUpdatedTimestamp": 1655831477701 + "lastUpdatedTimestamp": 1655831477745 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -267,14 +279,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "1" + "readVersion": "1", + "version": "2" }, "lastUpdatedTimestamp": 1655831477726 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -295,14 +309,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "2" + "readVersion": "0", + "version": "1" }, - "lastUpdatedTimestamp": 1655831477745 + "lastUpdatedTimestamp": 1655831477701 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -323,14 +339,15 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "version": "0" }, - "lastUpdatedTimestamp": 1655831477768 + "lastUpdatedTimestamp": 1655831476907 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } }, { @@ -350,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "relative_path.json" + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json index bb47a077e878b..42e3b19612c2b 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json @@ -93,7 +93,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -113,7 +114,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -128,7 +130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -143,7 +146,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -160,7 +164,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -175,7 +180,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -195,7 +201,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +217,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +233,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -242,7 +251,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -257,7 +267,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -277,7 +288,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -297,7 +309,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -312,7 +325,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -327,7 +341,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -344,7 +359,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -359,7 +375,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -383,7 +400,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -403,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -418,7 +437,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -433,7 +453,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -450,7 +471,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -465,7 +487,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -493,7 +516,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -513,7 +537,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -528,7 +553,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -543,7 +569,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -560,7 +587,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +603,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -607,7 +636,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +652,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -643,14 +674,16 @@ "engineInfo": "local Delta-Standalone/0.4.0", "isBlindAppend": "True", "isolationLevel": "Serializable", - "readVersion": "3" + "readVersion": "3", + "version": "4" }, "lastUpdatedTimestamp": 1655831477768 } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } }, { @@ -686,7 +719,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "single_table.json" + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file From 3360fa222db42d587a4ae8fbc046628424f51373 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Fri, 20 Oct 2023 16:25:06 -0400 Subject: [PATCH 02/80] test(): Ingestion source creation cypress test (#8850) --- .../app/ingest/source/IngestionSourceList.tsx | 7 +- .../source/IngestionSourceTableColumns.tsx | 12 +++- .../source/builder/CreateScheduleStep.tsx | 6 +- .../ingest/source/builder/NameSourceStep.tsx | 1 + .../ingest/source/builder/RecipeBuilder.tsx | 18 ++++- .../cypress/e2e/mutations/ingestion_source.js | 68 +++++++++++++++++++ 6 files changed, 105 insertions(+), 7 deletions(-) create mode 100644 smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js diff --git a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx index 6c91a0f6f3f8f..0e341a5ff3a79 100644 --- a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx +++ b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx @@ -358,7 +358,12 @@ export const IngestionSourceList = () => {
- )} diff --git a/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx b/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx index dba9b25e14e99..7a14b6a794189 100644 --- a/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/CreateScheduleStep.tsx @@ -167,7 +167,11 @@ export const CreateScheduleStep = ({ state, updateState, goTo, prev }: StepProps
-
diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 913f8253ece5a..992ebff643c31 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -123,6 +123,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps)
0)} onClick={() => onClickCreate(false)} > diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx index 4ddeb7b492595..bee9b04cee100 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx @@ -86,10 +86,20 @@ function RecipeBuilder(props: Props) { {sourceConfigs?.displayName} Recipe - switchViews(true)}> + switchViews(true)} + data-testid="recipe-builder-form-button" + > Form - switchViews(false)}> + switchViews(false)} + data-testid="recipe-builder-yaml-button" + > YAML @@ -114,7 +124,9 @@ function RecipeBuilder(props: Props) { - + )} diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js new file mode 100644 index 0000000000000..6c5dd77810644 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js @@ -0,0 +1,68 @@ + +const number = Math.floor(Math.random() * 100000); +const accound_id = `account${number}`; +const warehouse_id = `warehouse${number}`; +const username = `user${number}`; +const password = `password${number}`; +const role = `role${number}`; +const ingestion_source_name = `ingestion source ${number}`; + +describe("ingestion source creation flow", () => { + it("create a ingestion source using ui, verify ingestion source details saved correctly, remove ingestion source", () => { + // Go to ingestion page, create a snowflake source + cy.loginWithCredentials(); + cy.goToIngestionPage(); + cy.clickOptionWithTestId("create-ingestion-source-button"); + cy.clickOptionWithText("Snowflake"); + cy.waitTextVisible("Snowflake Recipe"); + cy.get("#account_id").type(accound_id); + cy.get("#warehouse").type(warehouse_id); + cy.get("#username").type(username); + cy.get("#password").type(password); + cy.focused().blur(); + cy.get("#role").type(role); + + // Verify yaml recipe is generated correctly + cy.clickOptionWithTestId("recipe-builder-yaml-button"); + cy.waitTextVisible("account_id"); + cy.waitTextVisible(accound_id); + cy.waitTextVisible(warehouse_id); + cy.waitTextVisible(username); + cy.waitTextVisible(password); + cy.waitTextVisible(role); + + // Finish creating source + cy.clickOptionWithTestId("recipe-builder-next-button"); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.clickOptionWithTestId("ingestion-schedule-next-button"); + cy.waitTextVisible("Give this ingestion source a name."); + cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); + cy.clickOptionWithTestId("ingestion-source-save-button"); + cy.waitTextVisible("Successfully created ingestion source!").wait(5000) + cy.waitTextVisible(ingestion_source_name); + cy.get('[data-testid="ingestion-source-table-status"]').contains("Pending...").should("be.visible"); + + // Verify ingestion source details are saved correctly + cy.get('[data-testid="ingestion-source-table-edit-button"]').first().click(); + cy.waitTextVisible("Edit Ingestion Source"); + cy.get("#account_id").should("have.value", accound_id); + cy.get("#warehouse").should("have.value", warehouse_id); + cy.get("#username").should("have.value", username); + cy.get("#password").should("have.value", password); + cy.get("#role").should("have.value", role); + cy.get("button").contains("Next").click(); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.clickOptionWithTestId("ingestion-schedule-next-button"); + cy.get('[data-testid="source-name-input"]').clear().type(ingestion_source_name + " EDITED"); + cy.clickOptionWithTestId("ingestion-source-save-button"); + cy.waitTextVisible("Successfully updated ingestion source!"); + cy.waitTextVisible(ingestion_source_name + " EDITED"); + + // Remove ingestion source + cy.get('[data-testid="delete-button"]').first().click(); + cy.waitTextVisible("Confirm Ingestion Source Removal"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Removed ingestion source."); + cy.ensureTextNotPresent(ingestion_source_name + " EDITED") + }) +}); \ No newline at end of file From 2fea466d48c856f5c469af6f611990a200e5bece Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 20 Oct 2023 13:47:52 -0700 Subject: [PATCH 03/80] docs: fix lineage capability annotations (#8954) --- .../src/datahub/ingestion/source/aws/glue.py | 1 + .../datahub/ingestion/source/bigquery_v2/bigquery.py | 1 + .../src/datahub/ingestion/source/kafka_connect.py | 1 + .../datahub/ingestion/source/looker/looker_source.py | 6 +++++- .../src/datahub/ingestion/source/metabase.py | 1 + .../src/datahub/ingestion/source/metadata/lineage.py | 10 +++++++++- .../src/datahub/ingestion/source/mode.py | 1 + .../src/datahub/ingestion/source/nifi.py | 4 +++- .../src/datahub/ingestion/source/powerbi/powerbi.py | 5 ++++- .../src/datahub/ingestion/source/sql_queries.py | 10 +++++++++- .../src/datahub/ingestion/source/superset.py | 1 + .../src/datahub/ingestion/source/tableau.py | 6 +++++- 12 files changed, 41 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index e5dff786b71d1..aa7e5aa352a3e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -221,6 +221,7 @@ def report_table_dropped(self, table: str) -> None: SourceCapability.DELETION_DETECTION, "Enabled by default when stateful ingestion is turned on.", ) +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class GlueSource(StatefulIngestionSourceBase): """ Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub. diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 552612f877b9a..692d8c4f81bb6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -153,6 +153,7 @@ def cleanup(config: BigQueryV2Config) -> None: ) @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") +@capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration") @capability( SourceCapability.USAGE_STATS, "Enabled by default, can be disabled via configuration `include_usage_statistics`", diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 5fae0ee5215a3..1a1e012e80633 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -1096,6 +1096,7 @@ def transform_connector_config( @config_class(KafkaConnectSourceConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") class KafkaConnectSource(StatefulIngestionSourceBase): config: KafkaConnectSourceConfig report: KafkaConnectSourceReport diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 8297a0aa8efa7..a3df977582ca4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -103,6 +103,11 @@ @capability( SourceCapability.OWNERSHIP, "Enabled by default, configured using `extract_owners`" ) +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") +@capability( + SourceCapability.LINEAGE_FINE, + "Enabled by default, configured using `extract_column_level_lineage`", +) @capability( SourceCapability.USAGE_STATS, "Enabled by default, configured using `extract_usage_history`", @@ -1128,7 +1133,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def emit_independent_looks_mcp( self, dashboard_element: LookerDashboardElement ) -> Iterable[MetadataWorkUnit]: - yield from auto_workunit( stream=self._make_chart_metadata_events( dashboard_element=dashboard_element, diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index fb4512893feb1..24145d60210ff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -80,6 +80,7 @@ def remove_trailing_slash(cls, v): @config_class(MetabaseConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class MetabaseSource(Source): """ This plugin extracts Charts, dashboards, and associated metadata. This plugin is in beta and has only been tested diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py index 1c0c809c16a60..f33c6e0edae3d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py @@ -23,11 +23,17 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source import ( + MetadataWorkUnitProcessor, + Source, + SourceCapability, + SourceReport, +) from datahub.ingestion.api.source_helpers import ( auto_status_aspect, auto_workunit_reporter, @@ -121,6 +127,8 @@ def version_must_be_1(cls, v): @platform_name("File Based Lineage") @config_class(LineageFileSourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.LINEAGE_COARSE, "Specified in the lineage file.") +@capability(SourceCapability.LINEAGE_FINE, "Specified in the lineage file.") @dataclass class LineageFileSource(Source): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index a000c66a406c2..c46b56da422d9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -98,6 +98,7 @@ class HTTPError429(HTTPError): @config_class(ModeConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class ModeSource(Source): """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index ac1e03812db3b..bc05edbb3c623 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -26,11 +26,12 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import Source, SourceCapability, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.schema_classes import ( DataFlowInfoClass, @@ -360,6 +361,7 @@ def report_dropped(self, ent_name: str) -> None: @platform_name("NiFi", id="nifi") @config_class(NifiSourceConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.LINEAGE_COARSE, "Supported. See docs for limitations") class NifiSource(Source): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 52bcef66658c8..4611a8eed4782 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -264,7 +264,6 @@ def extract_lineage( ) if len(upstream) > 0: - upstream_lineage_class: UpstreamLineageClass = UpstreamLineageClass( upstreams=upstream, fineGrainedLineages=cll_lineage or None, @@ -1139,6 +1138,10 @@ def report_to_datahub_work_units( SourceCapability.OWNERSHIP, "Disabled by default, configured using `extract_ownership`", ) +@capability( + SourceCapability.LINEAGE_COARSE, + "Enabled by default, configured using `extract_lineage`.", +) @capability( SourceCapability.LINEAGE_FINE, "Disabled by default, configured using `extract_column_level_lineage`. ", diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py index bce4d1ec76e6e..fcf97e461967c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py @@ -20,11 +20,17 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, + capability, config_class, platform_name, support_status, ) -from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport +from datahub.ingestion.api.source import ( + MetadataWorkUnitProcessor, + Source, + SourceCapability, + SourceReport, +) from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.graph.client import DataHubGraph @@ -83,6 +89,8 @@ def compute_stats(self) -> None: @platform_name("SQL Queries") @config_class(SqlQueriesSourceConfig) @support_status(SupportStatus.TESTING) +@capability(SourceCapability.LINEAGE_COARSE, "Parsed from SQL queries") +@capability(SourceCapability.LINEAGE_FINE, "Parsed from SQL queries") class SqlQueriesSource(Source): # TODO: Documentation urns: Optional[Set[str]] diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 14bc4242d2a91..e491a1e8b82fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -142,6 +142,7 @@ def get_filter_name(filter_obj): @capability( SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion" ) +@capability(SourceCapability.LINEAGE_COARSE, "Supported by default") class SupersetSource(StatefulIngestionSourceBase): """ This plugin extracts the following: diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index bad7ae49d325e..4bc40b0aac964 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -452,6 +452,10 @@ class TableauSourceReport(StaleEntityRemovalSourceReport): @capability(SourceCapability.OWNERSHIP, "Requires recipe configuration") @capability(SourceCapability.TAGS, "Requires recipe configuration") @capability(SourceCapability.LINEAGE_COARSE, "Enabled by default") +@capability( + SourceCapability.LINEAGE_FINE, + "Enabled by default, configure using `extract_column_level_lineage`", +) class TableauSource(StatefulIngestionSourceBase): platform = "tableau" @@ -533,7 +537,7 @@ def fetch_projects(): path=[], ) # Set parent project name - for project_id, project in all_project_map.items(): + for _project_id, project in all_project_map.items(): if ( project.parent_id is not None and project.parent_id in all_project_map From 4d35a254cabb3a6241af8857c7d63298783ebaa7 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Fri, 20 Oct 2023 17:09:14 -0400 Subject: [PATCH 04/80] =?UTF-8?q?Added=20more=20data-testid=20usage=20for?= =?UTF-8?q?=20edit=5Fdocumentation=20and=20managing=5Fsecr=E2=80=A6=20(#90?= =?UTF-8?q?60)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../components/legacy/DescriptionModal.tsx | 6 ++- .../shared/components/styled/AddLinkModal.tsx | 6 ++- .../tabs/Documentation/DocumentationTab.tsx | 1 + .../components/DescriptionEditorToolbar.tsx | 2 +- .../app/ingest/secret/SecretBuilderModal.tsx | 4 ++ .../src/app/ingest/secret/SecretsList.tsx | 6 ++- .../e2e/glossary/glossary_navigation.js | 3 +- .../e2e/mutations/edit_documentation.js | 42 +++++++-------- .../cypress/e2e/mutations/managing_secrets.js | 51 ++++++++++--------- 9 files changed, 70 insertions(+), 51 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx index 579b8c9905da0..cb37c44a36caa 100644 --- a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx @@ -41,7 +41,11 @@ export default function UpdateDescriptionModal({ title, description, original, o footer={ <> - diff --git a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx index 34d4f0cb3fe91..68a8cf4094362 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx @@ -57,7 +57,7 @@ export const AddLinkModal = ({ buttonProps, refetch }: AddLinkProps) => { return ( <> - { , - , ]} >
{ {
- diff --git a/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx b/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx index 539eef972608c..30f04d61b8fc9 100644 --- a/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx +++ b/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx @@ -40,6 +40,7 @@ export const SecretBuilderModal = ({ initialState, visible, onSubmit, onCancel } Cancel
diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js index f52e4d3984a88..aeceaf99be889 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js @@ -17,8 +17,7 @@ describe("glossary sidebar navigation test", () => { cy.waitTextVisible("Created Term Group!"); cy.waitTextVisible("Create Glossary Term"); cy.enterTextInTestId("create-glossary-entity-modal-name", glossaryTerm); - cy.clickOptionWithTestId("glossary-entity-modal-create-button"); - cy.waitTextVisible("Created Glossary Term!"); + cy.clickOptionWithTestId("glossary-entity-modal-create-button").wait(3000); cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTerm).click().wait(3000); cy.openThreeDotDropdown(); cy.clickOptionWithTestId("entity-menu-move-button") diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js index 83b66e2cb2549..5f9758a35ca0e 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js @@ -10,20 +10,20 @@ describe("edit documentation and link to dataset", () => { cy.visit( "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" ); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.waitTextVisible("my hive dataset"); cy.waitTextVisible("Sample doc"); - cy.clickOptionWithText("Edit"); + cy.clickOptionWithTestId("edit-documentation-button"); cy.focused().clear(); cy.focused().type(documentation_edited); - cy.get("button").contains("Save").click(); + cy.clickOptionWithTestId("description-editor-save-button"); cy.waitTextVisible("Description Updated"); cy.waitTextVisible(documentation_edited); //return documentation to original state - cy.clickOptionWithText("Edit"); + cy.clickOptionWithTestId("edit-documentation-button"); cy.focused().clear().wait(1000); cy.focused().type("my hive dataset"); - cy.get("button").contains("Save").click(); + cy.clickOptionWithTestId("description-editor-save-button"); cy.waitTextVisible("Description Updated"); cy.waitTextVisible("my hive dataset"); }); @@ -33,21 +33,21 @@ describe("edit documentation and link to dataset", () => { cy.visit( "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" ); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.contains("Sample doc").trigger("mouseover", { force: true }); cy.get('[data-icon="delete"]').click(); cy.waitTextVisible("Link Removed"); - cy.get("button").contains("Add Link").click().wait(1000); - cy.get('[role="dialog"] #addLinkForm_url').type(wrong_url); + cy.clickOptionWithTestId("add-link-button").wait(1000); + cy.enterTextInTestId("add-link-modal-url", wrong_url); cy.waitTextVisible("This field must be a valid url."); cy.focused().clear(); cy.waitTextVisible("A URL is required."); - cy.focused().type(correct_url); + cy.enterTextInTestId("add-link-modal-url", correct_url); cy.ensureTextNotPresent("This field must be a valid url."); - cy.get("#addLinkForm_label").type("Sample doc"); - cy.get('[role="dialog"] button').contains("Add").click(); + cy.enterTextInTestId("add-link-modal-label", "Sample doc"); + cy.clickOptionWithTestId("add-link-modal-add-button"); cy.waitTextVisible("Link Added"); - cy.get("[role='tab']").contains("Documentation").click(); + cy.openEntityTab("Documentation"); cy.get(`[href='${correct_url}']`).should("be.visible"); }); @@ -55,18 +55,18 @@ describe("edit documentation and link to dataset", () => { cy.loginWithCredentials(); cy.visit("/domain/urn:li:domain:marketing/Entities"); cy.waitTextVisible("SampleCypressKafkaDataset"); - cy.get("button").contains("Add Link").click().wait(1000); - cy.get('[role="dialog"] #addLinkForm_url').type(wrong_url); + cy.clickOptionWithTestId("add-link-button").wait(1000); + cy.enterTextInTestId("add-link-modal-url", wrong_url); cy.waitTextVisible("This field must be a valid url."); cy.focused().clear(); cy.waitTextVisible("A URL is required."); - cy.focused().type(correct_url); + cy.enterTextInTestId("add-link-modal-url", correct_url); cy.ensureTextNotPresent("This field must be a valid url."); - cy.get("#addLinkForm_label").type("Sample doc"); - cy.get('[role="dialog"] button').contains("Add").click(); + cy.enterTextInTestId("add-link-modal-label", "Sample doc"); + cy.clickOptionWithTestId("add-link-modal-add-button"); cy.waitTextVisible("Link Added"); - cy.get("[role='tab']").contains("Documentation").click(); - cy.waitTextVisible("Edit"); + cy.openEntityTab("Documentation"); + cy.get("[data-testid='edit-documentation-button']").should("be.visible"); cy.get(`[href='${correct_url}']`).should("be.visible"); cy.contains("Sample doc").trigger("mouseover", { force: true }); cy.get('[data-icon="delete"]').click(); @@ -83,14 +83,14 @@ describe("edit documentation and link to dataset", () => { cy.waitTextVisible("Foo field description has changed"); cy.focused().clear().wait(1000); cy.focused().type(documentation_edited); - cy.get("button").contains("Update").click(); + cy.clickOptionWithTestId("description-modal-update-button"); cy.waitTextVisible("Updated!"); cy.waitTextVisible(documentation_edited); cy.waitTextVisible("(edited)"); cy.get("tbody [data-icon='edit']").first().click({ force: true }); cy.focused().clear().wait(1000); cy.focused().type("Foo field description has changed"); - cy.get("button").contains("Update").click(); + cy.clickOptionWithTestId("description-modal-update-button"); cy.waitTextVisible("Updated!"); cy.waitTextVisible("Foo field description has changed"); cy.waitTextVisible("(edited)"); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js index 466bb2ef0757e..77fd63b9cae02 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js @@ -8,23 +8,24 @@ const ingestion_source_name = `ingestion source ${number}`; describe("managing secrets for ingestion creation", () => { it("create a secret, create ingestion source using a secret, remove a secret", () => { + // Navigate to the manage ingestion page → secrets cy.loginWithCredentials(); - //navigate to the manage ingestion page → secrets cy.goToIngestionPage(); - cy.clickOptionWithText("Secrets"); - //create a new secret - cy.clickOptionWithText("Create new secret"); - cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible"); - cy.get('[role="dialog"] #name').type(`secretname${number}`); - cy.get('[role="dialog"] #value').type(`secretvalue${number}`); - cy.get('[role="dialog"] #description').type(`secretdescription${number}`); - cy.get('#createSecretButton').click(); + cy.openEntityTab("Secrets"); + + // Create a new secret + cy.clickOptionWithTestId("create-secret-button"); + cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`); + cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`); + cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`); + cy.clickOptionWithTestId("secret-modal-create-button"); cy.waitTextVisible("Successfully created Secret!"); cy.waitTextVisible(`secretname${number}`); - cy.waitTextVisible(`secretdescription${number}`).wait(5000)//prevent issue with missing secret - //create an ingestion source using a secret + cy.waitTextVisible(`secretdescription${number}`).wait(5000) + + // Create an ingestion source using a secret cy.goToIngestionPage(); - cy.clickOptionWithText("Create new source"); + cy.get("#ingestion-create-source").click(); cy.clickOptionWithText("Snowflake"); cy.waitTextVisible("Snowflake Recipe"); cy.get("#account_id").type(accound_id); @@ -40,11 +41,12 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Give this ingestion source a name."); cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); cy.get("button").contains("Save").click(); - cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data + cy.waitTextVisible("Successfully created ingestion source!").wait(5000) cy.waitTextVisible(ingestion_source_name); cy.get("button").contains("Pending...").should("be.visible"); - //remove a secret - cy.clickOptionWithText("Secrets"); + + // Remove a secret + cy.openEntityTab("Secrets"); cy.waitTextVisible(`secretname${number}`); cy.get('[data-icon="delete"]').first().click(); cy.waitTextVisible("Confirm Secret Removal"); @@ -52,14 +54,16 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Removed secret."); cy.ensureTextNotPresent(`secretname${number}`); cy.ensureTextNotPresent(`secretdescription${number}`); - //remove ingestion source + + // Remove ingestion source cy.goToIngestionPage(); cy.get('[data-testid="delete-button"]').first().click(); cy.waitTextVisible("Confirm Ingestion Source Removal"); cy.get("button").contains("Yes").click(); cy.waitTextVisible("Removed ingestion source."); cy.ensureTextNotPresent(ingestion_source_name) - //verify secret is not present during ingestion source creation for password dropdown + + // Verify secret is not present during ingestion source creation for password dropdown cy.clickOptionWithText("Create new source"); cy.clickOptionWithText("Snowflake"); cy.waitTextVisible("Snowflake Recipe"); @@ -68,13 +72,13 @@ describe("managing secrets for ingestion creation", () => { cy.get("#username").type(username); cy.get("#password").click().wait(1000); cy.ensureTextNotPresent(`secretname${number}`); - //verify secret can be added during ingestion source creation and used successfully + + // Verify secret can be added during ingestion source creation and used successfully cy.clickOptionWithText("Create Secret"); - cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible"); - cy.get('[role="dialog"] #name').type(`secretname${number}`); - cy.get('[role="dialog"] #value').type(`secretvalue${number}`); - cy.get('[role="dialog"] #description').type(`secretdescription${number}`); - cy.get('#createSecretButton').click(); + cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`) + cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`) + cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`) + cy.clickOptionWithTestId("secret-modal-create-button"); cy.waitTextVisible("Created secret!"); cy.get("#role").type(role); cy.get("button").contains("Next").click(); @@ -86,6 +90,7 @@ describe("managing secrets for ingestion creation", () => { cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data cy.waitTextVisible(ingestion_source_name); cy.get("button").contains("Pending...").should("be.visible"); + //Remove ingestion source and secret cy.goToIngestionPage(); cy.get('[data-testid="delete-button"]').first().click(); From 63599c95553b89304b656efb2c208c9084d60717 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Sat, 21 Oct 2023 03:17:28 -0700 Subject: [PATCH 05/80] fix(search): fix mapping builder bug (#9062) --- .../search/elasticsearch/indexbuilder/MappingsBuilder.java | 2 +- .../timeseries/search/TimeseriesAspectServiceTestBase.java | 6 ++++-- .../io/datahubproject/test/search/SearchTestContainer.java | 2 +- smoke-test/tests/containers/containers_test.py | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index 1edc77bbd214c..35cef71edd953 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -133,7 +133,7 @@ private static Map getMappingsForField(@Nonnull final Searchable } else if (fieldType == FieldType.DATETIME) { mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); } else if (fieldType == FieldType.OBJECT) { - mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE); + mappingForField.put(TYPE, ESUtils.OBJECT_FIELD_TYPE); } else { log.info("FieldType {} has no mappings implemented", fieldType); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index f9b8f84b10ad2..b19d2026fbfc4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -889,15 +889,17 @@ public void testCountByFilterAfterDelete() throws InterruptedException { @Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) public void testGetIndexSizes() { List result = _elasticSearchTimeseriesAspectService.getIndexSizes(); + //CHECKSTYLE:OFF /* Example result: {aspectName=testentityprofile, sizeMb=52.234, indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} {aspectName=testentityprofile, sizeMb=0.208, indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} */ // There may be other indices in there from other tests, so just make sure that index for entity + aspect is in there - assertTrue(result.size() > 1); + //CHECKSTYLE:ON + assertTrue(result.size() > 0); assertTrue( result.stream().anyMatch(idxSizeResult -> idxSizeResult.getIndexName().equals( - "es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1"))); + "es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1"))); } } diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java index 67e1ee368f513..4c1555fc510e6 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java @@ -5,7 +5,7 @@ import java.time.Duration; public interface SearchTestContainer { - String SEARCH_JAVA_OPTS = "-Xms64m -Xmx384m -XX:MaxDirectMemorySize=368435456"; + String SEARCH_JAVA_OPTS = "-Xms446m -Xmx446m -XX:MaxDirectMemorySize=368435456"; Duration STARTUP_TIMEOUT = Duration.ofMinutes(5); // usually < 1min GenericContainer startContainer(); diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 05a45239dabf8..227645a87d30a 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -227,6 +227,7 @@ def test_update_container(frontend_session, ingest_cleanup_data): "ownerUrn": new_owner, "resourceUrn": container_urn, "ownerEntityType": "CORP_USER", + "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner" } }, } From 86e0023a4e158467130f7337478a48bf98fb344b Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Sat, 21 Oct 2023 16:20:59 +0100 Subject: [PATCH 06/80] feat(ingestion): Adds more advanced configurations for runtime debugging (#8998) --- .../ingest/IngestionResolverUtils.java | 10 ++ ...eateIngestionExecutionRequestResolver.java | 3 + .../source/UpsertIngestionSourceResolver.java | 10 ++ .../src/main/resources/ingestion.graphql | 10 ++ .../UpsertIngestionSourceResolverTest.java | 2 +- .../app/ingest/source/IngestionSourceList.tsx | 8 +- .../ingest/source/builder/NameSourceStep.tsx | 123 +++++++++++++++++- .../src/app/ingest/source/builder/types.ts | 17 +++ .../src/graphql/ingestion.graphql | 8 ++ docker/build.gradle | 12 +- docs/ui-ingestion.md | 20 ++- .../docs/dev_guides/profiling_ingestions.md | 39 ++++++ .../TimeseriesAspectServiceTestBase.java | 6 +- .../test/search/SearchTestContainer.java | 2 + .../ingestion/DataHubIngestionSourceInfo.pdl | 13 +- 15 files changed, 267 insertions(+), 16 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java index 7db0b6f826a04..1140c031f1d35 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java @@ -5,6 +5,7 @@ import com.linkedin.datahub.graphql.generated.IngestionConfig; import com.linkedin.datahub.graphql.generated.IngestionSchedule; import com.linkedin.datahub.graphql.generated.IngestionSource; +import com.linkedin.datahub.graphql.generated.StringMapEntry; import com.linkedin.datahub.graphql.generated.StructuredReport; import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper; import com.linkedin.entity.EntityResponse; @@ -21,6 +22,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; @@ -143,6 +145,14 @@ public static IngestionConfig mapIngestionSourceConfig(final DataHubIngestionSou result.setVersion(config.getVersion()); result.setExecutorId(config.getExecutorId()); result.setDebugMode(config.isDebugMode()); + if (config.getExtraArgs() != null) { + List extraArgs = config.getExtraArgs() + .keySet() + .stream() + .map(key -> new StringMapEntry(key, config.getExtraArgs().get(key))) + .collect(Collectors.toList()); + result.setExtraArgs(extraArgs); + } return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java index e5064e6620526..ea20b837e0a1f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java @@ -117,6 +117,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) if (ingestionSourceInfo.getConfig().hasDebugMode()) { debugMode = ingestionSourceInfo.getConfig().isDebugMode() ? "true" : "false"; } + if (ingestionSourceInfo.getConfig().hasExtraArgs()) { + arguments.putAll(ingestionSourceInfo.getConfig().getExtraArgs()); + } arguments.put(DEBUG_MODE_ARG_NAME, debugMode); execInput.setArgs(new StringMap(arguments)); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java index 2ce394ad5ba84..68e334bd976f8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java @@ -1,10 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.ingest.source; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; +import com.linkedin.datahub.graphql.generated.StringMapEntryInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceConfigInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceInput; import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceScheduleInput; @@ -17,6 +19,8 @@ import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import java.net.URISyntaxException; @@ -108,6 +112,12 @@ private DataHubIngestionSourceConfig mapConfig(final UpdateIngestionSourceConfig if (input.getDebugMode() != null) { result.setDebugMode(input.getDebugMode()); } + if (input.getExtraArgs() != null) { + Map extraArgs = input.getExtraArgs() + .stream() + .collect(Collectors.toMap(StringMapEntryInput::getKey, StringMapEntryInput::getValue)); + result.setExtraArgs(new StringMap(extraArgs)); + } return result; } diff --git a/datahub-graphql-core/src/main/resources/ingestion.graphql b/datahub-graphql-core/src/main/resources/ingestion.graphql index 69c8aff124583..21f9fb2633119 100644 --- a/datahub-graphql-core/src/main/resources/ingestion.graphql +++ b/datahub-graphql-core/src/main/resources/ingestion.graphql @@ -332,6 +332,11 @@ type IngestionConfig { Advanced: Whether or not to run ingestion in debug mode """ debugMode: Boolean + + """ + Advanced: Extra arguments for the ingestion run. + """ + extraArgs: [StringMapEntry!] } """ @@ -483,6 +488,11 @@ input UpdateIngestionSourceConfigInput { Whether or not to run ingestion in debug mode """ debugMode: Boolean + + """ + Extra arguments for the ingestion run. + """ + extraArgs: [StringMapEntryInput!] } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java index 2538accc694fb..16d8da9169a8f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java @@ -26,7 +26,7 @@ public class UpsertIngestionSourceResolverTest { "Test source", "mysql", "Test source description", new UpdateIngestionSourceScheduleInput("* * * * *", "UTC"), - new UpdateIngestionSourceConfigInput("my test recipe", "0.8.18", "executor id", false) + new UpdateIngestionSourceConfigInput("my test recipe", "0.8.18", "executor id", false, null) ); @Test diff --git a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx index 0e341a5ff3a79..13af19b0b6ac2 100644 --- a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx +++ b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx @@ -15,7 +15,7 @@ import { Message } from '../../shared/Message'; import TabToolbar from '../../entity/shared/components/styled/TabToolbar'; import { IngestionSourceBuilderModal } from './builder/IngestionSourceBuilderModal'; import { addToListIngestionSourcesCache, CLI_EXECUTOR_ID, removeFromListIngestionSourcesCache } from './utils'; -import { DEFAULT_EXECUTOR_ID, SourceBuilderState } from './builder/types'; +import { DEFAULT_EXECUTOR_ID, SourceBuilderState, StringMapEntryInput } from './builder/types'; import { IngestionSource, UpdateIngestionSourceInput } from '../../../types.generated'; import { SearchBar } from '../../search/SearchBar'; import { useEntityRegistry } from '../../useEntityRegistry'; @@ -173,6 +173,11 @@ export const IngestionSourceList = () => { setFocusSourceUrn(undefined); }; + const formatExtraArgs = (extraArgs): StringMapEntryInput[] => { + if (extraArgs === null || extraArgs === undefined) return []; + return extraArgs.map((entry) => ({ key: entry.key, value: entry.value })); + }; + const createOrUpdateIngestionSource = ( input: UpdateIngestionSourceInput, resetState: () => void, @@ -294,6 +299,7 @@ export const IngestionSourceList = () => { (recipeBuilderState.config?.executorId as string)) || DEFAULT_EXECUTOR_ID, debugMode: recipeBuilderState.config?.debugMode || false, + extraArgs: formatExtraArgs(recipeBuilderState.config?.extraArgs || []), }, schedule: recipeBuilderState.schedule && { interval: recipeBuilderState.schedule?.interval as string, diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 992ebff643c31..f4c048bcaf0d2 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -1,7 +1,7 @@ import { Button, Checkbox, Collapse, Form, Input, Typography } from 'antd'; import React from 'react'; import styled from 'styled-components'; -import { SourceBuilderState, StepProps } from './types'; +import { SourceBuilderState, StepProps, StringMapEntryInput } from './types'; const ControlsContainer = styled.div` display: flex; @@ -13,6 +13,10 @@ const SaveButton = styled(Button)` margin-right: 15px; `; +const ExtraEnvKey = 'extra_env_vars'; +const ExtraReqKey = 'extra_pip_requirements'; +const ExtraPluginKey = 'extra_pip_plugins'; + export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) => { const setName = (stagedName: string) => { const newState: SourceBuilderState = { @@ -55,6 +59,90 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) updateState(newState); }; + const retrieveExtraEnvs = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraEnvKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraEnvs = (envs: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const indxOfEnvVars: number = extraArgs.findIndex((entry) => entry.key === ExtraEnvKey) as number; + const value = { key: ExtraEnvKey, value: envs }; + if (indxOfEnvVars > -1) { + extraArgs[indxOfEnvVars] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + + const retrieveExtraDataHubPlugins = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraPluginKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraDataHubPlugins = (plugins: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const indxOfPlugins: number = extraArgs.findIndex((entry) => entry.key === ExtraPluginKey) as number; + const value = { key: ExtraPluginKey, value: plugins }; + if (indxOfPlugins > -1) { + extraArgs[indxOfPlugins] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + + const retrieveExtraReqs = () => { + const extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const index: number = extraArgs.findIndex((entry) => entry.key === ExtraReqKey) as number; + if (index > -1) { + return extraArgs[index].value; + } + return ''; + }; + + const setExtraReqs = (reqs: string) => { + let extraArgs: StringMapEntryInput[] = state.config?.extraArgs ? state.config?.extraArgs : []; + const indxOfReqs: number = extraArgs.findIndex((entry) => entry.key === ExtraReqKey) as number; + const value = { key: ExtraReqKey, value: reqs }; + if (indxOfReqs > -1) { + extraArgs[indxOfReqs] = value; + } else { + extraArgs = [...extraArgs, value]; + } + const newState: SourceBuilderState = { + ...state, + config: { + ...state.config, + extraArgs, + }, + }; + updateState(newState); + }; + const onClickCreate = (shouldRun?: boolean) => { if (state.name !== undefined && state.name.length > 0) { submit(shouldRun); @@ -116,6 +204,39 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) onChange={(event) => setDebugMode(event.target.checked)} />
+ Extra Enviroment Variables}> + + Advanced: Set extra environment variables to an ingestion execution + + setExtraEnvs(event.target.value)} + /> + + Extra DataHub plugins}> + + Advanced: Set extra DataHub plugins for an ingestion execution + + setExtraDataHubPlugins(event.target.value)} + /> + + Extra Pip Libraries}> + + Advanced: Add extra pip libraries for an ingestion execution + + setExtraReqs(event.target.value)} + /> +
diff --git a/datahub-web-react/src/app/ingest/source/builder/types.ts b/datahub-web-react/src/app/ingest/source/builder/types.ts index cfe0f27ae7dbe..2df467b7beba1 100644 --- a/datahub-web-react/src/app/ingest/source/builder/types.ts +++ b/datahub-web-react/src/app/ingest/source/builder/types.ts @@ -34,6 +34,18 @@ export type StepProps = { ingestionSources: SourceConfig[]; }; +export type StringMapEntryInput = { + /** + * The key of the map entry + */ + key: string; + + /** + * The value fo the map entry + */ + value: string; +}; + /** * The object represents the state of the Ingestion Source Builder form. */ @@ -91,5 +103,10 @@ export interface SourceBuilderState { * Advanced: Whether or not to run this ingestion source in debug mode */ debugMode?: boolean | null; + + /** + * Advanced: Extra arguments for the ingestion run. + */ + extraArgs?: StringMapEntryInput[] | null; }; } diff --git a/datahub-web-react/src/graphql/ingestion.graphql b/datahub-web-react/src/graphql/ingestion.graphql index c127e9ec03f9a..1767fe34bfef0 100644 --- a/datahub-web-react/src/graphql/ingestion.graphql +++ b/datahub-web-react/src/graphql/ingestion.graphql @@ -12,6 +12,10 @@ query listIngestionSources($input: ListIngestionSourcesInput!) { version executorId debugMode + extraArgs { + key + value + } } schedule { interval @@ -51,6 +55,10 @@ query getIngestionSource($urn: String!, $runStart: Int, $runCount: Int) { version executorId debugMode + extraArgs { + key + value + } } schedule { interval diff --git a/docker/build.gradle b/docker/build.gradle index c8fdbc86b18b7..56634a5fe0c67 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -97,10 +97,20 @@ task quickstartDebug(type: Exec, dependsOn: ':metadata-ingestion:install') { dependsOn(debug_modules.collect { it + ':dockerTagDebug' }) shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke' - environment "DATAHUB_PRECREATE_TOPICS", "true" environment "DATAHUB_TELEMETRY_ENABLED", "false" environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" + // Elastic + // environment "DATAHUB_SEARCH_IMAGE", 'elasticsearch' + // environment "DATAHUB_SEARCH_TAG", '7.10.1' + + // OpenSearch + environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch' + environment "DATAHUB_SEARCH_TAG", '2.9.0' + environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true' + environment "USE_AWS_ELASTICSEARCH", 'true' + + def cmd = [ 'source ../metadata-ingestion/venv/bin/activate && ', 'datahub docker quickstart', diff --git a/docs/ui-ingestion.md b/docs/ui-ingestion.md index db2007e1e19a9..438ddd8823b7e 100644 --- a/docs/ui-ingestion.md +++ b/docs/ui-ingestion.md @@ -1,5 +1,12 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + # Ingestion + + ## Introduction Starting in version `0.8.25`, DataHub supports creating, configuring, scheduling, & executing batch metadata ingestion using the DataHub user interface. This makes @@ -173,28 +180,29 @@ Finally, give your Ingestion Source a name. Once you're happy with your configurations, click 'Done' to save your changes. -##### Advanced: Running with a specific CLI version +##### Advanced ingestion configs: -DataHub comes pre-configured to use the latest version of the DataHub CLI ([acryl-datahub](https://pypi.org/project/acryl-datahub/)) that is compatible +DataHub's Managed Ingestion UI comes pre-configured to use the latest version of the DataHub CLI ([acryl-datahub](https://pypi.org/project/acryl-datahub/)) that is compatible with the server. However, you can override the default package version using the 'Advanced' source configurations. To do so, simply click 'Advanced', then change the 'CLI Version' text box to contain the exact version of the DataHub CLI you'd like to use. -

_Pinning the CLI version to version `0.8.23.2`_ +Other advanced options include specifying **environment variables**, **DataHub plugins** or **python packages at runtime**. + Once you're happy with your changes, simply click 'Done' to save. You can upload and even update recipes using the cli as mentioned in the [cli documentation for uploading ingestion recipes](./cli.md#ingest-deploy). -An example execution would look something like: +An example execution for a given `recipe.yaml` file, would look something like: ```bash datahub ingest deploy --name "My Test Ingestion Source" --schedule "5 * * * *" --time-zone "UTC" -c recipe.yaml @@ -330,8 +338,8 @@ for the `datahub-actions` container and running `docker logs `. There are valid cases for ingesting metadata without the UI-based ingestion scheduler. For example, - You have written a custom ingestion Source -- Your data sources are not reachable on the network where DataHub is deployed -- Your ingestion source requires context from a local filesystem (e.g. input files, environment variables, etc) +- Your data sources are not reachable on the network where DataHub is deployed. Managed DataHub users can use a [remote executor](managed-datahub/operator-guide/setting-up-remote-ingestion-executor-on-aws.md) for remote UI-based ingestion. +- Your ingestion source requires context from a local filesystem (e.g. input files) - You want to distribute metadata ingestion among multiple producers / environments ### How do I attach policies to the actions pod to give it permissions to pull metadata from various sources? diff --git a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md index d876d99b494f8..77cc2f456aa2d 100644 --- a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md +++ b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md @@ -13,6 +13,35 @@ This page documents how to perform memory profiles of ingestion runs. It is useful when trying to size the amount of resources necessary to ingest some source or when developing new features or sources. ## How to use + + + + +Create an ingestion as specified in the [Ingestion guide](../../../docs/ui-ingestion.md). + +Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion: +```yaml +source: + ... + +sink: + ... + +flags: + generate_memory_profiles: "" +``` + +In the final panel, under the advanced section, add the `debug` datahub package under the **Extra DataHub Plugins** section. +As seen below: + +

+ +

+ +Finally, save and run the ingestion process. + +
+ Install the `debug` plugin for DataHub's CLI wherever the ingestion runs: ```bash @@ -33,6 +62,16 @@ flags: generate_memory_profiles: "" ``` +Finally run the ingestion recipe + +```bash +$ datahub ingest -c recipe.yaml +``` + + +
+ + Once the ingestion run starts a binary file will be created and appended to during the execution of the ingestion. These files follow the pattern `file-.bin` for a unique identification. diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index b19d2026fbfc4..1362a0f69eff2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -892,8 +892,10 @@ public void testGetIndexSizes() { //CHECKSTYLE:OFF /* Example result: - {aspectName=testentityprofile, sizeMb=52.234, indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} - {aspectName=testentityprofile, sizeMb=0.208, indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} + {aspectName=testentityprofile, sizeMb=52.234, + indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity} + {aspectName=testentityprofile, sizeMb=0.208, + indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests} */ // There may be other indices in there from other tests, so just make sure that index for entity + aspect is in there //CHECKSTYLE:ON diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java index 4c1555fc510e6..34aa6978f742f 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java @@ -5,7 +5,9 @@ import java.time.Duration; public interface SearchTestContainer { + String SEARCH_JAVA_OPTS = "-Xms446m -Xmx446m -XX:MaxDirectMemorySize=368435456"; + Duration STARTUP_TIMEOUT = Duration.ofMinutes(5); // usually < 1min GenericContainer startContainer(); diff --git a/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl index b3e237202fc2f..f777b5d6e12e7 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/ingestion/DataHubIngestionSourceInfo.pdl @@ -37,10 +37,10 @@ record DataHubIngestionSourceInfo { * Parameters associated with the Ingestion Source */ config: record DataHubIngestionSourceConfig { - /** - * The JSON recipe to use for ingestion - */ - recipe: string + /** + * The JSON recipe to use for ingestion + */ + recipe: string /** * The PyPI version of the datahub CLI to use when executing a recipe @@ -56,5 +56,10 @@ record DataHubIngestionSourceInfo { * Whether or not to run this ingestion source in debug mode */ debugMode: optional boolean + + /** + * Extra arguments for the ingestion run. + */ + extraArgs: optional map[string, string] } } \ No newline at end of file From 04216e30bb2a1eab1993e48276d9f8e52d6b3121 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Sat, 21 Oct 2023 17:21:54 +0200 Subject: [PATCH 07/80] feat(ingest/s3): S3 add partition to schema (#8900) Co-authored-by: Pedro Silva --- .../src/datahub/ingestion/source/s3/config.py | 5 ++- .../src/datahub/ingestion/source/s3/source.py | 33 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index 9b5296f0b9dd5..3ef6476078f6f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -75,7 +75,10 @@ class DataLakeSourceConfig( default=100, description="Maximum number of rows to use when inferring schemas for TSV and CSV files.", ) - + add_partition_columns_to_schema: bool = Field( + default=False, + description="Whether to add partition fields to the schema.", + ) verify_ssl: Union[bool, str] = Field( default=True, description="Either a boolean, in which case it controls whether we verify the server's TLS certificate, or a string, in which case it must be a path to a CA bundle to use.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index eb49fcbb268c0..94c571eabad11 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -78,6 +78,7 @@ NullTypeClass, NumberTypeClass, RecordTypeClass, + SchemaField, SchemaFieldDataType, SchemaMetadata, StringTypeClass, @@ -90,6 +91,7 @@ OperationClass, OperationTypeClass, OtherSchemaClass, + SchemaFieldDataTypeClass, _Aspect, ) from datahub.telemetry import stats, telemetry @@ -458,8 +460,39 @@ def get_fields(self, table_data: TableData, path_spec: PathSpec) -> List: logger.debug(f"Extracted fields in schema: {fields}") fields = sorted(fields, key=lambda f: f.fieldPath) + if self.source_config.add_partition_columns_to_schema: + self.add_partition_columns_to_schema( + fields=fields, path_spec=path_spec, full_path=table_data.full_path + ) + return fields + def add_partition_columns_to_schema( + self, path_spec: PathSpec, full_path: str, fields: List[SchemaField] + ) -> None: + is_fieldpath_v2 = False + for field in fields: + if field.fieldPath.startswith("[version=2.0]"): + is_fieldpath_v2 = True + break + vars = path_spec.get_named_vars(full_path) + if vars is not None and "partition_key" in vars: + for partition_key in vars["partition_key"].values(): + fields.append( + SchemaField( + fieldPath=f"{partition_key}" + if not is_fieldpath_v2 + else f"[version=2.0].[type=string].{partition_key}", + nativeDataType="string", + type=SchemaFieldDataType(StringTypeClass()) + if not is_fieldpath_v2 + else SchemaFieldDataTypeClass(type=StringTypeClass()), + isPartitioningKey=True, + nullable=True, + recursive=False, + ) + ) + def get_table_profile( self, table_data: TableData, dataset_urn: str ) -> Iterable[MetadataWorkUnit]: From 633e6d6f779dc1e09bdfd10a160889684c485706 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Mon, 23 Oct 2023 17:15:44 +0100 Subject: [PATCH 08/80] feat(frontend): Remove debug flag from start script (#9075) --- docker/datahub-frontend/start.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/datahub-frontend/start.sh b/docker/datahub-frontend/start.sh index 430982aa2456b..9dc1514144bb1 100755 --- a/docker/datahub-frontend/start.sh +++ b/docker/datahub-frontend/start.sh @@ -50,7 +50,6 @@ export JAVA_OPTS="-Xms512m \ -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf \ -Dlogback.configurationFile=datahub-frontend/conf/logback.xml \ -Dlogback.debug=false \ - -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005 \ ${PROMETHEUS_AGENT:-} ${OTEL_AGENT:-} \ ${TRUSTSTORE_FILE:-} ${TRUSTSTORE_TYPE:-} ${TRUSTSTORE_PASSWORD:-} \ ${HTTP_PROXY:-} ${HTTPS_PROXY:-} ${NO_PROXY:-} \ From 8fb95e88a17260d0d6727f4d5e09636b128faf47 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 23 Oct 2023 12:40:42 -0700 Subject: [PATCH 09/80] feat(sqlparser): parse create DDL statements (#9002) --- .../goldens/v2_sqlite_operator.json | 162 +++++++++++++++--- .../v2_sqlite_operator_no_dag_listener.json | 162 +++++++++++++++--- .../datahub/emitter/sql_parsing_builder.py | 9 +- .../testing/check_sql_parser_result.py | 9 + .../src/datahub/utilities/sqlglot_lineage.py | 92 ++++++++-- .../test_bigquery_create_view_with_cte.json | 8 +- ..._bigquery_from_sharded_table_wildcard.json | 4 +- .../test_bigquery_nested_subqueries.json | 4 +- ..._bigquery_sharded_table_normalization.json | 4 +- .../test_bigquery_star_with_replace.json | 6 +- .../test_bigquery_view_from_union.json | 4 +- .../goldens/test_create_table_ddl.json | 55 +++++- .../goldens/test_create_view_as_select.json | 2 +- .../test_select_from_struct_subfields.json | 2 +- .../test_select_with_full_col_name.json | 2 +- .../test_teradata_default_normalization.json | 2 + 16 files changed, 430 insertions(+), 97 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json index 1a32b38ce055d..81d0a71b651d9 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json @@ -74,9 +74,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -98,7 +96,44 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -157,7 +192,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:24.632190+00:00", + "start_date": "2023-10-15 20:29:10.262813+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -172,7 +207,7 @@ "name": "sqlite_operator_create_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056984632, + "time": 1697401750262, "actor": "urn:li:corpuser:datahub" } } @@ -221,7 +256,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056984632, + "timestampMillis": 1697401750262, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -251,9 +286,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -275,7 +308,80 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -331,7 +437,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056984947, + "timestampMillis": 1697401750651, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -447,7 +553,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:28.605901+00:00", + "start_date": "2023-10-15 20:29:15.013834+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -462,7 +568,7 @@ "name": "sqlite_operator_populate_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056988605, + "time": 1697401755013, "actor": "urn:li:corpuser:datahub" } } @@ -511,7 +617,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056988605, + "timestampMillis": 1697401755013, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -621,7 +727,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056989098, + "timestampMillis": 1697401755600, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -807,7 +913,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:32.888165+00:00", + "start_date": "2023-10-15 20:29:20.216818+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -822,7 +928,7 @@ "name": "sqlite_operator_transform_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056992888, + "time": 1697401760216, "actor": "urn:li:corpuser:datahub" } } @@ -895,7 +1001,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056992888, + "timestampMillis": 1697401760216, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1131,7 +1237,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056993744, + "timestampMillis": 1697401761237, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1249,7 +1355,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:37.745717+00:00", + "start_date": "2023-10-15 20:29:26.243934+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1264,7 +1370,7 @@ "name": "sqlite_operator_cleanup_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696056997745, + "time": 1697401766243, "actor": "urn:li:corpuser:datahub" } } @@ -1313,7 +1419,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056997745, + "timestampMillis": 1697401766243, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1425,7 +1531,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696056998672, + "timestampMillis": 1697401767373, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1543,7 +1649,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 06:56:42.645806+00:00", + "start_date": "2023-10-15 20:29:32.075613+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1558,7 +1664,7 @@ "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057002645, + "time": 1697401772075, "actor": "urn:li:corpuser:datahub" } } @@ -1607,7 +1713,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057002645, + "timestampMillis": 1697401772075, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1719,7 +1825,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057003759, + "timestampMillis": 1697401773454, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json index c082be693e30c..96a0f02ccec17 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json @@ -74,9 +74,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -98,7 +96,44 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -157,7 +192,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:00:45.832554+00:00", + "start_date": "2023-10-15 20:27:26.883178+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -172,7 +207,7 @@ "name": "sqlite_operator_create_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057245832, + "time": 1697401646883, "actor": "urn:li:corpuser:datahub" } } @@ -221,7 +256,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057245832, + "timestampMillis": 1697401646883, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -251,9 +286,7 @@ "downstream_task_ids": "['populate_cost_table']", "inlets": "[]", "outlets": "[]", - "datahub_sql_parser_error": "Can only generate column-level lineage for select-like inner statements, not (outer statement type: )", - "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}", - "openlineage_run_facet_extractionError": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/ExtractionErrorRunFacet\", \"errors\": [{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/BaseFacet\", \"errorMessage\": \"Can only generate column-level lineage for select-like inner statements, not (outer statement type: )\", \"task\": \"datahub_sql_parser\"}], \"failedTasks\": 1, \"totalTasks\": 1}" + "openlineage_job_facet_sql": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.2.0/integration/airflow\", \"_schemaURL\": \"https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/SqlJobFacet\", \"query\": \"\\n CREATE TABLE IF NOT EXISTS costs (\\n id INTEGER PRIMARY KEY,\\n month TEXT NOT NULL,\\n total_cost REAL NOT NULL,\\n area REAL NOT NULL\\n )\\n \"}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=sqlite_operator&_flt_3_task_id=create_cost_table", "name": "create_cost_table", @@ -275,7 +308,80 @@ "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)" ], "inputDatajobs": [], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),month)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),total_cost)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD),area)" + ], + "confidenceScore": 1.0 + } + ] } } }, @@ -331,7 +437,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057246734, + "timestampMillis": 1697401647826, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -502,7 +608,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:00:49.653938+00:00", + "start_date": "2023-10-15 20:27:31.398799+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -517,7 +623,7 @@ "name": "sqlite_operator_populate_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057249653, + "time": 1697401651398, "actor": "urn:li:corpuser:datahub" } } @@ -566,7 +672,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057249653, + "timestampMillis": 1697401651398, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -676,7 +782,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057250831, + "timestampMillis": 1697401652651, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -917,7 +1023,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:00:53.989264+00:00", + "start_date": "2023-10-15 20:27:37.697995+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -932,7 +1038,7 @@ "name": "sqlite_operator_transform_cost_table_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057253989, + "time": 1697401657697, "actor": "urn:li:corpuser:datahub" } } @@ -1005,7 +1111,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057253989, + "timestampMillis": 1697401657697, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1241,7 +1347,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057255628, + "timestampMillis": 1697401659496, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1414,7 +1520,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:01:00.421177+00:00", + "start_date": "2023-10-15 20:27:45.670215+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1429,7 +1535,7 @@ "name": "sqlite_operator_cleanup_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057260421, + "time": 1697401665670, "actor": "urn:li:corpuser:datahub" } } @@ -1478,7 +1584,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057260421, + "timestampMillis": 1697401665670, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1590,7 +1696,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057262258, + "timestampMillis": 1697401667670, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1763,7 +1869,7 @@ "customProperties": { "run_id": "manual_run_test", "duration": "None", - "start_date": "2023-09-30 07:01:05.540192+00:00", + "start_date": "2023-10-15 20:27:51.559194+00:00", "end_date": "None", "execution_date": "2023-09-27 21:34:38+00:00", "try_number": "0", @@ -1778,7 +1884,7 @@ "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1696057265540, + "time": 1697401671559, "actor": "urn:li:corpuser:datahub" } } @@ -1827,7 +1933,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057265540, + "timestampMillis": 1697401671559, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" @@ -1939,7 +2045,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1696057267631, + "timestampMillis": 1697401673788, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" diff --git a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py index 071d590f270f8..dedcfa0385f75 100644 --- a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py +++ b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py @@ -179,15 +179,16 @@ def add_lineage( def gen_workunits(self) -> Iterable[MetadataWorkUnit]: if self.generate_lineage: - yield from self._gen_lineage_workunits() + for mcp in self._gen_lineage_mcps(): + yield mcp.as_workunit() if self.generate_usage_statistics: yield from self._gen_usage_statistics_workunits() - def _gen_lineage_workunits(self) -> Iterable[MetadataWorkUnit]: + def _gen_lineage_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: for downstream_urn in self._lineage_map: upstreams: List[UpstreamClass] = [] fine_upstreams: List[FineGrainedLineageClass] = [] - for upstream_urn, edge in self._lineage_map[downstream_urn].items(): + for edge in self._lineage_map[downstream_urn].values(): upstreams.append(edge.gen_upstream_aspect()) fine_upstreams.extend(edge.gen_fine_grained_lineage_aspects()) @@ -201,7 +202,7 @@ def _gen_lineage_workunits(self) -> Iterable[MetadataWorkUnit]: ) yield MetadataChangeProposalWrapper( entityUrn=downstream_urn, aspect=upstream_lineage - ).as_workunit() + ) def _gen_usage_statistics_workunits(self) -> Iterable[MetadataWorkUnit]: yield from self._usage_aggregator.generate_workunits( diff --git a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py index b3b1331db768b..2b610947e9043 100644 --- a/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py +++ b/metadata-ingestion/src/datahub/testing/check_sql_parser_result.py @@ -24,6 +24,7 @@ def assert_sql_result_with_resolver( *, expected_file: pathlib.Path, schema_resolver: SchemaResolver, + allow_table_error: bool = False, **kwargs: Any, ) -> None: # HACK: Our BigQuery source overwrites this value and doesn't undo it. @@ -36,6 +37,14 @@ def assert_sql_result_with_resolver( **kwargs, ) + if res.debug_info.table_error: + if allow_table_error: + logger.info( + f"SQL parser table error: {res.debug_info.table_error}", + exc_info=res.debug_info.table_error, + ) + else: + raise res.debug_info.table_error if res.debug_info.column_error: logger.warning( f"SQL parser column error: {res.debug_info.column_error}", diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index c830ec8c02fd4..97121b368f507 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -241,9 +241,9 @@ class SqlParsingResult(_ParserBaseModel): ) -def _parse_statement(sql: str, dialect: str) -> sqlglot.Expression: - statement = sqlglot.parse_one( - sql, read=dialect, error_level=sqlglot.ErrorLevel.RAISE +def _parse_statement(sql: sqlglot.exp.ExpOrStr, dialect: str) -> sqlglot.Expression: + statement: sqlglot.Expression = sqlglot.maybe_parse( + sql, dialect=dialect, error_level=sqlglot.ErrorLevel.RAISE ) return statement @@ -467,14 +467,20 @@ def _column_level_lineage( # noqa: C901 default_db: Optional[str], default_schema: Optional[str], ) -> List[_ColumnLineageInfo]: - if not isinstance( - statement, - _SupportedColumnLineageTypesTuple, + is_create_ddl = _is_create_table_ddl(statement) + if ( + not isinstance( + statement, + _SupportedColumnLineageTypesTuple, + ) + and not is_create_ddl ): raise UnsupportedStatementTypeError( f"Can only generate column-level lineage for select-like inner statements, not {type(statement)}" ) + column_lineage: List[_ColumnLineageInfo] = [] + use_case_insensitive_cols = dialect in { # Column identifiers are case-insensitive in BigQuery, so we need to # do a normalization step beforehand to make sure it's resolved correctly. @@ -580,6 +586,38 @@ def _schema_aware_fuzzy_column_resolve( ) from e logger.debug("Qualified sql %s", statement.sql(pretty=True, dialect=dialect)) + # Handle the create DDL case. + if is_create_ddl: + assert ( + output_table is not None + ), "output_table must be set for create DDL statements" + + create_schema: sqlglot.exp.Schema = statement.this + sqlglot_columns = create_schema.expressions + + for column_def in sqlglot_columns: + if not isinstance(column_def, sqlglot.exp.ColumnDef): + # Ignore things like constraints. + continue + + output_col = _schema_aware_fuzzy_column_resolve( + output_table, column_def.name + ) + output_col_type = column_def.args.get("kind") + + column_lineage.append( + _ColumnLineageInfo( + downstream=_DownstreamColumnRef( + table=output_table, + column=output_col, + column_type=output_col_type, + ), + upstreams=[], + ) + ) + + return column_lineage + # Try to figure out the types of the output columns. try: statement = sqlglot.optimizer.annotate_types.annotate_types( @@ -589,8 +627,6 @@ def _schema_aware_fuzzy_column_resolve( # This is not a fatal error, so we can continue. logger.debug("sqlglot failed to annotate types: %s", e) - column_lineage = [] - try: assert isinstance(statement, _SupportedColumnLineageTypesTuple) @@ -599,7 +635,6 @@ def _schema_aware_fuzzy_column_resolve( (select_col.alias_or_name, select_col) for select_col in statement.selects ] logger.debug("output columns: %s", [col[0] for col in output_columns]) - output_col: str for output_col, original_col_expression in output_columns: if output_col == "*": # If schema information is available, the * will be expanded to the actual columns. @@ -628,7 +663,7 @@ def _schema_aware_fuzzy_column_resolve( # Generate SELECT lineage. # Using a set here to deduplicate upstreams. - direct_col_upstreams: Set[_ColumnRef] = set() + direct_raw_col_upstreams: Set[_ColumnRef] = set() for node in lineage_node.walk(): if node.downstream: # We only want the leaf nodes. @@ -643,8 +678,9 @@ def _schema_aware_fuzzy_column_resolve( if node.subfield: normalized_col = f"{normalized_col}.{node.subfield}" - col = _schema_aware_fuzzy_column_resolve(table_ref, normalized_col) - direct_col_upstreams.add(_ColumnRef(table=table_ref, column=col)) + direct_raw_col_upstreams.add( + _ColumnRef(table=table_ref, column=normalized_col) + ) else: # This branch doesn't matter. For example, a count(*) column would go here, and # we don't get any column-level lineage for that. @@ -665,7 +701,16 @@ def _schema_aware_fuzzy_column_resolve( if original_col_expression.type: output_col_type = original_col_expression.type - if not direct_col_upstreams: + # Fuzzy resolve upstream columns. + direct_resolved_col_upstreams = { + _ColumnRef( + table=edge.table, + column=_schema_aware_fuzzy_column_resolve(edge.table, edge.column), + ) + for edge in direct_raw_col_upstreams + } + + if not direct_resolved_col_upstreams: logger.debug(f' "{output_col}" has no upstreams') column_lineage.append( _ColumnLineageInfo( @@ -674,12 +719,12 @@ def _schema_aware_fuzzy_column_resolve( column=output_col, column_type=output_col_type, ), - upstreams=sorted(direct_col_upstreams), + upstreams=sorted(direct_resolved_col_upstreams), # logic=column_logic.sql(pretty=True, dialect=dialect), ) ) - # TODO: Also extract referenced columns (e.g. non-SELECT lineage) + # TODO: Also extract referenced columns (aka auxillary / non-SELECT lineage) except (sqlglot.errors.OptimizeError, ValueError) as e: raise SqlUnderstandingError( f"sqlglot failed to compute some lineage: {e}" @@ -700,6 +745,12 @@ def _extract_select_from_create( return statement +def _is_create_table_ddl(statement: sqlglot.exp.Expression) -> bool: + return isinstance(statement, sqlglot.exp.Create) and isinstance( + statement.this, sqlglot.exp.Schema + ) + + def _try_extract_select( statement: sqlglot.exp.Expression, ) -> sqlglot.exp.Expression: @@ -766,6 +817,7 @@ def _translate_sqlglot_type( def _translate_internal_column_lineage( table_name_urn_mapping: Dict[_TableName, str], raw_column_lineage: _ColumnLineageInfo, + dialect: str, ) -> ColumnLineageInfo: downstream_urn = None if raw_column_lineage.downstream.table: @@ -779,7 +831,9 @@ def _translate_internal_column_lineage( ) if raw_column_lineage.downstream.column_type else None, - native_column_type=raw_column_lineage.downstream.column_type.sql() + native_column_type=raw_column_lineage.downstream.column_type.sql( + dialect=dialect + ) if raw_column_lineage.downstream.column_type and raw_column_lineage.downstream.column_type.this != sqlglot.exp.DataType.Type.UNKNOWN @@ -800,12 +854,14 @@ def _get_dialect(platform: str) -> str: # TODO: convert datahub platform names to sqlglot dialect if platform == "presto-on-hive": return "hive" + if platform == "mssql": + return "tsql" else: return platform def _sqlglot_lineage_inner( - sql: str, + sql: sqlglot.exp.ExpOrStr, schema_resolver: SchemaResolver, default_db: Optional[str] = None, default_schema: Optional[str] = None, @@ -918,7 +974,7 @@ def _sqlglot_lineage_inner( if column_lineage: column_lineage_urns = [ _translate_internal_column_lineage( - table_name_urn_mapping, internal_col_lineage + table_name_urn_mapping, internal_col_lineage, dialect=dialect ) for internal_col_lineage in column_lineage ] diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json index f0175b4dc8892..d610b0a83f229 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json @@ -18,7 +18,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -36,7 +36,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -54,7 +54,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -72,7 +72,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json index b7df5444987f2..2d3d188d28316 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -32,7 +32,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json index 67e306bebf545..41ae0885941b0 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -32,7 +32,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json index b7df5444987f2..2d3d188d28316 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -32,7 +32,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json index b393b2445d6c4..26f8f8f59a3ff 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json @@ -16,7 +16,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -34,7 +34,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -52,7 +52,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json index 53fb94300e804..83365c09f69c2 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json @@ -17,7 +17,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { @@ -39,7 +39,7 @@ "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "native_column_type": "TEXT" + "native_column_type": "STRING" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json index 4773974545bfa..cf31b71cb50f6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_table_ddl.json @@ -4,5 +4,58 @@ "out_tables": [ "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)" ], - "column_lineage": null + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "id", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INTEGER" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "month", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "TEXT" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "total_cost", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "REAL" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:sqlite,costs,PROD)", + "column": "area", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "REAL" + }, + "upstreams": [] + } + ] } \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json index ff452467aa5bd..8a6b60d0f1bde 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json @@ -30,7 +30,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "native_column_type": "BIGINT" + "native_column_type": "NUMBER" }, "upstreams": [] }, diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json index 5ad847e252497..2424fcda34752 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "native_column_type": "DECIMAL" + "native_column_type": "NUMERIC" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json index 6ee3d2e61c39b..8dd2633eff612 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json @@ -14,7 +14,7 @@ "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "native_column_type": "DECIMAL" + "native_column_type": "NUMERIC" }, "upstreams": [ { diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json index b0351a7e07ad2..ee80285d87f60 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_default_normalization.json @@ -12,6 +12,7 @@ "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", "column": "PatientId", + "column_type": null, "native_column_type": "INTEGER()" }, "upstreams": [ @@ -25,6 +26,7 @@ "downstream": { "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)", "column": "BMI", + "column_type": null, "native_column_type": "FLOAT()" }, "upstreams": [ From 10456c5e3cdaad14927b89bb9deee1a6df0ce92c Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Mon, 23 Oct 2023 14:53:07 -0700 Subject: [PATCH 10/80] docs(ingest): update to get_workunits_internal (#9054) --- metadata-ingestion/adding-source.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/adding-source.md b/metadata-ingestion/adding-source.md index e4fc950a7cdbd..a0930102c6827 100644 --- a/metadata-ingestion/adding-source.md +++ b/metadata-ingestion/adding-source.md @@ -62,7 +62,7 @@ Some sources use the default `SourceReport` class, but others inherit and extend ### 3. Implement the source itself -The core for the source is the `get_workunits` method, which produces a stream of metadata events (typically MCP objects) wrapped up in a MetadataWorkUnit. +The core for the source is the `get_workunits_internal` method, which produces a stream of metadata events (typically MCP objects) wrapped up in a MetadataWorkUnit. The [file source](./src/datahub/ingestion/source/file.py) is a good and simple example. The MetadataChangeEventClass is defined in the metadata models which are generated From a0ce4f333e1cbbc544a650ec3e8012a1f10aef2b Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Mon, 23 Oct 2023 21:21:21 -0400 Subject: [PATCH 11/80] Column level lineage and path test (#8822) --- .../preview/EntityPaths/EntityPathsModal.tsx | 1 + .../e2e/lineage/lineage_column_path.js | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js diff --git a/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx b/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx index d5722429aaf6b..2bb76714d6119 100644 --- a/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx +++ b/datahub-web-react/src/app/preview/EntityPaths/EntityPathsModal.tsx @@ -39,6 +39,7 @@ export default function EntityPathsModal({ paths, resultEntityUrn, hideModal }: return ( Column path{paths.length > 1 && 's'} from{' '} diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js new file mode 100644 index 0000000000000..37ca62c8d1229 --- /dev/null +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js @@ -0,0 +1,68 @@ +import { aliasQuery } from "../utils"; +const DATASET_ENTITY_TYPE = 'dataset'; +const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)'; +const DOWNSTREAM_DATASET_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; +const upstreamColumn = '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)-Upstream"] text'; +const downstreamColumn = '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)-Downstream"] text'; + +const verifyColumnPathModal = (from, to) => { + cy.get('[data-testid="entity-paths-modal"]').contains(from).should("be.visible"); + cy.get('[data-testid="entity-paths-modal"]').contains(to).should("be.visible"); +}; + +describe("column-Level lineage and impact analysis path test", () => { + beforeEach(() => { + cy.on('uncaught:exception', (err, runnable) => { return false; }); + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); + + it("verify column-level lineage path at lineage praph and impact analysis ", () => { + // Open dataset with column-level lineage configured an navigate to lineage tab -> visualize lineage + cy.loginWithCredentials(); + cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); + + // Enable “show columns” toggle + cy.waitTextVisible("SampleCypressHdfs"); + cy.clickOptionWithTestId("column-toggle"); + cy.waitTextVisible("shipment_info"); + + // Verify functionality of column lineage + cy.get(upstreamColumn).eq(3).click(); + cy.get(upstreamColumn).eq(3).prev().should('not.have.attr', 'fill', 'white'); + cy.get(downstreamColumn).eq(2).prev().should('not.have.attr', 'stroke', 'transparent'); + cy.get(downstreamColumn).eq(2).click(); + cy.get(downstreamColumn).eq(2).prev().should('not.have.attr', 'fill', 'white'); + cy.get(upstreamColumn).eq(3).prev().should('not.have.attr', 'stroke', 'transparent'); + + // Open dataset impact analysis view, enable column lineage + cy.goToDataset(DATASET_URN, "SampleCypressHdfsDataset"); + cy.openEntityTab("Lineage"); + cy.clickOptionWithText("Column Lineage"); + cy.clickOptionWithText("Downstream"); + + // Verify upstream column lineage, test column path modal + cy.clickOptionWithText("Upstream"); + cy.waitTextVisible("SampleCypressKafkaDataset"); + cy.ensureTextNotPresent("field_bar"); + cy.contains("Select column").click({ force: true}).wait(1000); + cy.get(".rc-virtual-list").contains("shipment_info").click(); + cy.waitTextVisible("field_bar"); + cy.clickOptionWithText("field_bar"); + verifyColumnPathModal("shipment_info", "field_bar"); + cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); + + // Verify downstream column lineage, test column path modal + cy.goToDataset(DOWNSTREAM_DATASET_URN, "SampleCypressKafkaDataset"); + cy.openEntityTab("Lineage"); + cy.clickOptionWithText("Column Lineage"); + cy.ensureTextNotPresent("shipment_info"); + cy.contains("Select column").click({ force: true}).wait(1000); + cy.get(".rc-virtual-list").contains("field_bar").click(); + cy.waitTextVisible("shipment_info"); + cy.clickOptionWithText("shipment_info"); + verifyColumnPathModal("shipment_info", "field_bar"); + cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); + }); +}); \ No newline at end of file From adf8c8db38c56250cb612b208f6e59b04c7258c6 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 02:59:56 -0400 Subject: [PATCH 12/80] refactor(ingest): Move sqlalchemy import out of sql_types.py (#9065) --- .../src/datahub/ingestion/source/sql/athena.py | 2 +- .../src/datahub/ingestion/source/sql/sql_common.py | 2 +- .../src/datahub/ingestion/source/sql/sql_types.py | 9 +-------- .../src/datahub/utilities/sqlalchemy_type_converter.py | 6 +++++- metadata-ingestion/tests/unit/test_athena_source.py | 2 +- .../unit/utilities/test_sqlalchemy_type_converter.py | 2 +- 6 files changed, 10 insertions(+), 13 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index dad61e5173166..06b9ad92677a2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -31,7 +31,6 @@ register_custom_type, ) from datahub.ingestion.source.sql.sql_config import SQLCommonConfig, make_sqlalchemy_uri -from datahub.ingestion.source.sql.sql_types import MapType from datahub.ingestion.source.sql.sql_utils import ( add_table_to_schema_container, gen_database_container, @@ -41,6 +40,7 @@ from datahub.metadata.schema_classes import RecordTypeClass from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column from datahub.utilities.sqlalchemy_type_converter import ( + MapType, get_schema_fields_for_sqlalchemy_column, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 6524eea8222d4..be03858ec3ef9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -37,7 +37,6 @@ DatasetSubTypes, ) from datahub.ingestion.source.sql.sql_config import SQLCommonConfig -from datahub.ingestion.source.sql.sql_types import MapType from datahub.ingestion.source.sql.sql_utils import ( add_table_to_schema_container, downgrade_schema_from_v2, @@ -91,6 +90,7 @@ from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport +from datahub.utilities.sqlalchemy_type_converter import MapType if TYPE_CHECKING: from datahub.ingestion.source.ge_data_profiler import ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index 51626891e9fef..ae47623188f42 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -1,8 +1,6 @@ import re from typing import Any, Dict, ValuesView -from sqlalchemy import types - from datahub.metadata.com.linkedin.pegasus2avro.schema import ( ArrayType, BooleanType, @@ -17,6 +15,7 @@ TimeType, UnionType, ) +from datahub.utilities.sqlalchemy_type_converter import MapType # these can be obtained by running `select format_type(oid, null),* from pg_type;` # we've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.) @@ -369,12 +368,6 @@ def resolve_vertica_modified_type(type_string: str) -> Any: "array": ArrayType, } - -class MapType(types.TupleType): - # Wrapper class around SQLalchemy's TupleType to increase compatibility with DataHub - pass - - # https://docs.aws.amazon.com/athena/latest/ug/data-types.html # https://github.com/dbt-athena/dbt-athena/tree/main ATHENA_SQL_TYPES_MAP: Dict[str, Any] = { diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py index a431f262a85fd..1d5ec5dae3519 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py +++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py @@ -7,13 +7,17 @@ from sqlalchemy_bigquery import STRUCT from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields -from datahub.ingestion.source.sql.sql_types import MapType from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField from datahub.metadata.schema_classes import NullTypeClass, SchemaFieldDataTypeClass logger = logging.getLogger(__name__) +class MapType(types.TupleType): + # Wrapper class around SQLalchemy's TupleType to increase compatibility with DataHub + pass + + class SqlAlchemyColumnToAvroConverter: """Helper class that collects some methods to convert SQLalchemy columns to Avro schema.""" diff --git a/metadata-ingestion/tests/unit/test_athena_source.py b/metadata-ingestion/tests/unit/test_athena_source.py index 6d3ed20eafde2..23dd7dd5a6e45 100644 --- a/metadata-ingestion/tests/unit/test_athena_source.py +++ b/metadata-ingestion/tests/unit/test_athena_source.py @@ -9,7 +9,7 @@ from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.aws.s3_util import make_s3_urn from datahub.ingestion.source.sql.athena import CustomAthenaRestDialect -from datahub.ingestion.source.sql.sql_types import MapType +from datahub.utilities.sqlalchemy_type_converter import MapType FROZEN_TIME = "2020-04-14 07:00:00" diff --git a/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py b/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py index 959da0987a825..6c719d351c4c2 100644 --- a/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py +++ b/metadata-ingestion/tests/unit/utilities/test_sqlalchemy_type_converter.py @@ -3,7 +3,6 @@ from sqlalchemy import types from sqlalchemy_bigquery import STRUCT -from datahub.ingestion.source.sql.sql_types import MapType from datahub.metadata.schema_classes import ( ArrayTypeClass, MapTypeClass, @@ -12,6 +11,7 @@ RecordTypeClass, ) from datahub.utilities.sqlalchemy_type_converter import ( + MapType, get_schema_fields_for_sqlalchemy_column, ) From c849246e63284bc73768ed58a22be62b708a6c48 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 24 Oct 2023 00:09:41 -0700 Subject: [PATCH 13/80] fix(ingest): add releases link (#9014) --- metadata-ingestion/setup.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index c46409ecbf52f..417588a433655 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -282,7 +282,8 @@ # Source plugins # sqlalchemy-bigquery is included here since it provides an implementation of # a SQLalchemy-conform STRUCT type definition - "athena": sql_common | {"PyAthena[SQLAlchemy]>=2.6.0,<3.0.0", "sqlalchemy-bigquery>=1.4.1"}, + "athena": sql_common + | {"PyAthena[SQLAlchemy]>=2.6.0,<3.0.0", "sqlalchemy-bigquery>=1.4.1"}, "azure-ad": set(), "bigquery": sql_common | bigquery_common @@ -354,7 +355,11 @@ | {"psycopg2-binary", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib, - "redshift": sql_common | redshift_common | usage_common | sqlglot_lib | {"redshift-connector"}, + "redshift": sql_common + | redshift_common + | usage_common + | sqlglot_lib + | {"redshift-connector"}, "redshift-legacy": sql_common | redshift_common, "redshift-usage-legacy": sql_common | usage_common | redshift_common, "s3": {*s3_base, *data_lake_profiling}, @@ -435,7 +440,9 @@ deepdiff_dep = "deepdiff" test_api_requirements = {pytest_dep, deepdiff_dep, "PyYAML"} -debug_requirements = {"memray"} +debug_requirements = { + "memray", +} base_dev_requirements = { *base_requirements, @@ -668,6 +675,7 @@ "Documentation": "https://datahubproject.io/docs/", "Source": "https://github.com/datahub-project/datahub", "Changelog": "https://github.com/datahub-project/datahub/releases", + "Releases": "https://github.com/acryldata/datahub/releases", }, license="Apache License 2.0", description="A CLI to work with DataHub metadata", From eb0b03d2f2f2c9ce88562c32d968d095a59f8547 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 10:45:09 -0400 Subject: [PATCH 14/80] fix(ingest/bigquery): Correctly apply table pattern to read events; fix end time calculation; deprecate match_fully_qualified_names (#9077) --- .../ingestion/source/bigquery_v2/bigquery_config.py | 7 +++---- .../datahub/ingestion/source/bigquery_v2/lineage.py | 2 +- .../src/datahub/ingestion/source/bigquery_v2/usage.py | 10 +++++++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 944814b6936a4..a6a740385cf5c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -119,8 +119,8 @@ class BigQueryV2Config( ) match_fully_qualified_names: bool = Field( - default=False, - description="Whether `dataset_pattern` is matched against fully qualified dataset name `.`.", + default=True, + description="[deprecated] Whether `dataset_pattern` is matched against fully qualified dataset name `.`.", ) include_external_url: bool = Field( @@ -327,8 +327,7 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: ): logger.warning( "Please update `dataset_pattern` to match against fully qualified schema name `.` and set config `match_fully_qualified_names : True`." - "Current default `match_fully_qualified_names: False` is only to maintain backward compatibility. " - "The config option `match_fully_qualified_names` will be deprecated in future and the default behavior will assume `match_fully_qualified_names: True`." + "The config option `match_fully_qualified_names` is deprecated and will be removed in a future release." ) return values diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 98c8cbaf85eec..aa462435b8105 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -548,7 +548,7 @@ def _get_parsed_audit_log_events(self, project_id: str) -> Iterable[QueryEvent]: # handle the case where the read happens within our time range but the query # completion event is delayed and happens after the configured end time. corrected_start_time = self.start_time - self.config.max_query_duration - corrected_end_time = self.end_time + -self.config.max_query_duration + corrected_end_time = self.end_time + self.config.max_query_duration self.report.log_entry_start_time = corrected_start_time self.report.log_entry_end_time = corrected_end_time diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 201567e104a51..7fc38991e5928 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -335,8 +335,12 @@ def get_time_window(self) -> Tuple[datetime, datetime]: def _is_table_allowed(self, table_ref: Optional[BigQueryTableRef]) -> bool: return ( table_ref is not None - and self.config.dataset_pattern.allowed(table_ref.table_identifier.dataset) - and self.config.table_pattern.allowed(table_ref.table_identifier.table) + and self.config.dataset_pattern.allowed( + f"{table_ref.table_identifier.project_id}.{table_ref.table_identifier.dataset}" + if self.config.match_fully_qualified_names + else table_ref.table_identifier.dataset + ) + and self.config.table_pattern.allowed(str(table_ref.table_identifier)) ) def _should_ingest_usage(self) -> bool: @@ -844,7 +848,7 @@ def _get_parsed_bigquery_log_events( # handle the case where the read happens within our time range but the query # completion event is delayed and happens after the configured end time. corrected_start_time = self.start_time - self.config.max_query_duration - corrected_end_time = self.end_time + -self.config.max_query_duration + corrected_end_time = self.end_time + self.config.max_query_duration self.report.audit_start_time = corrected_start_time self.report.audit_end_time = corrected_end_time From d13553f53ad9e7592256cd88e78eef0ca95832e4 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 24 Oct 2023 12:24:50 -0700 Subject: [PATCH 15/80] feat(sqlparser): extract CLL from `update`s (#9078) --- .../src/datahub/utilities/sqlglot_lineage.py | 68 +++++++++++-- .../test_snowflake_update_from_table.json | 56 +++++++++++ .../test_snowflake_update_hardcoded.json | 35 +++++++ .../unit/sql_parsing/test_sqlglot_lineage.py | 96 +++++++++++++++++++ 4 files changed, 246 insertions(+), 9 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 97121b368f507..526d90b2a1bfa 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -745,6 +745,47 @@ def _extract_select_from_create( return statement +_UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT: Set[str] = set( + sqlglot.exp.Update.arg_types.keys() +) - set(sqlglot.exp.Select.arg_types.keys()) + + +def _extract_select_from_update( + statement: sqlglot.exp.Update, +) -> sqlglot.exp.Select: + statement = statement.copy() + + # The "SET" expressions need to be converted. + # For the update command, it'll be a list of EQ expressions, but the select + # should contain aliased columns. + new_expressions = [] + for expr in statement.expressions: + if isinstance(expr, sqlglot.exp.EQ) and isinstance( + expr.left, sqlglot.exp.Column + ): + new_expressions.append( + sqlglot.exp.Alias( + this=expr.right, + alias=expr.left.this, + ) + ) + else: + # If we don't know how to convert it, just leave it as-is. If this causes issues, + # they'll get caught later. + new_expressions.append(expr) + + return sqlglot.exp.Select( + **{ + **{ + k: v + for k, v in statement.args.items() + if k not in _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT + }, + "expressions": new_expressions, + } + ) + + def _is_create_table_ddl(statement: sqlglot.exp.Expression) -> bool: return isinstance(statement, sqlglot.exp.Create) and isinstance( statement.this, sqlglot.exp.Schema @@ -767,6 +808,9 @@ def _try_extract_select( elif isinstance(statement, sqlglot.exp.Insert): # TODO Need to map column renames in the expressions part of the statement. statement = statement.expression + elif isinstance(statement, sqlglot.exp.Update): + # Assumption: the output table is already captured in the modified tables list. + statement = _extract_select_from_update(statement) elif isinstance(statement, sqlglot.exp.Create): # TODO May need to map column renames. # Assumption: the output table is already captured in the modified tables list. @@ -942,19 +986,25 @@ def _sqlglot_lineage_inner( ) # Simplify the input statement for column-level lineage generation. - select_statement = _try_extract_select(statement) + try: + select_statement = _try_extract_select(statement) + except Exception as e: + logger.debug(f"Failed to extract select from statement: {e}", exc_info=True) + debug_info.column_error = e + select_statement = None # Generate column-level lineage. column_lineage: Optional[List[_ColumnLineageInfo]] = None try: - column_lineage = _column_level_lineage( - select_statement, - dialect=dialect, - input_tables=table_name_schema_mapping, - output_table=downstream_table, - default_db=default_db, - default_schema=default_schema, - ) + if select_statement is not None: + column_lineage = _column_level_lineage( + select_statement, + dialect=dialect, + input_tables=table_name_schema_mapping, + output_table=downstream_table, + default_db=default_db, + default_schema=default_schema, + ) except UnsupportedStatementTypeError as e: # Inject details about the outer statement type too. e.args = (f"{e.args[0]} (outer statement type: {type(statement)})",) diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json new file mode 100644 index 0000000000000..e2baa34e7fe28 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json @@ -0,0 +1,56 @@ +{ + "query_type": "UPDATE", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "col1", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col1" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col2" + } + ] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "col2", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "native_column_type": "VARCHAR" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", + "column": "col1" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)", + "column": "col2" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json new file mode 100644 index 0000000000000..b41ed61b37cdb --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json @@ -0,0 +1,35 @@ +{ + "query_type": "UPDATE", + "in_tables": [], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [] + }, + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "totalprice", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 059add8db67e4..dfc5b486abd35 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -3,6 +3,7 @@ import pytest from datahub.testing.check_sql_parser_result import assert_sql_result +from datahub.utilities.sqlglot_lineage import _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT RESOURCE_DIR = pathlib.Path(__file__).parent / "goldens" @@ -672,3 +673,98 @@ def test_teradata_default_normalization(): }, expected_file=RESOURCE_DIR / "test_teradata_default_normalization.json", ) + + +def test_snowflake_update_hardcoded(): + assert_sql_result( + """ +UPDATE snowflake_sample_data.tpch_sf1.orders +SET orderkey = 1, totalprice = 2 +WHERE orderkey = 3 +""", + dialect="snowflake", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": { + "orderkey": "NUMBER(38,0)", + "totalprice": "NUMBER(12,2)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_update_hardcoded.json", + ) + + +def test_update_from_select(): + assert _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT == {"returning", "this"} + + +def test_snowflake_update_from_table(): + # Can create these tables with the following SQL: + """ + -- Create or replace my_table + CREATE OR REPLACE TABLE my_table ( + id INT IDENTITY PRIMARY KEY, + col1 VARCHAR(50), + col2 VARCHAR(50) + ); + + -- Create or replace table1 + CREATE OR REPLACE TABLE table1 ( + id INT IDENTITY PRIMARY KEY, + col1 VARCHAR(50), + col2 VARCHAR(50) + ); + + -- Create or replace table2 + CREATE OR REPLACE TABLE table2 ( + id INT IDENTITY PRIMARY KEY, + col2 VARCHAR(50) + ); + + -- Insert data into my_table + INSERT INTO my_table (col1, col2) + VALUES ('foo', 'bar'), + ('baz', 'qux'); + + -- Insert data into table1 + INSERT INTO table1 (col1, col2) + VALUES ('foo', 'bar'), + ('baz', 'qux'); + + -- Insert data into table2 + INSERT INTO table2 (col2) + VALUES ('bar'), + ('qux'); + """ + + assert_sql_result( + """ +UPDATE my_table +SET + col1 = t1.col1 || t1.col2, + col2 = t1.col1 || t2.col2 +FROM table1 t1 +JOIN table2 t2 ON t1.id = t2.id +WHERE my_table.id = t1.id; +""", + dialect="snowflake", + default_db="my_db", + default_schema="my_schema", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)": { + "id": "NUMBER(38,0)", + "col1": "VARCHAR(16777216)", + "col2": "VARCHAR(16777216)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_update_from_table.json", + ) From 378d84a346cff4061f795dd1b296bde3ea5313c1 Mon Sep 17 00:00:00 2001 From: skrydal Date: Tue, 24 Oct 2023 22:12:11 +0200 Subject: [PATCH 16/80] fix(ui): Fixes handling of resources filters in UI (#9087) --- .../app/permissions/policy/PolicyDetailsModal.tsx | 4 ++-- .../permissions/policy/PolicyPrivilegeForm.tsx | 15 ++++++--------- .../src/app/permissions/policy/policyUtils.ts | 4 ++-- docs/authorization/policies.md | 8 ++++---- metadata-ingestion/tests/unit/serde/test_serde.py | 8 ++++---- 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx index 68e91983babdb..d3e01df3a66e8 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx @@ -67,8 +67,8 @@ export default function PolicyDetailsModal({ policy, visible, onClose, privilege const isMetadataPolicy = policy?.type === PolicyType.Metadata; const resources = convertLegacyResourceFilter(policy?.resources); - const resourceTypes = getFieldValues(resources?.filter, 'RESOURCE_TYPE') || []; - const resourceEntities = getFieldValues(resources?.filter, 'RESOURCE_URN') || []; + const resourceTypes = getFieldValues(resources?.filter, 'TYPE') || []; + const resourceEntities = getFieldValues(resources?.filter, 'URN') || []; const domains = getFieldValues(resources?.filter, 'DOMAIN') || []; const { diff --git a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx index 1520388a5033a..b8e1505fceaec 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx @@ -67,8 +67,8 @@ export default function PolicyPrivilegeForm({ } = useAppConfig(); const resources: ResourceFilter = convertLegacyResourceFilter(maybeResources) || EMPTY_POLICY.resources; - const resourceTypes = getFieldValues(resources.filter, 'RESOURCE_TYPE') || []; - const resourceEntities = getFieldValues(resources.filter, 'RESOURCE_URN') || []; + const resourceTypes = getFieldValues(resources.filter, 'TYPE') || []; + const resourceEntities = getFieldValues(resources.filter, 'URN') || []; const getDisplayName = (entity) => { if (!entity) { @@ -145,10 +145,7 @@ export default function PolicyPrivilegeForm({ }; setResources({ ...resources, - filter: setFieldValues(filter, 'RESOURCE_TYPE', [ - ...resourceTypes, - createCriterionValue(selectedResourceType), - ]), + filter: setFieldValues(filter, 'TYPE', [...resourceTypes, createCriterionValue(selectedResourceType)]), }); }; @@ -160,7 +157,7 @@ export default function PolicyPrivilegeForm({ ...resources, filter: setFieldValues( filter, - 'RESOURCE_TYPE', + 'TYPE', resourceTypes?.filter((criterionValue) => criterionValue.value !== deselectedResourceType), ), }); @@ -173,7 +170,7 @@ export default function PolicyPrivilegeForm({ }; setResources({ ...resources, - filter: setFieldValues(filter, 'RESOURCE_URN', [ + filter: setFieldValues(filter, 'URN', [ ...resourceEntities, createCriterionValueWithEntity( resource, @@ -192,7 +189,7 @@ export default function PolicyPrivilegeForm({ ...resources, filter: setFieldValues( filter, - 'RESOURCE_URN', + 'URN', resourceEntities?.filter((criterionValue) => criterionValue.value !== resource), ), }); diff --git a/datahub-web-react/src/app/permissions/policy/policyUtils.ts b/datahub-web-react/src/app/permissions/policy/policyUtils.ts index c7af7342f6efa..2f178fcdeb5c3 100644 --- a/datahub-web-react/src/app/permissions/policy/policyUtils.ts +++ b/datahub-web-react/src/app/permissions/policy/policyUtils.ts @@ -99,10 +99,10 @@ export const convertLegacyResourceFilter = (resourceFilter: Maybe(); if (resourceFilter.type) { - criteria.push(createCriterion('RESOURCE_TYPE', [createCriterionValue(resourceFilter.type)])); + criteria.push(createCriterion('TYPE', [createCriterionValue(resourceFilter.type)])); } if (resourceFilter.resources && resourceFilter.resources.length > 0) { - criteria.push(createCriterion('RESOURCE_URN', resourceFilter.resources.map(createCriterionValue))); + criteria.push(createCriterion('URN', resourceFilter.resources.map(createCriterionValue))); } return { filter: { diff --git a/docs/authorization/policies.md b/docs/authorization/policies.md index e3606f2a3e48d..63aa6688d3eec 100644 --- a/docs/authorization/policies.md +++ b/docs/authorization/policies.md @@ -137,7 +137,7 @@ We currently support the following: #### Resources Resource filter defines the set of resources that the policy applies to is defined using a list of criteria. Each -criterion defines a field type (like resource_type, resource_urn, domain), a list of field values to compare, and a +criterion defines a field type (like type, urn, domain), a list of field values to compare, and a condition (like EQUALS). It essentially checks whether the field of a certain resource matches any of the input values. Note, that if there are no criteria or resource is not set, policy is applied to ALL resources. @@ -149,7 +149,7 @@ For example, the following resource filter will apply the policy to datasets, ch "filter": { "criteria": [ { - "field": "RESOURCE_TYPE", + "field": "TYPE", "condition": "EQUALS", "values": [ "dataset", @@ -175,8 +175,8 @@ Supported fields are as follows | Field Type | Description | Example | |---------------|------------------------|-------------------------| -| resource_type | Type of the resource | dataset, chart, dataJob | -| resource_urn | Urn of the resource | urn:li:dataset:... | +| type | Type of the resource | dataset, chart, dataJob | +| urn | Urn of the resource | urn:li:dataset:... | | domain | Domain of the resource | urn:li:domain:domainX | ## Managing Policies diff --git a/metadata-ingestion/tests/unit/serde/test_serde.py b/metadata-ingestion/tests/unit/serde/test_serde.py index d116f1f5473fa..d2d6a0bdda5b9 100644 --- a/metadata-ingestion/tests/unit/serde/test_serde.py +++ b/metadata-ingestion/tests/unit/serde/test_serde.py @@ -238,7 +238,7 @@ def test_missing_optional_simple() -> None: "criteria": [ { "condition": "EQUALS", - "field": "RESOURCE_TYPE", + "field": "TYPE", "values": ["notebook", "dataset", "dashboard"], } ] @@ -252,7 +252,7 @@ def test_missing_optional_simple() -> None: "criteria": [ { "condition": "EQUALS", - "field": "RESOURCE_TYPE", + "field": "TYPE", "values": ["notebook", "dataset", "dashboard"], } ] @@ -267,13 +267,13 @@ def test_missing_optional_simple() -> None: def test_missing_optional_in_union() -> None: # This one doesn't contain any optional fields and should work fine. revised_json = json.loads( - '{"lastUpdatedTimestamp":1662356745807,"actors":{"groups":[],"resourceOwners":false,"allUsers":true,"allGroups":false,"users":[]},"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"displayName":"customtest","resources":{"filter":{"criteria":[{"field":"RESOURCE_TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]},"allResources":false},"description":"","state":"ACTIVE","type":"METADATA"}' + '{"lastUpdatedTimestamp":1662356745807,"actors":{"groups":[],"resourceOwners":false,"allUsers":true,"allGroups":false,"users":[]},"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"displayName":"customtest","resources":{"filter":{"criteria":[{"field":"TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]},"allResources":false},"description":"","state":"ACTIVE","type":"METADATA"}' ) revised = models.DataHubPolicyInfoClass.from_obj(revised_json) # This one is missing the optional filters.allResources field. original_json = json.loads( - '{"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"actors":{"resourceOwners":false,"groups":[],"allGroups":false,"allUsers":true,"users":[]},"lastUpdatedTimestamp":1662356745807,"displayName":"customtest","description":"","resources":{"filter":{"criteria":[{"field":"RESOURCE_TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]}},"state":"ACTIVE","type":"METADATA"}' + '{"privileges":["EDIT_ENTITY_ASSERTIONS","EDIT_DATASET_COL_GLOSSARY_TERMS","EDIT_DATASET_COL_TAGS","EDIT_DATASET_COL_DESCRIPTION"],"actors":{"resourceOwners":false,"groups":[],"allGroups":false,"allUsers":true,"users":[]},"lastUpdatedTimestamp":1662356745807,"displayName":"customtest","description":"","resources":{"filter":{"criteria":[{"field":"TYPE","condition":"EQUALS","values":["notebook","dataset","dashboard"]}]}},"state":"ACTIVE","type":"METADATA"}' ) original = models.DataHubPolicyInfoClass.from_obj(original_json) From edb82ad91fba8a401c56b82bc4c2916a39a6a6dd Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 18:56:14 -0400 Subject: [PATCH 17/80] docs(ingest/bigquery): Add docs for breaking change: match_fully_qualified_names (#9094) --- docs/how/updating-datahub.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 3af3b2bdda215..7d8c25b06255a 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -11,11 +11,17 @@ This file documents any backwards-incompatible changes in DataHub and assists pe by Looker and LookML source connectors. - #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details. - #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`. -- #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. +- #8943 - The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled. This is currently enabled by default to preserve compatibility, but will be disabled by default and then removed in the future. If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: `datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. +- #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`. +This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully +qualified dataset name, i.e. `.`. If this is not the case, please +update your pattern (e.g. prepend your old dataset pattern with `.*\.` which matches the project part), +or set `match_fully_qualified_names: false` in your recipe. However, note that +setting this to `false` is deprecated and this flag will be removed entirely in a future release. ### Potential Downtime From fe18532b29e35af1cd3007e6affc102042b1af61 Mon Sep 17 00:00:00 2001 From: skrydal Date: Wed, 25 Oct 2023 00:58:56 +0200 Subject: [PATCH 18/80] docs(update): Added info on breaking change for policies (#9093) Co-authored-by: Pedro Silva --- docs/how/updating-datahub.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 7d8c25b06255a..57193ea69f2be 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -16,6 +16,39 @@ This is currently enabled by default to preserve compatibility, but will be disa If stateful ingestion is enabled, simply setting `include_metastore: false` will perform all required cleanup. Otherwise, we recommend soft deleting all databricks data via the DataHub CLI: `datahub delete --platform databricks --soft` and then reingesting with `include_metastore: false`. +- #8846 - Changed enum values in resource filters used by policies. `RESOURCE_TYPE` became `TYPE` and `RESOURCE_URN` became `URN`. +Any existing policies using these filters (i.e. defined for particular `urns` or `types` such as `dataset`) need to be upgraded +manually, for example by retrieving their respective `dataHubPolicyInfo` aspect and changing part using filter i.e. +```yaml + "resources": { + "filter": { + "criteria": [ + { + "field": "RESOURCE_TYPE", + "condition": "EQUALS", + "values": [ + "dataset" + ] + } + ] + } +``` +into +```yaml + "resources": { + "filter": { + "criteria": [ + { + "field": "TYPE", + "condition": "EQUALS", + "values": [ + "dataset" + ] + } + ] + } +``` +for example, using `datahub put` command. Policies can be also removed and re-created via UI. - #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`. This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully qualified dataset name, i.e. `.`. If this is not the case, please From ca331f58bd24187f9f0ca317216837178e9f41fa Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Wed, 25 Oct 2023 09:39:57 +0900 Subject: [PATCH 19/80] docs: add luckyorange script to head (#9080) --- docs-website/docusaurus.config.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 68ea1ebffa6c9..259ef970d818e 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -13,6 +13,13 @@ module.exports = { projectName: "datahub", // Usually your repo name. staticDirectories: ["static", "genStatic"], stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap"], + scripts: [ + { + src: "https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38", + async: true, + defer: true, + }, + ], noIndex: isSaas, customFields: { isSaas: isSaas, From 9a59c452bf36d750964f6d7f78df84a8c0c5eb66 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Wed, 25 Oct 2023 09:40:28 +0900 Subject: [PATCH 20/80] design: refactor docs navbar (#8975) Co-authored-by: Jeff Merrick --- docs-website/docusaurus.config.js | 61 +++++++++---------- docs-website/src/styles/global.scss | 27 +++++--- .../DocsVersionDropdownNavbarItem.js | 4 ++ .../src/theme/NavbarItem/styles.module.scss | 8 +++ 4 files changed, 59 insertions(+), 41 deletions(-) create mode 100644 docs-website/src/theme/NavbarItem/styles.module.scss diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 259ef970d818e..506e263933394 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -57,44 +57,41 @@ module.exports = { position: "right", }, { - to: "https://demo.datahubproject.io/", - label: "Demo", - position: "right", - }, - { - href: "https://blog.datahubproject.io/", - label: "Blog", - position: "right", - }, - { - href: "https://feature-requests.datahubproject.io/roadmap", - label: "Roadmap", + type: "dropdown", + label: "Resources", position: "right", + items: [ + { + href: "https://demo.datahubproject.io/", + label: "Demo", + }, + { + href: "https://blog.datahubproject.io/", + label: "Blog", + }, + { + href: "https://feature-requests.datahubproject.io/roadmap", + label: "Roadmap", + }, + { + href: "https://slack.datahubproject.io", + label: "Slack", + }, + { + href: "https://github.com/datahub-project/datahub", + label: "GitHub", + }, + { + href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", + label: "YouTube", + }, + ], }, { type: "docsVersionDropdown", - position: "right", + position: "left", dropdownActiveClassDisabled: true, }, - { - href: "https://slack.datahubproject.io", - "aria-label": "Slack", - position: "right", - className: "item__icon item__slack", - }, - { - href: "https://github.com/datahub-project/datahub", - "aria-label": "GitHub", - position: "right", - className: "item__icon item__github", - }, - - { - href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", - "aria-label": "YouTube", - position: "right", - className: "item__icon item__youtube", - }, ], }, footer: { diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index 55a54876b41ac..16e3893ed08b7 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -144,20 +144,29 @@ div[class^="announcementBar"] { /** Navbar */ -@media only screen and (max-width: 1050px) { - .navbar__toggle { - display: inherit; - } - .navbar__item { - display: none; - } -} - .navbar { .navbar__logo { height: 3rem; } + + .navbar__link { + align-items: center; + margin: 0 1rem 0; + padding: 0; + border-bottom: 2px solid transparent; + } + + .dropdown > .navbar__link:after { + top: -1px; + border-width: 0.3em 0.3em 0; + margin-left: 0.4em; + } + + .navbar__link--active { + border-bottom-color: var(--ifm-navbar-link-hover-color); + } .navbar__item { + padding: 0.25rem 0; svg[class*="iconExternalLink"] { display: none; } diff --git a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js index cc04ab23d3cf3..661d64392e67f 100644 --- a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js +++ b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js @@ -6,6 +6,9 @@ import { translate } from "@docusaurus/Translate"; import { useLocation } from "@docusaurus/router"; import DefaultNavbarItem from "@theme/NavbarItem/DefaultNavbarItem"; import DropdownNavbarItem from "@theme/NavbarItem/DropdownNavbarItem"; + +import styles from "./styles.module.scss"; + const getVersionMainDoc = (version) => version.docs.find((doc) => doc.id === version.mainDocId); export default function DocsVersionDropdownNavbarItem({ mobile, @@ -60,6 +63,7 @@ export default function DocsVersionDropdownNavbarItem({ return ( Date: Tue, 24 Oct 2023 19:59:42 -0700 Subject: [PATCH 21/80] fix(ingest): update athena type mapping (#9061) --- .../src/datahub/ingestion/source/sql/athena.py | 4 +++- .../src/datahub/ingestion/source/sql/sql_common.py | 5 +---- .../src/datahub/ingestion/source/sql/sql_types.py | 5 ++--- .../datahub/utilities/sqlalchemy_type_converter.py | 13 ++++++++++--- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 06b9ad92677a2..75e8fe1d6f7a6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -37,7 +37,7 @@ gen_database_key, ) from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField -from datahub.metadata.schema_classes import RecordTypeClass +from datahub.metadata.schema_classes import MapTypeClass, RecordTypeClass from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column from datahub.utilities.sqlalchemy_type_converter import ( MapType, @@ -46,7 +46,9 @@ logger = logging.getLogger(__name__) +assert STRUCT, "required type modules are not available" register_custom_type(STRUCT, RecordTypeClass) +register_custom_type(MapType, MapTypeClass) class CustomAthenaRestDialect(AthenaRestDialect): diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index be03858ec3ef9..fad9b9e8018a5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -80,7 +80,6 @@ DatasetLineageTypeClass, DatasetPropertiesClass, GlobalTagsClass, - MapTypeClass, SubTypesClass, TagAssociationClass, UpstreamClass, @@ -90,7 +89,6 @@ from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport -from datahub.utilities.sqlalchemy_type_converter import MapType if TYPE_CHECKING: from datahub.ingestion.source.ge_data_profiler import ( @@ -140,6 +138,7 @@ class SqlWorkUnit(MetadataWorkUnit): _field_type_mapping: Dict[Type[TypeEngine], Type] = { + # Note: to add dialect-specific types to this mapping, use the `register_custom_type` function. types.Integer: NumberTypeClass, types.Numeric: NumberTypeClass, types.Boolean: BooleanTypeClass, @@ -156,8 +155,6 @@ class SqlWorkUnit(MetadataWorkUnit): types.DATETIME: TimeTypeClass, types.TIMESTAMP: TimeTypeClass, types.JSON: RecordTypeClass, - # additional type definitions that are used by the Athena source - MapType: MapTypeClass, # type: ignore # Because the postgresql dialect is used internally by many other dialects, # we add some postgres types here. This is ok to do because the postgresql # dialect is built-in to sqlalchemy. diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index ae47623188f42..3b4a7e1dc0287 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -7,7 +7,7 @@ BytesType, DateType, EnumType, - MapType as MapTypeAvro, + MapType, NullType, NumberType, RecordType, @@ -15,7 +15,6 @@ TimeType, UnionType, ) -from datahub.utilities.sqlalchemy_type_converter import MapType # these can be obtained by running `select format_type(oid, null),* from pg_type;` # we've omitted the types without a meaningful DataHub type (e.g. postgres-specific types, index vectors, etc.) @@ -364,7 +363,7 @@ def resolve_vertica_modified_type(type_string: str) -> Any: "time": TimeType, "timestamp": TimeType, "row": RecordType, - "map": MapTypeAvro, + "map": MapType, "array": ArrayType, } diff --git a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py index 1d5ec5dae3519..5d2fc6872c7bd 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py +++ b/metadata-ingestion/src/datahub/utilities/sqlalchemy_type_converter.py @@ -4,7 +4,6 @@ from typing import Any, Dict, List, Optional, Type, Union from sqlalchemy import types -from sqlalchemy_bigquery import STRUCT from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField @@ -12,6 +11,12 @@ logger = logging.getLogger(__name__) +try: + # This is used for both BigQuery and Athena. + from sqlalchemy_bigquery import STRUCT +except ImportError: + STRUCT = None + class MapType(types.TupleType): # Wrapper class around SQLalchemy's TupleType to increase compatibility with DataHub @@ -42,7 +47,9 @@ def get_avro_type( ) -> Dict[str, Any]: """Determines the concrete AVRO schema type for a SQLalchemy-typed column""" - if type(column_type) in cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE.keys(): + if isinstance( + column_type, tuple(cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE.keys()) + ): return { "type": cls.PRIMITIVE_SQL_ALCHEMY_TYPE_TO_AVRO_TYPE[type(column_type)], "native_data_type": str(column_type), @@ -88,7 +95,7 @@ def get_avro_type( "key_type": cls.get_avro_type(column_type=key_type, nullable=nullable), "key_native_data_type": str(key_type), } - if isinstance(column_type, STRUCT): + if STRUCT and isinstance(column_type, STRUCT): fields = [] for field_def in column_type._STRUCT_fields: field_name, field_type = field_def From 2d1584b12fe4a40a077457e618f0937132763586 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 24 Oct 2023 23:08:24 -0400 Subject: [PATCH 22/80] feat(ingest/datahub-source): Allow ingesting aspects from the entitiesV2 API (#9089) --- .../ingestion/source/datahub/config.py | 19 ++++++- .../source/datahub/datahub_api_reader.py | 49 +++++++++++++++++++ .../source/datahub/datahub_source.py | 16 ++++++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py index 053d136305527..83958dc76754f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/config.py @@ -1,3 +1,4 @@ +import os from typing import Optional from pydantic import Field, root_validator @@ -67,9 +68,25 @@ class DataHubSourceConfig(StatefulIngestionConfigBase): ), ) + pull_from_datahub_api: bool = Field( + default=False, + description="Use the DataHub API to fetch versioned aspects.", + hidden_from_docs=True, + ) + + max_workers: int = Field( + default=5 * (os.cpu_count() or 4), + description="Number of worker threads to use for datahub api ingestion.", + hidden_from_docs=True, + ) + @root_validator def check_ingesting_data(cls, values): - if not values.get("database_connection") and not values.get("kafka_connection"): + if ( + not values.get("database_connection") + and not values.get("kafka_connection") + and not values.get("pull_from_datahub_api") + ): raise ValueError( "Your current config will not ingest any data." " Please specify at least one of `database_connection` or `kafka_connection`, ideally both." diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py new file mode 100644 index 0000000000000..7ee36736723b2 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_api_reader.py @@ -0,0 +1,49 @@ +import logging +from concurrent import futures +from typing import Dict, Iterable, List + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.graph.filters import RemovedStatusFilter +from datahub.ingestion.source.datahub.config import DataHubSourceConfig +from datahub.ingestion.source.datahub.report import DataHubSourceReport +from datahub.metadata._schema_classes import _Aspect + +logger = logging.getLogger(__name__) + +# Should work for at least mysql, mariadb, postgres +DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f" + + +class DataHubApiReader: + def __init__( + self, + config: DataHubSourceConfig, + report: DataHubSourceReport, + graph: DataHubGraph, + ): + self.config = config + self.report = report + self.graph = graph + + def get_aspects(self) -> Iterable[MetadataChangeProposalWrapper]: + urns = self.graph.get_urns_by_filter( + status=RemovedStatusFilter.ALL, + batch_size=self.config.database_query_batch_size, + ) + tasks: List[futures.Future[Iterable[MetadataChangeProposalWrapper]]] = [] + with futures.ThreadPoolExecutor( + max_workers=self.config.max_workers + ) as executor: + for urn in urns: + tasks.append(executor.submit(self._get_aspects_for_urn, urn)) + for task in futures.as_completed(tasks): + yield from task.result() + + def _get_aspects_for_urn(self, urn: str) -> Iterable[MetadataChangeProposalWrapper]: + aspects: Dict[str, _Aspect] = self.graph.get_entity_semityped(urn) # type: ignore + for aspect in aspects.values(): + yield MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=aspect, + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py index 2368febe1ff57..a2f43b8cc62cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/datahub/datahub_source.py @@ -15,6 +15,7 @@ from datahub.ingestion.api.source_helpers import auto_workunit_reporter from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.datahub.config import DataHubSourceConfig +from datahub.ingestion.source.datahub.datahub_api_reader import DataHubApiReader from datahub.ingestion.source.datahub.datahub_database_reader import ( DataHubDatabaseReader, ) @@ -58,6 +59,9 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: logger.info(f"Ingesting DataHub metadata up until {self.report.stop_time}") state = self.stateful_ingestion_handler.get_last_run_state() + if self.config.pull_from_datahub_api: + yield from self._get_api_workunits() + if self.config.database_connection is not None: yield from self._get_database_workunits( from_createdon=state.database_createdon_datetime @@ -139,6 +143,18 @@ def _get_kafka_workunits( ) self._commit_progress(i) + def _get_api_workunits(self) -> Iterable[MetadataWorkUnit]: + if self.ctx.graph is None: + self.report.report_failure( + "datahub_api", + "Specify datahub_api on your ingestion recipe to ingest from the DataHub API", + ) + return + + reader = DataHubApiReader(self.config, self.report, self.ctx.graph) + for mcp in reader.get_aspects(): + yield mcp.as_workunit() + def _commit_progress(self, i: Optional[int] = None) -> None: """Commit progress to stateful storage, if there have been no errors. From b612545220d9329696eaa26d6b42439cdf01fb95 Mon Sep 17 00:00:00 2001 From: siddiquebagwan-gslab Date: Wed, 25 Oct 2023 15:26:06 +0530 Subject: [PATCH 23/80] feat(ingestion/redshift): support auto_incremental_lineage (#9010) --- docs/how/updating-datahub.md | 2 ++ metadata-ingestion/setup.py | 10 +++------- .../datahub/ingestion/source/redshift/config.py | 8 +++++++- .../datahub/ingestion/source/redshift/redshift.py | 15 +++++++++++++-- .../tests/unit/test_redshift_config.py | 6 ++++++ 5 files changed, 31 insertions(+), 10 deletions(-) create mode 100644 metadata-ingestion/tests/unit/test_redshift_config.py diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 57193ea69f2be..8813afee65be9 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -4,6 +4,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next +- #9010 - In Redshift source's config `incremental_lineage` is set default to off. + ### Breaking Changes - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 417588a433655..72b0e776a0da5 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -355,13 +355,9 @@ | {"psycopg2-binary", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib, - "redshift": sql_common - | redshift_common - | usage_common - | sqlglot_lib - | {"redshift-connector"}, - "redshift-legacy": sql_common | redshift_common, - "redshift-usage-legacy": sql_common | usage_common | redshift_common, + "redshift": sql_common | redshift_common | usage_common | {"redshift-connector"} | sqlglot_lib, + "redshift-legacy": sql_common | redshift_common | sqlglot_lib, + "redshift-usage-legacy": sql_common | redshift_common | sqlglot_lib | usage_common, "s3": {*s3_base, *data_lake_profiling}, "gcs": {*s3_base, *data_lake_profiling}, "sagemaker": aws_common, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 2789b800940db..79b044841e054 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -133,7 +133,13 @@ class RedshiftConfig( ) extract_column_level_lineage: bool = Field( - default=True, description="Whether to extract column level lineage." + default=True, + description="Whether to extract column level lineage. This config works with rest-sink only.", + ) + + incremental_lineage: bool = Field( + default=False, + description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run. This config works with rest-sink only.", ) @root_validator(pre=True) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index a1b6333a3775d..26237a6ce12e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -1,5 +1,6 @@ import logging from collections import defaultdict +from functools import partial from typing import Dict, Iterable, List, Optional, Type, Union import humanfriendly @@ -25,6 +26,7 @@ platform_name, support_status, ) +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.source import ( CapabilityReport, MetadataWorkUnitProcessor, @@ -369,6 +371,11 @@ def gen_database_container(self, database: str) -> Iterable[MetadataWorkUnit]: def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.ctx.graph, + self.config.incremental_lineage, + ), StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, @@ -942,7 +949,9 @@ def generate_lineage(self, database: str) -> Iterable[MetadataWorkUnit]: ) if lineage_info: yield from gen_lineage( - dataset_urn, lineage_info, self.config.incremental_lineage + dataset_urn, + lineage_info, + incremental_lineage=False, # incremental lineage generation is taken care by auto_incremental_lineage ) for schema in self.db_views[database]: @@ -956,7 +965,9 @@ def generate_lineage(self, database: str) -> Iterable[MetadataWorkUnit]: ) if lineage_info: yield from gen_lineage( - dataset_urn, lineage_info, self.config.incremental_lineage + dataset_urn, + lineage_info, + incremental_lineage=False, # incremental lineage generation is taken care by auto_incremental_lineage ) def add_config_to_report(self): diff --git a/metadata-ingestion/tests/unit/test_redshift_config.py b/metadata-ingestion/tests/unit/test_redshift_config.py new file mode 100644 index 0000000000000..8a165e7f5f3fe --- /dev/null +++ b/metadata-ingestion/tests/unit/test_redshift_config.py @@ -0,0 +1,6 @@ +from datahub.ingestion.source.redshift.config import RedshiftConfig + + +def test_incremental_lineage_default_to_false(): + config = RedshiftConfig(host_port="localhost:5439", database="test") + assert config.incremental_lineage is False From 9cccd22c04bf357b574f4d9d7dae3aee633bf7d3 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Wed, 25 Oct 2023 11:01:49 +0100 Subject: [PATCH 24/80] feat(auth): Add backwards compatible field resolver (#9096) --- .../com/datahub/authorization/EntityFieldType.java | 13 +++++++++++++ .../authorization/DefaultEntitySpecResolver.java | 13 +++++++------ .../DataPlatformInstanceFieldResolverProvider.java | 10 +++++----- .../DomainFieldResolverProvider.java | 5 +++-- .../EntityFieldResolverProvider.java | 6 ++++-- .../EntityTypeFieldResolverProvider.java | 7 +++++-- .../EntityUrnFieldResolverProvider.java | 7 +++++-- .../GroupMembershipFieldResolverProvider.java | 5 +++-- .../OwnerFieldResolverProvider.java | 5 +++-- ...taPlatformInstanceFieldResolverProviderTest.java | 2 +- .../GroupMembershipFieldResolverProviderTest.java | 2 +- 11 files changed, 50 insertions(+), 25 deletions(-) diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java index 46763f29a7040..1258d958f2092 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/EntityFieldType.java @@ -4,6 +4,19 @@ * List of entity field types to fetch for a given entity */ public enum EntityFieldType { + + /** + * Type of the entity (e.g. dataset, chart) + * @deprecated + */ + @Deprecated + RESOURCE_URN, + /** + * Urn of the entity + * @deprecated + */ + @Deprecated + RESOURCE_TYPE, /** * Type of the entity (e.g. dataset, chart) */ diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java index 4ad14ed59c9c0..65b0329a9c4f2 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DefaultEntitySpecResolver.java @@ -1,15 +1,16 @@ package com.datahub.authorization; -import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.datahub.authentication.Authentication; +import com.datahub.authorization.fieldresolverprovider.DataPlatformInstanceFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.DomainFieldResolverProvider; -import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.EntityFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityTypeFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.EntityUrnFieldResolverProvider; import com.datahub.authorization.fieldresolverprovider.GroupMembershipFieldResolverProvider; +import com.datahub.authorization.fieldresolverprovider.OwnerFieldResolverProvider; import com.google.common.collect.ImmutableList; import com.linkedin.entity.client.EntityClient; +import com.linkedin.util.Pair; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -34,7 +35,7 @@ public ResolvedEntitySpec resolve(EntitySpec entitySpec) { private Map getFieldResolvers(EntitySpec entitySpec) { return _entityFieldResolverProviders.stream() - .collect(Collectors.toMap(EntityFieldResolverProvider::getFieldType, - hydrator -> hydrator.getFieldResolver(entitySpec))); + .flatMap(resolver -> resolver.getFieldTypes().stream().map(fieldType -> Pair.of(fieldType, resolver))) + .collect(Collectors.toMap(Pair::getKey, pair -> pair.getValue().getFieldResolver(entitySpec))); } } diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java index 27cb8fcee8138..cbb237654e969 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProvider.java @@ -1,8 +1,5 @@ package com.datahub.authorization.fieldresolverprovider; -import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; - import com.datahub.authentication.Authentication; import com.datahub.authorization.EntityFieldType; import com.datahub.authorization.EntitySpec; @@ -14,10 +11,13 @@ import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.client.EntityClient; import java.util.Collections; +import java.util.List; import java.util.Objects; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import static com.linkedin.metadata.Constants.*; + /** * Provides field resolver for domain given resourceSpec */ @@ -29,8 +29,8 @@ public class DataPlatformInstanceFieldResolverProvider implements EntityFieldRes private final Authentication _systemAuthentication; @Override - public EntityFieldType getFieldType() { - return EntityFieldType.DATA_PLATFORM_INSTANCE; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.DATA_PLATFORM_INSTANCE); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java index 25c2165f02b94..15d821b75c0bd 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/DomainFieldResolverProvider.java @@ -14,6 +14,7 @@ import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -37,8 +38,8 @@ public class DomainFieldResolverProvider implements EntityFieldResolverProvider private final Authentication _systemAuthentication; @Override - public EntityFieldType getFieldType() { - return EntityFieldType.DOMAIN; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.DOMAIN); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java index a76db0ecb5102..227d403a9cd1d 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityFieldResolverProvider.java @@ -3,6 +3,7 @@ import com.datahub.authorization.FieldResolver; import com.datahub.authorization.EntityFieldType; import com.datahub.authorization.EntitySpec; +import java.util.List; /** @@ -11,9 +12,10 @@ public interface EntityFieldResolverProvider { /** - * Field that this hydrator is hydrating + * List of fields that this hydrator is hydrating. + * @return */ - EntityFieldType getFieldType(); + List getFieldTypes(); /** * Return resolver for fetching the field values given the entity diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java index 187f696904947..addac84c68b18 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityTypeFieldResolverProvider.java @@ -3,16 +3,19 @@ import com.datahub.authorization.FieldResolver; import com.datahub.authorization.EntityFieldType; import com.datahub.authorization.EntitySpec; +import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList; import java.util.Collections; +import java.util.List; /** * Provides field resolver for entity type given entitySpec */ public class EntityTypeFieldResolverProvider implements EntityFieldResolverProvider { + @Override - public EntityFieldType getFieldType() { - return EntityFieldType.TYPE; + public List getFieldTypes() { + return ImmutableList.of(EntityFieldType.TYPE, EntityFieldType.RESOURCE_TYPE); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java index 2f5c4a7c6c961..32960de687839 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/EntityUrnFieldResolverProvider.java @@ -3,16 +3,19 @@ import com.datahub.authorization.FieldResolver; import com.datahub.authorization.EntityFieldType; import com.datahub.authorization.EntitySpec; +import com.datastax.oss.driver.shaded.guava.common.collect.ImmutableList; import java.util.Collections; +import java.util.List; /** * Provides field resolver for entity urn given entitySpec */ public class EntityUrnFieldResolverProvider implements EntityFieldResolverProvider { + @Override - public EntityFieldType getFieldType() { - return EntityFieldType.URN; + public List getFieldTypes() { + return ImmutableList.of(EntityFieldType.URN, EntityFieldType.RESOURCE_URN); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java index 8db029632d7e2..b1202d9f4bbd3 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProvider.java @@ -13,6 +13,7 @@ import com.linkedin.identity.NativeGroupMembership; import com.linkedin.metadata.Constants; import com.linkedin.identity.GroupMembership; +import java.util.Collections; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -35,8 +36,8 @@ public class GroupMembershipFieldResolverProvider implements EntityFieldResolver private final Authentication _systemAuthentication; @Override - public EntityFieldType getFieldType() { - return EntityFieldType.GROUP_MEMBERSHIP; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.GROUP_MEMBERSHIP); } @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java index bdd652d1d3871..3c27f9e6ce8d7 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/fieldresolverprovider/OwnerFieldResolverProvider.java @@ -12,6 +12,7 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import java.util.Collections; +import java.util.List; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -28,8 +29,8 @@ public class OwnerFieldResolverProvider implements EntityFieldResolverProvider { private final Authentication _systemAuthentication; @Override - public EntityFieldType getFieldType() { - return EntityFieldType.OWNER; + public List getFieldTypes() { + return Collections.singletonList(EntityFieldType.OWNER); } @Override diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java index b2343bbb01509..5c7d87f1c05a9 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/DataPlatformInstanceFieldResolverProviderTest.java @@ -56,7 +56,7 @@ public void setup() { @Test public void shouldReturnDataPlatformInstanceType() { - assertEquals(EntityFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldType()); + assertEquals(EntityFieldType.DATA_PLATFORM_INSTANCE, dataPlatformInstanceFieldResolverProvider.getFieldTypes().get(0)); } @Test diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java index 54675045b4413..af547f14cd3fc 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authorization/fieldresolverprovider/GroupMembershipFieldResolverProviderTest.java @@ -53,7 +53,7 @@ public void setup() { @Test public void shouldReturnGroupsMembershipType() { - assertEquals(EntityFieldType.GROUP_MEMBERSHIP, groupMembershipFieldResolverProvider.getFieldType()); + assertEquals(EntityFieldType.GROUP_MEMBERSHIP, groupMembershipFieldResolverProvider.getFieldTypes().get(0)); } @Test From dd5d997390d489c777aac25dbbd3f47c4bab8340 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 25 Oct 2023 10:54:55 -0400 Subject: [PATCH 25/80] build(gradle): Support IntelliJ 2023.2.3 (#9034) --- metadata-models/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/metadata-models/build.gradle b/metadata-models/build.gradle index 53e7765152aef..bd8052283e168 100644 --- a/metadata-models/build.gradle +++ b/metadata-models/build.gradle @@ -23,6 +23,7 @@ dependencies { } } api project(':li-utils') + api project(path: ':li-utils', configuration: "dataTemplate") dataModel project(':li-utils') compileOnly externalDependency.lombok From 8a80e858a7b6bf67105e082475ada57a27c37c67 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 25 Oct 2023 13:06:12 -0400 Subject: [PATCH 26/80] build(ingest): Bump avro pin: security vulnerability (#9042) --- metadata-ingestion/scripts/avro_codegen.py | 3 +- metadata-ingestion/scripts/modeldocgen.py | 4 +- metadata-ingestion/setup.py | 8 +- .../ingestion/extractor/schema_util.py | 109 ++++++++++++------ .../src/datahub/ingestion/source/kafka.py | 19 ++- .../src/datahub/utilities/mapping.py | 4 +- .../unit/data_lake/test_schema_inference.py | 6 +- 7 files changed, 99 insertions(+), 54 deletions(-) diff --git a/metadata-ingestion/scripts/avro_codegen.py b/metadata-ingestion/scripts/avro_codegen.py index a9b9b4b20f5ac..021ebd4a31eb3 100644 --- a/metadata-ingestion/scripts/avro_codegen.py +++ b/metadata-ingestion/scripts/avro_codegen.py @@ -152,7 +152,8 @@ def add_name(self, name_attr, space_attr, new_schema): return encoded -autogen_header = """# flake8: noqa +autogen_header = """# mypy: ignore-errors +# flake8: noqa # This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py # Do not modify manually! diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py index ffa80515dbafd..81b26145e620c 100644 --- a/metadata-ingestion/scripts/modeldocgen.py +++ b/metadata-ingestion/scripts/modeldocgen.py @@ -351,8 +351,8 @@ def strip_types(field_path: str) -> str: field_objects = [] for f in entity_fields: field = avro.schema.Field( - type=f["type"], - name=f["name"], + f["type"], + f["name"], has_default=False, ) field_objects.append(field) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 72b0e776a0da5..0b8661b0df5f5 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -32,7 +32,7 @@ "expandvars>=0.6.5", "avro-gen3==0.7.11", # "avro-gen3 @ git+https://github.com/acryldata/avro_gen@master#egg=avro-gen3", - "avro>=1.10.2,<1.11", + "avro>=1.11.3,<1.12", "python-dateutil>=2.8.0", "tabulate", "progressbar2", @@ -355,7 +355,11 @@ | {"psycopg2-binary", "pymysql>=1.0.2"}, "pulsar": {"requests"}, "redash": {"redash-toolbelt", "sql-metadata"} | sqllineage_lib, - "redshift": sql_common | redshift_common | usage_common | {"redshift-connector"} | sqlglot_lib, + "redshift": sql_common + | redshift_common + | usage_common + | {"redshift-connector"} + | sqlglot_lib, "redshift-legacy": sql_common | redshift_common | sqlglot_lib, "redshift-usage-legacy": sql_common | redshift_common | sqlglot_lib | usage_common, "s3": {*s3_base, *data_lake_profiling}, diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py index 4acf99a50e50e..df0b732833fbe 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py @@ -1,6 +1,18 @@ import json import logging -from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Mapping, + Optional, + Type, + Union, + cast, + overload, +) import avro.schema @@ -54,6 +66,8 @@ avro.schema.PrimitiveSchema, ] +SchemaOrField = Union[avro.schema.Schema, avro.schema.Field] + FieldStack = List[avro.schema.Field] # The latest avro code contains this type definition in a compatibility module, @@ -124,16 +138,22 @@ def __init__( self._meta_mapping_processor = meta_mapping_processor self._schema_tags_field = schema_tags_field self._tag_prefix = tag_prefix + # Map of avro schema type to the conversion handler - self._avro_type_to_mce_converter_map: Dict[ - avro.schema.Schema, - Callable[[ExtendedAvroNestedSchemas], Generator[SchemaField, None, None]], + # TODO: Clean up this type... perhaps refactor + self._avro_type_to_mce_converter_map: Mapping[ + Union[ + Type[avro.schema.Schema], + Type[avro.schema.Field], + Type[avro.schema.LogicalSchema], + ], + Callable[[SchemaOrField], Iterable[SchemaField]], ] = { avro.schema.RecordSchema: self._gen_from_non_field_nested_schemas, avro.schema.UnionSchema: self._gen_from_non_field_nested_schemas, avro.schema.ArraySchema: self._gen_from_non_field_nested_schemas, avro.schema.MapSchema: self._gen_from_non_field_nested_schemas, - avro.schema.Field: self._gen_nested_schema_from_field, + avro.schema.Field: self._gen_nested_schema_from_field, # type: ignore avro.schema.PrimitiveSchema: self._gen_non_nested_to_mce_fields, avro.schema.FixedSchema: self._gen_non_nested_to_mce_fields, avro.schema.EnumSchema: self._gen_non_nested_to_mce_fields, @@ -142,20 +162,22 @@ def __init__( @staticmethod def _get_type_name( - avro_schema: avro.schema.Schema, logical_if_present: bool = False + avro_schema: SchemaOrField, logical_if_present: bool = False ) -> str: logical_type_name: Optional[str] = None if logical_if_present: - logical_type_name = getattr( - avro_schema, "logical_type", None - ) or avro_schema.props.get("logicalType") + logical_type_name = cast( + Optional[str], + getattr(avro_schema, "logical_type", None) + or avro_schema.props.get("logicalType"), + ) return logical_type_name or str( getattr(avro_schema.type, "type", avro_schema.type) ) @staticmethod def _get_column_type( - avro_schema: avro.schema.Schema, logical_type: Optional[str] + avro_schema: SchemaOrField, logical_type: Optional[str] ) -> SchemaFieldDataType: type_name: str = AvroToMceSchemaConverter._get_type_name(avro_schema) TypeClass: Optional[Type] = AvroToMceSchemaConverter.field_type_mapping.get( @@ -186,7 +208,7 @@ def _get_column_type( ) return dt - def _is_nullable(self, schema: avro.schema.Schema) -> bool: + def _is_nullable(self, schema: SchemaOrField) -> bool: if isinstance(schema, avro.schema.Field): return self._is_nullable(schema.type) if isinstance(schema, avro.schema.UnionSchema): @@ -208,7 +230,7 @@ def _strip_namespace(name_or_fullname: str) -> str: return name_or_fullname.rsplit(".", maxsplit=1)[-1] @staticmethod - def _get_simple_native_type(schema: ExtendedAvroNestedSchemas) -> str: + def _get_simple_native_type(schema: SchemaOrField) -> str: if isinstance(schema, (avro.schema.RecordSchema, avro.schema.Field)): # For Records, fields, always return the name. return AvroToMceSchemaConverter._strip_namespace(schema.name) @@ -226,7 +248,7 @@ def _get_simple_native_type(schema: ExtendedAvroNestedSchemas) -> str: return schema.type @staticmethod - def _get_type_annotation(schema: ExtendedAvroNestedSchemas) -> str: + def _get_type_annotation(schema: SchemaOrField) -> str: simple_native_type = AvroToMceSchemaConverter._get_simple_native_type(schema) if simple_native_type.startswith("__struct_"): simple_native_type = "struct" @@ -237,10 +259,24 @@ def _get_type_annotation(schema: ExtendedAvroNestedSchemas) -> str: else: return f"[type={simple_native_type}]" + @staticmethod + @overload + def _get_underlying_type_if_option_as_union( + schema: SchemaOrField, default: SchemaOrField + ) -> SchemaOrField: + ... + + @staticmethod + @overload + def _get_underlying_type_if_option_as_union( + schema: SchemaOrField, default: Optional[SchemaOrField] = None + ) -> Optional[SchemaOrField]: + ... + @staticmethod def _get_underlying_type_if_option_as_union( - schema: AvroNestedSchemas, default: Optional[AvroNestedSchemas] = None - ) -> AvroNestedSchemas: + schema: SchemaOrField, default: Optional[SchemaOrField] = None + ) -> Optional[SchemaOrField]: if isinstance(schema, avro.schema.UnionSchema) and len(schema.schemas) == 2: (first, second) = schema.schemas if first.type == AVRO_TYPE_NULL: @@ -258,8 +294,8 @@ class SchemaFieldEmissionContextManager: def __init__( self, - schema: avro.schema.Schema, - actual_schema: avro.schema.Schema, + schema: SchemaOrField, + actual_schema: SchemaOrField, converter: "AvroToMceSchemaConverter", description: Optional[str] = None, default_value: Optional[str] = None, @@ -275,7 +311,7 @@ def __enter__(self): self._converter._prefix_name_stack.append(type_annotation) return self - def emit(self) -> Generator[SchemaField, None, None]: + def emit(self) -> Iterable[SchemaField]: if ( not isinstance( self._actual_schema, @@ -307,7 +343,7 @@ def emit(self) -> Generator[SchemaField, None, None]: description = self._description if not description and actual_schema.props.get("doc"): - description = actual_schema.props.get("doc") + description = cast(Optional[str], actual_schema.props.get("doc")) if self._default_value is not None: description = f"{description if description else ''}\nField default value: {self._default_value}" @@ -320,12 +356,12 @@ def emit(self) -> Generator[SchemaField, None, None]: native_data_type = native_data_type[ slice(len(type_prefix), len(native_data_type) - 1) ] - native_data_type = actual_schema.props.get( - "native_data_type", native_data_type + native_data_type = cast( + str, actual_schema.props.get("native_data_type", native_data_type) ) field_path = self._converter._get_cur_field_path() - merged_props = {} + merged_props: Dict[str, Any] = {} merged_props.update(self._schema.other_props) merged_props.update(schema.other_props) @@ -363,12 +399,13 @@ def emit(self) -> Generator[SchemaField, None, None]: meta_terms_aspect = meta_aspects.get(Constants.ADD_TERM_OPERATION) - logical_type_name: Optional[str] = ( + logical_type_name: Optional[str] = cast( + Optional[str], # logicalType nested inside type getattr(actual_schema, "logical_type", None) or actual_schema.props.get("logicalType") # bare logicalType - or self._actual_schema.props.get("logicalType") + or self._actual_schema.props.get("logicalType"), ) field = SchemaField( @@ -392,14 +429,12 @@ def emit(self) -> Generator[SchemaField, None, None]: def __exit__(self, exc_type, exc_val, exc_tb): self._converter._prefix_name_stack.pop() - def _get_sub_schemas( - self, schema: ExtendedAvroNestedSchemas - ) -> Generator[avro.schema.Schema, None, None]: + def _get_sub_schemas(self, schema: SchemaOrField) -> Iterable[SchemaOrField]: """Responsible for generation for appropriate sub-schemas for every nested AVRO type.""" def gen_items_from_list_tuple_or_scalar( val: Any, - ) -> Generator[avro.schema.Schema, None, None]: + ) -> Iterable[avro.schema.Schema]: if isinstance(val, (list, tuple)): for i in val: yield i @@ -433,7 +468,7 @@ def gen_items_from_list_tuple_or_scalar( def _gen_nested_schema_from_field( self, field: avro.schema.Field, - ) -> Generator[SchemaField, None, None]: + ) -> Iterable[SchemaField]: """Handles generation of MCE SchemaFields for an AVRO Field type.""" # NOTE: Here we only manage the field stack and trigger MCE Field generation from this field's type. # The actual emitting of a field happens when @@ -447,7 +482,7 @@ def _gen_nested_schema_from_field( def _gen_from_last_field( self, schema_to_recurse: Optional[AvroNestedSchemas] = None - ) -> Generator[SchemaField, None, None]: + ) -> Iterable[SchemaField]: """Emits the field most-recent field, optionally triggering sub-schema generation under the field.""" last_field_schema = self._fields_stack[-1] # Generate the custom-description for the field. @@ -467,8 +502,8 @@ def _gen_from_last_field( yield from self._to_mce_fields(sub_schema) def _gen_from_non_field_nested_schemas( - self, schema: AvroNestedSchemas - ) -> Generator[SchemaField, None, None]: + self, schema: SchemaOrField + ) -> Iterable[SchemaField]: """Handles generation of MCE SchemaFields for all standard AVRO nested types.""" # Handle recursive record definitions recurse: bool = True @@ -511,8 +546,8 @@ def _gen_from_non_field_nested_schemas( yield from self._to_mce_fields(sub_schema) def _gen_non_nested_to_mce_fields( - self, schema: AvroNonNestedSchemas - ) -> Generator[SchemaField, None, None]: + self, schema: SchemaOrField + ) -> Iterable[SchemaField]: """Handles generation of MCE SchemaFields for non-nested AVRO types.""" with AvroToMceSchemaConverter.SchemaFieldEmissionContextManager( schema, @@ -521,9 +556,7 @@ def _gen_non_nested_to_mce_fields( ) as non_nested_emitter: yield from non_nested_emitter.emit() - def _to_mce_fields( - self, avro_schema: avro.schema.Schema - ) -> Generator[SchemaField, None, None]: + def _to_mce_fields(self, avro_schema: SchemaOrField) -> Iterable[SchemaField]: # Invoke the relevant conversion handler for the schema element type. schema_type = ( type(avro_schema) @@ -541,7 +574,7 @@ def to_mce_fields( meta_mapping_processor: Optional[OperationProcessor] = None, schema_tags_field: Optional[str] = None, tag_prefix: Optional[str] = None, - ) -> Generator[SchemaField, None, None]: + ) -> Iterable[SchemaField]: """ Converts a key or value type AVRO schema string to appropriate MCE SchemaFields. :param avro_schema_string: String representation of the AVRO schema. diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index d5039360da567..23770ff3cf812 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -3,7 +3,7 @@ import logging from dataclasses import dataclass, field from enum import Enum -from typing import Any, Dict, Iterable, List, Optional, Type +from typing import Any, Dict, Iterable, List, Optional, Type, cast import avro.schema import confluent_kafka @@ -316,13 +316,20 @@ def _extract_record( avro_schema = avro.schema.parse( schema_metadata.platformSchema.documentSchema ) - description = avro_schema.doc + description = getattr(avro_schema, "doc", None) # set the tags all_tags: List[str] = [] - for tag in avro_schema.other_props.get( - self.source_config.schema_tags_field, [] - ): - all_tags.append(self.source_config.tag_prefix + tag) + try: + schema_tags = cast( + Iterable[str], + avro_schema.other_props.get( + self.source_config.schema_tags_field, [] + ), + ) + for tag in schema_tags: + all_tags.append(self.source_config.tag_prefix + tag) + except TypeError: + pass if self.source_config.enable_meta_mapping: meta_aspects = self.meta_processor.process(avro_schema.other_props) diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index eb2d975ee607f..f91c01d901ac1 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -4,7 +4,7 @@ import re import time from functools import reduce -from typing import Any, Dict, List, Match, Optional, Union, cast +from typing import Any, Dict, List, Mapping, Match, Optional, Union, cast from datahub.emitter import mce_builder from datahub.emitter.mce_builder import OwnerType @@ -111,7 +111,7 @@ def __init__( self.owner_source_type = owner_source_type self.match_nested_props = match_nested_props - def process(self, raw_props: Dict[str, Any]) -> Dict[str, Any]: + def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]: # Defining the following local variables - # operations_map - the final resulting map when operations are processed. # Against each operation the values to be applied are stored. diff --git a/metadata-ingestion/tests/unit/data_lake/test_schema_inference.py b/metadata-ingestion/tests/unit/data_lake/test_schema_inference.py index cbd5be9e7d832..4a69deb572fbd 100644 --- a/metadata-ingestion/tests/unit/data_lake/test_schema_inference.py +++ b/metadata-ingestion/tests/unit/data_lake/test_schema_inference.py @@ -1,14 +1,14 @@ import tempfile from typing import List, Type -import avro.schema import pandas as pd import ujson from avro import schema as avro_schema from avro.datafile import DataFileWriter from avro.io import DatumWriter -from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet +from datahub.ingestion.source.schema_inference import csv_tsv, json, parquet +from datahub.ingestion.source.schema_inference.avro import AvroInferrer from datahub.metadata.com.linkedin.pegasus2avro.schema import ( BooleanTypeClass, NumberTypeClass, @@ -123,7 +123,7 @@ def test_infer_schema_avro(): file.seek(0) - fields = avro.AvroInferrer().infer_schema(file) + fields = AvroInferrer().infer_schema(file) fields.sort(key=lambda x: x.fieldPath) assert_field_paths_match(fields, expected_field_paths_avro) From b9508e6dd50c5d0eaf8eddb21c5bdf55bec1646a Mon Sep 17 00:00:00 2001 From: siddiquebagwan-gslab Date: Wed, 25 Oct 2023 23:48:15 +0530 Subject: [PATCH 27/80] fix(ingestion/redshift): fix schema field data type mappings --- .../src/datahub/ingestion/source/redshift/redshift.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index 26237a6ce12e0..c7d01021773b1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -218,6 +218,9 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): ] = { "BYTES": BytesType, "BOOL": BooleanType, + "BOOLEAN": BooleanType, + "DOUBLE": NumberType, + "DOUBLE PRECISION": NumberType, "DECIMAL": NumberType, "NUMERIC": NumberType, "BIGNUMERIC": NumberType, @@ -244,6 +247,13 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): "CHARACTER": StringType, "CHAR": StringType, "TIMESTAMP WITHOUT TIME ZONE": TimeType, + "REAL": NumberType, + "VARCHAR": StringType, + "TIMESTAMPTZ": TimeType, + "GEOMETRY": NullType, + "HLLSKETCH": NullType, + "TIMETZ": TimeType, + "VARBYTE": StringType, } def get_platform_instance_id(self) -> str: From 1c77bca0c68878ca5cb86f741ca77ce0aa497272 Mon Sep 17 00:00:00 2001 From: Younghoon YUN Date: Thu, 26 Oct 2023 05:01:47 +0900 Subject: [PATCH 28/80] fix(datahub-protobuf): add check if nested field is reserved (#9058) --- .../src/main/java/datahub/protobuf/model/ProtobufField.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java index 42884241d9f7c..d890c373f1299 100644 --- a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/model/ProtobufField.java @@ -259,7 +259,9 @@ private FieldDescriptorProto getNestedTypeFields(List pathList, Descrip messageType = messageType.getNestedType(value); } - if (pathList.get(pathSize - 2) == DescriptorProto.FIELD_FIELD_NUMBER) { + if (pathList.get(pathSize - 2) == DescriptorProto.FIELD_FIELD_NUMBER + && pathList.get(pathSize - 1) != DescriptorProto.RESERVED_RANGE_FIELD_NUMBER + && pathList.get(pathSize - 1) != DescriptorProto.RESERVED_NAME_FIELD_NUMBER) { return messageType.getField(pathList.get(pathSize - 1)); } else { return null; From 32f5dcb1544e5a47efbb48d39b215d3bdc33535b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 25 Oct 2023 13:16:49 -0700 Subject: [PATCH 29/80] fix(ingest): better handling around sink errors (#9003) --- .../src/datahub/ingestion/run/pipeline.py | 10 +++++- .../datahub/ingestion/sink/datahub_kafka.py | 33 ++++++++----------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 07b55e0e25a89..f2735c24ca19d 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -390,7 +390,15 @@ def run(self) -> None: record_envelopes = self.extractor.get_records(wu) for record_envelope in self.transform(record_envelopes): if not self.dry_run: - self.sink.write_record_async(record_envelope, callback) + try: + self.sink.write_record_async( + record_envelope, callback + ) + except Exception as e: + # In case the sink's error handling is bad, we still want to report the error. + self.sink.report.report_failure( + f"Failed to write record: {e}" + ) except RuntimeError: raise diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py index 39054c256a7fd..38ddadaafc862 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_kafka.py @@ -9,7 +9,6 @@ MetadataChangeEvent, MetadataChangeProposal, ) -from datahub.metadata.schema_classes import MetadataChangeProposalClass class KafkaSinkConfig(KafkaEmitterConfig): @@ -58,27 +57,21 @@ def write_record_async( ], write_callback: WriteCallback, ) -> None: - record = record_envelope.record - if isinstance(record, MetadataChangeEvent): - self.emitter.emit_mce_async( + callback = _KafkaCallback( + self.report, record_envelope, write_callback + ).kafka_callback + try: + record = record_envelope.record + self.emitter.emit( record, - callback=_KafkaCallback( - self.report, record_envelope, write_callback - ).kafka_callback, - ) - elif isinstance( - record, (MetadataChangeProposalWrapper, MetadataChangeProposalClass) - ): - self.emitter.emit_mcp_async( - record, - callback=_KafkaCallback( - self.report, record_envelope, write_callback - ).kafka_callback, - ) - else: - raise ValueError( - f"The datahub-kafka sink only supports MetadataChangeEvent/MetadataChangeProposal[Wrapper] classes, not {type(record)}" + callback=callback, ) + except Exception as err: + # In case we throw an exception while trying to emit the record, + # catch it and report the failure. This might happen if the schema + # registry is down or otherwise misconfigured, in which case we'd + # fail when serializing the record. + callback(err, f"Failed to write record: {err}") def close(self) -> None: self.emitter.flush() From 6c932e8afeb4ac71c9b6b31e9fde3876c9e947cf Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Wed, 25 Oct 2023 16:17:09 -0400 Subject: [PATCH 30/80] feat(ingest/bigquery): Attempt to support raw dataset pattern (#9109) --- docs/how/updating-datahub.md | 8 +-- .../source/bigquery_v2/bigquery_config.py | 18 ++++++- .../tests/unit/test_bigquery_source.py | 53 +++++++++++++++++++ 3 files changed, 74 insertions(+), 5 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 8813afee65be9..4d1535f28fa0a 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -53,10 +53,10 @@ into for example, using `datahub put` command. Policies can be also removed and re-created via UI. - #9077 - The BigQuery ingestion source by default sets `match_fully_qualified_names: true`. This means that any `dataset_pattern` or `schema_pattern` specified will be matched on the fully -qualified dataset name, i.e. `.`. If this is not the case, please -update your pattern (e.g. prepend your old dataset pattern with `.*\.` which matches the project part), -or set `match_fully_qualified_names: false` in your recipe. However, note that -setting this to `false` is deprecated and this flag will be removed entirely in a future release. +qualified dataset name, i.e. `.`. We attempt to support the old +pattern format by prepending `.*\\.` to dataset patterns lacking a period, so in most cases this +should not cause any issues. However, if you have a complex dataset pattern, we recommend you +manually convert it to the fully qualified format to avoid any potential issues. ### Potential Downtime diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index a6a740385cf5c..6203192769750 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -299,7 +299,7 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: "use project_id_pattern whenever possible. project_id will be deprecated, please use project_id_pattern only if possible." ) - dataset_pattern = values.get("dataset_pattern") + dataset_pattern: Optional[AllowDenyPattern] = values.get("dataset_pattern") schema_pattern = values.get("schema_pattern") if ( dataset_pattern == AllowDenyPattern.allow_all() @@ -329,6 +329,22 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: "Please update `dataset_pattern` to match against fully qualified schema name `.` and set config `match_fully_qualified_names : True`." "The config option `match_fully_qualified_names` is deprecated and will be removed in a future release." ) + elif match_fully_qualified_names and dataset_pattern is not None: + adjusted = False + for lst in [dataset_pattern.allow, dataset_pattern.deny]: + for i, pattern in enumerate(lst): + if "." not in pattern: + if pattern.startswith("^"): + lst[i] = r"^.*\." + pattern[1:] + else: + lst[i] = r".*\." + pattern + adjusted = True + if adjusted: + logger.warning( + "`dataset_pattern` was adjusted to match against fully qualified schema names," + " of the form `.`." + ) + return values def get_table_pattern(self, pattern: List[str]) -> str: diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index 5a11a933c8595..4cfa5c48d2377 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -53,6 +53,59 @@ def test_bigquery_uri_on_behalf(): assert config.get_sql_alchemy_url() == "bigquery://test-project-on-behalf" +def test_bigquery_dataset_pattern(): + config = BigQueryV2Config.parse_obj( + { + "dataset_pattern": { + "allow": [ + "test-dataset", + "test-project.test-dataset", + ".*test-dataset", + ], + "deny": [ + "^test-dataset-2$", + "project\\.second_dataset", + ], + }, + } + ) + assert config.dataset_pattern.allow == [ + r".*\.test-dataset", + r"test-project.test-dataset", + r".*test-dataset", + ] + assert config.dataset_pattern.deny == [ + r"^.*\.test-dataset-2$", + r"project\.second_dataset", + ] + + config = BigQueryV2Config.parse_obj( + { + "dataset_pattern": { + "allow": [ + "test-dataset", + "test-project.test-dataset", + ".*test-dataset", + ], + "deny": [ + "^test-dataset-2$", + "project\\.second_dataset", + ], + }, + "match_fully_qualified_names": False, + } + ) + assert config.dataset_pattern.allow == [ + r"test-dataset", + r"test-project.test-dataset", + r".*test-dataset", + ] + assert config.dataset_pattern.deny == [ + r"^test-dataset-2$", + r"project\.second_dataset", + ] + + def test_bigquery_uri_with_credential(): expected_credential_json = { "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", From 2ebf33eb13d14c17bc6cb0eaee3a97dba33ea338 Mon Sep 17 00:00:00 2001 From: Zachary McNellis Date: Wed, 25 Oct 2023 16:25:41 -0400 Subject: [PATCH 31/80] docs(observability): Column Assertion user guide (#9106) Co-authored-by: John Joyce --- docs-website/sidebars.js | 1 + .../observe/column-assertions.md | 358 ++++++++++++++++++ 2 files changed, 359 insertions(+) create mode 100644 docs/managed-datahub/observe/column-assertions.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index b2b3df4dfb33c..31d69aec46d8b 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -446,6 +446,7 @@ module.exports = { "docs/managed-datahub/observe/freshness-assertions", "docs/managed-datahub/observe/volume-assertions", "docs/managed-datahub/observe/custom-sql-assertions", + "docs/managed-datahub/observe/column-assertions", ], }, { diff --git a/docs/managed-datahub/observe/column-assertions.md b/docs/managed-datahub/observe/column-assertions.md new file mode 100644 index 0000000000000..99a764f771676 --- /dev/null +++ b/docs/managed-datahub/observe/column-assertions.md @@ -0,0 +1,358 @@ +--- +description: This page provides an overview of working with DataHub Column Assertions +--- +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + + +# Column Assertions + + + +> ⚠️ The **Column Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only +> be available to a limited set of design partners. +> +> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success +> representative. + +## Introduction + +Can you remember a time when an important warehouse table column changed dramatically, with little or no notice? Perhaps the number of null values suddenly spiked, or a new value was added to a fixed set of possible values. If the answer is yes, how did you initially find out? We'll take a guess - someone looking at an internal reporting dashboard or worse, a user using your your product, sounded an alarm when a number looked a bit out of the ordinary. + +There are many reasons why important columns in your Snowflake, Redshift, or BigQuery tables may change - application code bugs, new feature rollouts, etc. Oftentimes, these changes break important assumptions made about the data used in building key downstream data products like reporting dashboards or data-driven product features. + +What if you could reduce the time to detect these incidents, so that the people responsible for the data were made aware of data issues before anyone else? With Acryl DataHub Column Assertions, you can. + +With Acryl DataHub, you can define **Column Value** assertions to ensure each value in a column matches specific constraints, and **Column Metric** assertions to ensure that computed metrics from columns align with your expectations. As soon as things go wrong, your team will be the first to know, before the data issue becomes a larger data incident. + +In this guide, we'll cover the basics of Column Assertions - what they are, how to configure them, and more - so that you and your team can start building trust in your most important data assets. + +Let's dive in! + +## Support + +Column Assertions are currently supported for: + +1. Snowflake +2. Redshift +3. BigQuery + +Note that an Ingestion Source _must_ be configured with the data platform of your choice in +Acryl DataHub's **Ingestion** tab. + +> Note that Column Assertions are not yet supported if you are connecting to your warehouse +> using the DataHub CLI or a Remote Ingestion Executor. + +## What is a Column Assertion? + +A **Column Assertion** is a highly configurable Data Quality rule used to monitor specific columns of a Data Warehouse table for unexpected changes. + +Column Assertions are defined to validate a specific column, and can be used to + +1. Validate that the values of the column match some constraints (regex, allowed values, max, min, etc) across rows OR +2. Validate that specific column aggregation metrics match some expectations across rows. + +Column Assertions can be particularly useful for documenting and enforcing column-level "contracts", i.e. formal specifications about the expected contents of a particular column that can be used for coordinating among producers and consumers of the data. + +### Anatomy of Column Assertion + +Column Assertions can be divided into two main types: **Column Value** and **Column Metric** Assertions. + +A **Column Value Assertion** is used to monitor the value of a specific column in a table, and ensure that every row +adheres to a specific condition. In comparison, a **Column Metric Assertion** is used to compute a metric for that column, +and ensure that the value of that metric adheres to a specific condition. + +At the most basic level, both types consist of a few important parts: + +1. An **Evaluation Schedule** +2. A **Column Selection** +3. A **Evaluation Criteria** +4. A **Row Evaluation Type** + +In this section, we'll give an overview of each. + +#### 1. Evaluation Schedule + +The **Evaluation Schedule**: This defines how often to evaluate the Column Assertion against the given warehouse table. +This should usually be configured to match the expected change frequency of the table, although it can also be less +frequently depending on your requirements. You can also specify specific days of the week, hours in the day, or even +minutes in an hour. + +#### 2. Column Selection + +The **Column Selection**: This defines the column that should be monitored by the Column Assertion. You can choose from +any of the columns from the table listed in the dropdown. Note that columns of struct / object type are not currently supported. + +#### 3. Evaluation Criteria + +The **Evaluation Criteria**: This defines the condition that must be satisfied in order for the Column +Assertion to pass. + +For **Column Value Assertions**, you will be able to choose from a set of operators that can be applied to the column +value. The options presented will vary based on the data type of the selected column. For example, if you've selected a numeric column, you +can verify that the column value is greater than a particular value. For string types, you can check that the column value +matches a particular regex pattern. Additionally, you are able to control the behavior of the check in the presence of NULL values. If the +**Allow Nulls** option is _disabled_, then any null values encountered will be reported as a failure when evaluating the +assertion. If **Allow Nulls** is enabled, then nulls will be ignored; the condition will be evaluated for rows where the column value is non-null. + +For **Column Metric Assertions**, you will be able to choose from a list of common column metrics - MAX, MIN, MEAN, NULL COUNT, etc - and then compare these metric values to an expected value. The list of metrics will vary based on the type of the selected column. For example +if you've selected a numeric column, you can choose to compute the MEAN value of the column, and then assert that it is greater than a +specific number. For string types, you can choose to compute the MAX LENGTH of the string across all column values, and then assert that it +is less than a specific number. + +#### 4. Row Selection Set + +The **Row Selection Set**: This defines which rows in the table the Column Assertion will be evaluated across. You can choose +from the following options: + +- **All Table Rows**: Evaluate the Column Assertion across all rows in the table. This is the default option. Note that +this may not be desirable for large tables. + +- **Only Rows That Have Changed**: Evaluate the Column Assertion only against rows that have changed since the last +evaluation of the assertion. If you choose this option, you will need to specify a **High Watermark Column** to help determine which rows +have changed. A **High Watermark Column** is a column that contains a constantly incrementing value - a date, a time, or +another always-increasing number - that can be used to find the "new rows" that were added since previous evaluation. When selected, a query will be issued to the table to find only the rows that have changed since the previous assertion evaluation. + +## Creating a Column Assertion + +### Prerequisites + +1. **Permissions**: To create or delete Column Assertions for a specific entity on DataHub, you'll need to be granted the + `Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. + +2. **Data Platform Connection**: In order to create a Column Assertion, you'll need to have an **Ingestion Source** + configured to your Data Platform: Snowflake, BigQuery, or Redshift under the **Ingestion** tab. + +Once these are in place, you're ready to create your Column Assertions! + +### Steps + +1. Navigate to the Table that you want to monitor +2. Click the **Validations** tab + +

+ +

+ +3. Click **+ Create Assertion** + +

+ +

+ +4. Choose **Column** + +5. Configure the evaluation **schedule**. This is the frequency at which the assertion will be evaluated to produce a + pass or fail result, and the times when the column values will be checked. + +6. Configure the **column assertion type**. You can choose from **Column Value** or **Column Metric**. + **Column Value** assertions are used to monitor the value of a specific column in a table, and ensure that every row + adheres to a specific condition. **Column Metric** assertions are used to compute a metric for that column, and then compare the value of that metric to your expectations. + +

+ +

+ +7. Configure the **column selection**. This defines the column that should be monitored by the Column Assertion. + You can choose from any of the columns from the table listed in the dropdown. + +

+ +

+ +8. Configure the **evaluation criteria**. This step varies based on the type of assertion you chose in the previous step. + + - **Column Value Assertions**: You will be able to choose from a set of operators that can be applied to the column + value. The options presented will vary based on the data type of the selected column. For example with numeric types, you + can check that the column value is greater than a specific value. For string types, you can check that the column value + matches a particular regex pattern. You will also be able to control the behavior of null values in the column. If the + **Allow Nulls** option is _disabled_, any null values encountered will be reported as a failure when evaluating the + assertion. + + - **Column Metric Assertions**: You will be able to choose from a list of common metrics and then specify the operator + and value to compare against. The list of metrics will vary based on the data type of the selected column. For example + with numeric types, you can choose to compute the average value of the column, and then assert that it is greater than a + specific number. For string types, you can choose to compute the max length of all column values, and then assert that it + is less than a specific number. + +9. Configure the **row evaluation type**. This defines which rows in the table the Column Assertion should evaluate. You can choose + from the following options: + + - **All Table Rows**: Evaluate the Column Assertion against all rows in the table. This is the default option. Note that + this may not be desirable for large tables. + + - **Only Rows That Have Changed**: Evaluate the Column Assertion only against rows that have changed since the last + evaluation. If you choose this option, you will need to specify a **High Watermark Column** to help determine which rows + have changed. A **High Watermark Column** is a column that contains a constantly-incrementing value - a date, a time, or + another always-increasing number. When selected, a query will be issued to the table find only the rows which have changed since the last assertion run. + +

+ +

+ +10. (Optional) Click **Advanced** to further customize the Column Assertion. The options listed here will vary based on the + type of assertion you chose in the previous step. + + - **Invalid Values Threshold**: For **Column Value** assertions, you can configure the number of invalid values + (i.e. rows) that are allowed to fail before the assertion is marked as failing. This is useful if you want to allow a limited number + of invalid values in the column. By default this is 0, meaning the assertion will fail if any rows have an invalid column value. + + - **Source**: For **Column Metric** assertions, you can choose the mechanism that will be used to obtain the column + metric. **Query** will issue a query to the dataset to compute the metric. **DataHub Dataset Profile** will use the + DataHub Dataset Profile metadata to compute the metric. Note that this option requires that dataset profiling + statistics are up-to-date as of the assertion run time. + + - **Additional Filters**: You can choose to add additional filters to the query that will be used to evaluate the + assertion. This is useful if you want to limit the assertion to a subset of rows in the table. Note this option will not + be available if you choose **DataHub Dataset Profile** as the **source**. + +11. Click **Next** +12. Configure actions that should be taken when the Column Assertion passes or fails + +

+ +

+ +- **Raise incident**: Automatically raise a new DataHub `Column` Incident for the Table whenever the Column Assertion is failing. This + may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when + an incident is created due to an Assertion failure. +- **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Column Assertion. Note that + any other incidents will not be impacted. + +10. Click **Save**. + +And that's it! DataHub will now begin to monitor your Column Assertion for the table. + +To view the time of the next Column Assertion evaluation, simply click **Column** and then click on your +new Assertion: + +

+ +

+ +Once your assertion has run, you will begin to see Success or Failure status for the Table + +

+ +

+ +## Stopping a Column Assertion + +In order to temporarily stop the evaluation of a Column Assertion: + +1. Navigate to the **Validations** tab of the table with the assertion +2. Click **Column** to open the Column Assertions list +3. Click the three-dot menu on the right side of the assertion you want to disable +4. Click **Stop** + +

+ +

+ +To resume the Column Assertion, simply click **Turn On**. + +

+ +

+ +## Creating Column Assertions via API + +Under the hood, Acryl DataHub implements Column Assertion Monitoring using two "entity" concepts: + +- **Assertion**: The specific expectation for the column metric. e.g. "The value of an integer column is greater than 10 for all rows in the table." This is the "what". + +- **Monitor**: The process responsible for evaluating the Assertion on a given evaluation schedule and using specific + mechanisms. This is the "how". + +Note that to create or delete Assertions and Monitors for a specific entity on DataHub, you'll need the +`Edit Assertions` and `Edit Monitors` privileges for it. + +#### GraphQL + +In order to create a Column Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 +GraphQL mutation queries to create a Column Assertion entity and create an Assertion Monitor entity responsible for evaluating it. + +Start by creating the Column Assertion entity using the `createFieldAssertion` query and hang on to the 'urn' field of the Assertion entity +you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. + +##### Examples + +To create a Column Assertion Entity that checks that the value of an integer column is greater than 10: + +```json +mutation createFieldAssertion { + createFieldAssertion( + input: { + entityUrn: "", + type: FIELD_VALUES, + fieldValuesAssertion: { + field: { + path: "", + type: "NUMBER", + nativeType: "NUMBER(38,0)" + }, + operator: GREATER_THAN, + parameters: { + value: { + type: NUMBER, + value: "10" + } + }, + failThreshold: { + type: COUNT, + value: 0 + }, + excludeNulls: true + } + } + ) { + urn +} +} +``` + +To create an Assertion Monitor Entity that evaluates the column assertion every 8 hours using all rows in the table: + +```json +mutation createAssertionMonitor { + createAssertionMonitor( + input: { + entityUrn: "", + assertionUrn: "", + schedule: { + cron: "0 */8 * * *", + timezone: "America/Los_Angeles" + }, + parameters: { + type: DATASET_FIELD, + datasetFieldParameters: { + sourceType: ALL_ROWS_QUERY + } + } + } + ) { + urn + } +} +``` + +This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using a query against all rows of the table). + +After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. + +You can delete assertions along with their monitors using GraphQL mutations: `deleteAssertion` and `deleteMonitor`. + +### Tips + +:::info +**Authorization** + +Remember to always provide a DataHub Personal Access Token when calling the GraphQL API. To do so, just add the 'Authorization' header as follows: + +``` +Authorization: Bearer +``` + +**Exploring GraphQL API** + +Also, remember that you can play with an interactive version of the Acryl GraphQL API at `https://your-account-id.acryl.io/api/graphiql` +::: From f402090c1ebec9601e5fef6e45879d3a0a015dbd Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:44:32 +0530 Subject: [PATCH 32/80] feat(ingest): support view lineage for all sqlalchemy sources (#9039) --- metadata-ingestion/setup.py | 52 +- .../src/datahub/configuration/common.py | 2 +- .../datahub/emitter/sql_parsing_builder.py | 5 +- .../api/incremental_lineage_helper.py | 13 +- .../src/datahub/ingestion/api/source.py | 1 + .../ingestion/source/dbt/dbt_common.py | 5 + .../source/snowflake/snowflake_lineage_v2.py | 14 +- .../src/datahub/ingestion/source/sql/hive.py | 83 ++- .../datahub/ingestion/source/sql/postgres.py | 20 +- .../ingestion/source/sql/sql_common.py | 126 +++- .../ingestion/source/sql/sql_config.py | 19 +- .../datahub/ingestion/source/sql/teradata.py | 54 +- .../source/sql/two_tier_sql_source.py | 6 +- .../datahub/ingestion/source/sql/vertica.py | 2 +- .../source/state/stateful_ingestion_base.py | 3 +- .../ingestion/source_config/sql/snowflake.py | 12 +- .../src/datahub/utilities/sqlglot_lineage.py | 21 +- .../hive/hive_mces_all_db_golden.json | 581 +++++++++++++++--- .../integration/hive/hive_mces_golden.json | 530 ++++++++++++++-- .../tests/integration/hive/hive_setup.sql | 22 +- .../mysql/mysql_mces_no_db_golden.json | 272 ++++++-- .../postgres_all_db_mces_with_db_golden.json | 324 ++++++++-- ..._db_to_file_with_db_estimate_row_count.yml | 2 +- .../postgres_mces_with_db_golden.json | 264 +++++++- ...res_to_file_with_db_estimate_row_count.yml | 2 +- .../snowflake/test_snowflake_failures.py | 3 +- .../trino/trino_hive_mces_golden.json | 211 +++++-- .../test_incremental_lineage_helper.py | 21 + 28 files changed, 2193 insertions(+), 477 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 0b8661b0df5f5..7f7826abe2095 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -101,22 +101,36 @@ "grpcio-tools>=1.44.0,<2", } -sql_common = { - # Required for all SQL sources. - # This is temporary lower bound that we're open to loosening/tightening as requirements show up - "sqlalchemy>=1.4.39, <2", - # Required for SQL profiling. - "great-expectations>=0.15.12, <=0.15.50", - # scipy version restricted to reduce backtracking, used by great-expectations, - "scipy>=1.7.2", - # GE added handling for higher version of jinja2 - # https://github.com/great-expectations/great_expectations/pull/5382/files - # datahub does not depend on traitlets directly but great expectations does. - # https://github.com/ipython/traitlets/issues/741 - "traitlets<5.2.2", - "greenlet", +usage_common = { + "sqlparse", +} + +sqlglot_lib = { + # Using an Acryl fork of sqlglot. + # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 + "acryl-sqlglot==18.5.2.dev45", } +sql_common = ( + { + # Required for all SQL sources. + # This is temporary lower bound that we're open to loosening/tightening as requirements show up + "sqlalchemy>=1.4.39, <2", + # Required for SQL profiling. + "great-expectations>=0.15.12, <=0.15.50", + # scipy version restricted to reduce backtracking, used by great-expectations, + "scipy>=1.7.2", + # GE added handling for higher version of jinja2 + # https://github.com/great-expectations/great_expectations/pull/5382/files + # datahub does not depend on traitlets directly but great expectations does. + # https://github.com/ipython/traitlets/issues/741 + "traitlets<5.2.2", + "greenlet", + } + | usage_common + | sqlglot_lib +) + sqllineage_lib = { "sqllineage==1.3.8", # We don't have a direct dependency on sqlparse but it is a dependency of sqllineage. @@ -125,12 +139,6 @@ "sqlparse==0.4.4", } -sqlglot_lib = { - # Using an Acryl fork of sqlglot. - # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==18.5.2.dev45", -} - aws_common = { # AWS Python SDK "boto3", @@ -243,10 +251,6 @@ powerbi_report_server = {"requests", "requests_ntlm"} -usage_common = { - "sqlparse", -} - databricks = { # 0.1.11 appears to have authentication issues with azure databricks "databricks-sdk>=0.9.0", diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index c909b89eb0c2d..73ac4baac48c0 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -283,7 +283,7 @@ class VersionedConfig(ConfigModel): class LineageConfig(ConfigModel): incremental_lineage: bool = Field( - default=True, + default=False, description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", ) diff --git a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py index dedcfa0385f75..cedaa4fbbd7f6 100644 --- a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py +++ b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py @@ -106,6 +106,7 @@ def process_sql_parsing_result( user: Optional[UserUrn] = None, custom_operation_type: Optional[str] = None, include_urns: Optional[Set[DatasetUrn]] = None, + include_column_lineage: bool = True, ) -> Iterable[MetadataWorkUnit]: """Process a single query and yield any generated workunits. @@ -130,7 +131,9 @@ def process_sql_parsing_result( _merge_lineage_data( downstream_urn=downstream_urn, upstream_urns=result.in_tables, - column_lineage=result.column_lineage, + column_lineage=result.column_lineage + if include_column_lineage + else None, upstream_edges=self._lineage_map[downstream_urn], query_timestamp=query_timestamp, is_view_ddl=is_view_ddl, diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py index 9478c5cf7efa2..945b201ca5758 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py +++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py @@ -130,10 +130,13 @@ def auto_incremental_lineage( if len(wu.metadata.proposedSnapshot.aspects) > 0: yield wu - yield _lineage_wu_via_read_modify_write( - graph, urn, lineage_aspect, wu.metadata.systemMetadata - ) if lineage_aspect.fineGrainedLineages else _convert_upstream_lineage_to_patch( - urn, lineage_aspect, wu.metadata.systemMetadata - ) + if lineage_aspect.fineGrainedLineages: + yield _lineage_wu_via_read_modify_write( + graph, urn, lineage_aspect, wu.metadata.systemMetadata + ) + elif lineage_aspect.upstreams: + yield _convert_upstream_lineage_to_patch( + urn, lineage_aspect, wu.metadata.systemMetadata + ) else: yield wu diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index b86844b1c4c83..8940642f7008a 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -215,6 +215,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ) ): auto_lowercase_dataset_urns = auto_lowercase_urns + return [ auto_lowercase_dataset_urns, auto_status_aspect, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 48d2118a9b091..c4de24bf192f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -280,6 +280,11 @@ class DBTCommonConfig( default=False, description="When enabled, dbt test warnings will be treated as failures.", ) + # override fault value to True. + incremental_lineage: bool = Field( + default=True, + description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run.", + ) @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 0a15c352fc842..9649054dbe6cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -136,7 +136,6 @@ def get_workunits( return self._populate_external_lineage_map(discovered_tables) - if self.config.include_view_lineage: if len(discovered_views) > 0: yield from self.get_view_upstream_workunits( @@ -200,14 +199,15 @@ def _gen_workunit_from_sql_parsing_result( self, dataset_identifier: str, result: SqlParsingResult, - ) -> MetadataWorkUnit: + ) -> Iterable[MetadataWorkUnit]: upstreams, fine_upstreams = self.get_upstreams_from_sql_parsing_result( self.dataset_urn_builder(dataset_identifier), result ) - self.report.num_views_with_upstreams += 1 - return self._create_upstream_lineage_workunit( - dataset_identifier, upstreams, fine_upstreams - ) + if upstreams: + self.report.num_views_with_upstreams += 1 + yield self._create_upstream_lineage_workunit( + dataset_identifier, upstreams, fine_upstreams + ) def _gen_workunits_from_query_result( self, @@ -251,7 +251,7 @@ def get_view_upstream_workunits( ) if result: views_processed.add(view_identifier) - yield self._gen_workunit_from_sql_parsing_result( + yield from self._gen_workunit_from_sql_parsing_result( view_identifier, result ) self.report.view_lineage_parse_secs = timer.elapsed_seconds() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py index 63b21bc82eddd..d081acb6c1eff 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py @@ -1,15 +1,18 @@ import json import logging import re -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Union from pydantic.class_validators import validator from pydantic.fields import Field # This import verifies that the dependencies are available. from pyhive import hive # noqa: F401 -from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveTimestamp +from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveDialect, HiveTimestamp +from sqlalchemy.engine.reflection import Inspector +from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.decorators import ( SourceCapability, SupportStatus, @@ -18,8 +21,10 @@ platform_name, support_status, ) +from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.extractor import schema_util -from datahub.ingestion.source.sql.sql_common import register_custom_type +from datahub.ingestion.source.sql.sql_common import SqlWorkUnit, register_custom_type +from datahub.ingestion.source.sql.sql_config import SQLCommonConfig from datahub.ingestion.source.sql.two_tier_sql_source import ( TwoTierSQLAlchemyConfig, TwoTierSQLAlchemySource, @@ -31,6 +36,7 @@ SchemaField, TimeTypeClass, ) +from datahub.metadata.schema_classes import ViewPropertiesClass from datahub.utilities import config_clean from datahub.utilities.hive_schema_to_avro import get_avro_schema_for_hive_column @@ -90,19 +96,34 @@ def dbapi_get_columns_patched(self, connection, table_name, schema=None, **kw): logger.warning(f"Failed to patch method due to {e}") +@reflection.cache # type: ignore +def get_view_names_patched(self, connection, schema=None, **kw): + query = "SHOW VIEWS" + if schema: + query += " IN " + self.identifier_preparer.quote_identifier(schema) + return [row[0] for row in connection.execute(query)] + + +@reflection.cache # type: ignore +def get_view_definition_patched(self, connection, view_name, schema=None, **kw): + full_table = self.identifier_preparer.quote_identifier(view_name) + if schema: + full_table = "{}.{}".format( + self.identifier_preparer.quote_identifier(schema), + self.identifier_preparer.quote_identifier(view_name), + ) + row = connection.execute("SHOW CREATE TABLE {}".format(full_table)).fetchone() + return row[0] + + +HiveDialect.get_view_names = get_view_names_patched +HiveDialect.get_view_definition = get_view_definition_patched + + class HiveConfig(TwoTierSQLAlchemyConfig): # defaults scheme = Field(default="hive", hidden_from_docs=True) - # Hive SQLAlchemy connector returns views as tables. - # See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. - # Disabling views helps us prevent this duplication. - include_views = Field( - default=False, - hidden_from_docs=True, - description="Hive SQLAlchemy connector returns views as tables. See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. Disabling views helps us prevent this duplication.", - ) - @validator("host_port") def clean_host_port(cls, v): return config_clean.remove_protocol(v) @@ -174,3 +195,41 @@ def get_schema_fields_for_column( return new_fields return fields + + # Hive SQLAlchemy connector returns views as tables in get_table_names. + # See https://github.com/dropbox/PyHive/blob/b21c507a24ed2f2b0cf15b0b6abb1c43f31d3ee0/pyhive/sqlalchemy_hive.py#L270-L273. + # This override makes sure that we ingest view definitions for views + def _process_view( + self, + dataset_name: str, + inspector: Inspector, + schema: str, + view: str, + sql_config: SQLCommonConfig, + ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) + + try: + view_definition = inspector.get_view_definition(view, schema) + if view_definition is None: + view_definition = "" + else: + # Some dialects return a TextClause instead of a raw string, + # so we need to convert them to a string. + view_definition = str(view_definition) + except NotImplementedError: + view_definition = "" + + if view_definition: + view_properties_aspect = ViewPropertiesClass( + materialized=False, viewLanguage="SQL", viewLogic=view_definition + ) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=view_properties_aspect, + ).as_workunit() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py index a6a9d8e2c8597..4f133c6459a0f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py @@ -103,10 +103,6 @@ class BasePostgresConfig(BasicSQLAlchemyConfig): class PostgresConfig(BasePostgresConfig): - include_view_lineage = Field( - default=False, description="Include table lineage for views" - ) - database_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), description=( @@ -183,9 +179,10 @@ def get_inspectors(self) -> Iterable[Inspector]: def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: yield from super().get_workunits_internal() - for inspector in self.get_inspectors(): - if self.config.include_view_lineage: - yield from self._get_view_lineage_workunits(inspector) + if self.views_failed_parsing: + for inspector in self.get_inspectors(): + if self.config.include_view_lineage: + yield from self._get_view_lineage_workunits(inspector) def _get_view_lineage_elements( self, inspector: Inspector @@ -245,11 +242,14 @@ def _get_view_lineage_workunits( dependent_view, dependent_schema = key # Construct a lineage object. + view_identifier = self.get_identifier( + schema=dependent_schema, entity=dependent_view, inspector=inspector + ) + if view_identifier not in self.views_failed_parsing: + return urn = mce_builder.make_dataset_urn_with_platform_instance( platform=self.platform, - name=self.get_identifier( - schema=dependent_schema, entity=dependent_view, inspector=inspector - ), + name=view_identifier, platform_instance=self.config.platform_instance, env=self.config.env, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index fad9b9e8018a5..51909eaf4ed55 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -2,12 +2,14 @@ import logging import traceback from dataclasses import dataclass, field +from functools import partial from typing import ( TYPE_CHECKING, Any, Dict, Iterable, List, + MutableMapping, Optional, Set, Tuple, @@ -29,7 +31,9 @@ make_tag_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.common.subtypes import ( @@ -86,9 +90,16 @@ ViewPropertiesClass, ) from datahub.telemetry import telemetry +from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.lossy_collections import LossyList from datahub.utilities.registries.domain_registry import DomainRegistry from datahub.utilities.sqlalchemy_query_combiner import SQLAlchemyQueryCombinerReport +from datahub.utilities.sqlglot_lineage import ( + SchemaResolver, + SqlParsingResult, + sqlglot_lineage, + view_definition_lineage_helper, +) if TYPE_CHECKING: from datahub.ingestion.source.ge_data_profiler import ( @@ -110,6 +121,11 @@ class SQLSourceReport(StaleEntityRemovalSourceReport): query_combiner: Optional[SQLAlchemyQueryCombinerReport] = None + num_view_definitions_parsed: int = 0 + num_view_definitions_failed_parsing: int = 0 + num_view_definitions_failed_column_parsing: int = 0 + view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList) + def report_entity_scanned(self, name: str, ent_type: str = "table") -> None: """ Entity could be a view or a table @@ -319,6 +335,18 @@ def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str) cached_domains=[k for k in self.config.domain], graph=self.ctx.graph ) + self.views_failed_parsing: Set[str] = set() + self.schema_resolver: SchemaResolver = SchemaResolver( + platform=self.platform, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + self._view_definition_cache: MutableMapping[str, str] + if self.config.use_file_backed_cache: + self._view_definition_cache = FileBackedDict[str]() + else: + self._view_definition_cache = {} + def warn(self, log: logging.Logger, key: str, reason: str) -> None: self.report.report_warning(key, reason[:100]) log.warning(f"{key} => {reason}") @@ -455,6 +483,11 @@ def get_schema_level_workunits( def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: return [ *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.ctx.graph, + self.config.incremental_lineage, + ), StaleEntityRemovalHandler.create( self, self.config, self.ctx ).workunit_processor, @@ -512,6 +545,35 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit profile_requests, profiler, platform=self.platform ) + if self.config.include_view_lineage: + yield from self.get_view_lineage() + + def get_view_lineage(self) -> Iterable[MetadataWorkUnit]: + builder = SqlParsingBuilder( + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) + for dataset_name in self._view_definition_cache.keys(): + view_definition = self._view_definition_cache[dataset_name] + result = self._run_sql_parser( + dataset_name, + view_definition, + self.schema_resolver, + ) + if result and result.out_tables: + # This does not yield any workunits but we use + # yield here to execute this method + yield from builder.process_sql_parsing_result( + result=result, + query=view_definition, + is_view_ddl=True, + include_column_lineage=self.config.include_view_column_lineage, + ) + else: + self.views_failed_parsing.add(dataset_name) + yield from builder.gen_workunits() + def get_identifier( self, *, schema: str, entity: str, inspector: Inspector, **kwargs: Any ) -> str: @@ -658,6 +720,8 @@ def _process_table( schema_fields, ) dataset_snapshot.aspects.append(schema_metadata) + if self.config.include_view_lineage: + self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) db_name = self.get_db_name(inspector) yield from self.add_table_to_schema_container( @@ -862,6 +926,12 @@ def _process_view( view: str, sql_config: SQLCommonConfig, ) -> Iterable[Union[SqlWorkUnit, MetadataWorkUnit]]: + dataset_urn = make_dataset_urn_with_platform_instance( + self.platform, + dataset_name, + self.config.platform_instance, + self.config.env, + ) try: columns = inspector.get_columns(view, schema) except KeyError: @@ -877,6 +947,8 @@ def _process_view( columns, canonical_schema=schema_fields, ) + if self.config.include_view_lineage: + self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) description, properties, _ = self.get_table_properties(inspector, schema, view) try: view_definition = inspector.get_view_definition(view, schema) @@ -890,12 +962,9 @@ def _process_view( view_definition = "" properties["view_definition"] = view_definition properties["is_view"] = "True" - dataset_urn = make_dataset_urn_with_platform_instance( - self.platform, - dataset_name, - self.config.platform_instance, - self.config.env, - ) + if view_definition and self.config.include_view_lineage: + self._view_definition_cache[dataset_name] = view_definition + dataset_snapshot = DatasetSnapshot( urn=dataset_urn, aspects=[StatusClass(removed=False)], @@ -942,6 +1011,51 @@ def _process_view( domain_registry=self.domain_registry, ) + def _run_sql_parser( + self, view_identifier: str, query: str, schema_resolver: SchemaResolver + ) -> Optional[SqlParsingResult]: + try: + database, schema = self.get_db_schema(view_identifier) + except ValueError: + logger.warning(f"Invalid view identifier: {view_identifier}") + return None + raw_lineage = sqlglot_lineage( + query, + schema_resolver=schema_resolver, + default_db=database, + default_schema=schema, + ) + view_urn = make_dataset_urn_with_platform_instance( + self.platform, + view_identifier, + self.config.platform_instance, + self.config.env, + ) + + if raw_lineage.debug_info.table_error: + logger.debug( + f"Failed to parse lineage for view {view_identifier}: " + f"{raw_lineage.debug_info.table_error}" + ) + self.report.num_view_definitions_failed_parsing += 1 + self.report.view_definitions_parsing_failures.append( + f"Table-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.table_error}" + ) + return None + + elif raw_lineage.debug_info.column_error: + self.report.num_view_definitions_failed_column_parsing += 1 + self.report.view_definitions_parsing_failures.append( + f"Column-level sql parsing error for view {view_identifier}: {raw_lineage.debug_info.column_error}" + ) + else: + self.report.num_view_definitions_parsed += 1 + return view_definition_lineage_helper(raw_lineage, view_urn) + + def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]: + database, schema, _view = dataset_identifier.split(".") + return database, schema + def get_profiler_instance(self, inspector: Inspector) -> "DatahubGEProfiler": from datahub.ingestion.source.ge_data_profiler import DatahubGEProfiler diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 57aae32b361cf..095b8e6443171 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -6,7 +6,7 @@ from pydantic import Field from sqlalchemy.engine import URL -from datahub.configuration.common import AllowDenyPattern, ConfigModel +from datahub.configuration.common import AllowDenyPattern, ConfigModel, LineageConfig from datahub.configuration.source_common import ( DatasetSourceConfigMixin, LowerCaseDatasetUrnConfigMixin, @@ -28,6 +28,7 @@ class SQLCommonConfig( StatefulIngestionConfigBase, DatasetSourceConfigMixin, LowerCaseDatasetUrnConfigMixin, + LineageConfig, ): options: dict = pydantic.Field( default_factory=dict, @@ -70,6 +71,22 @@ class SQLCommonConfig( description="If the source supports it, include table lineage to the underlying storage location.", ) + include_view_lineage: bool = Field( + default=True, + description="Populates view->view and table->view lineage using DataHub's sql parser.", + ) + + include_view_column_lineage: bool = Field( + default=True, + description="Populates column-level lineage for view->view and table->view lineage using DataHub's sql parser." + " Requires `include_view_lineage` to be enabled.", + ) + + use_file_backed_cache: bool = Field( + default=True, + description="Whether to use a file backed cache for the view definitions.", + ) + profiling: GEProfilingConfig = GEProfilingConfig() # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py index e628e4dbd3446..899a7b6697c0a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass from datetime import datetime -from typing import Iterable, MutableMapping, Optional, Union +from typing import Iterable, Optional, Union # This import verifies that the dependencies are available. import teradatasqlalchemy # noqa: F401 @@ -33,14 +33,11 @@ from datahub.ingestion.source.usage.usage_common import BaseUsageConfig from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport -from datahub.metadata._schema_classes import SchemaMetadataClass, ViewPropertiesClass from datahub.metadata.com.linkedin.pegasus2avro.schema import ( BytesTypeClass, TimeTypeClass, ) -from datahub.utilities.file_backed_collections import FileBackedDict from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage -from datahub.utilities.urns.dataset_urn import DatasetUrn logger: logging.Logger = logging.getLogger(__name__) @@ -87,11 +84,6 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): "This requires to have the table lineage feature enabled.", ) - include_view_lineage = Field( - default=True, - description="Whether to include view lineage in the ingestion. " - "This requires to have the view lineage feature enabled.", - ) usage: BaseUsageConfig = Field( description="The usage config to use when generating usage statistics", default=BaseUsageConfig(), @@ -107,11 +99,6 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig): description="Generate usage statistic.", ) - use_file_backed_cache: bool = Field( - default=True, - description="Whether to use a file backed cache for the view definitions.", - ) - @platform_name("Teradata") @config_class(TeradataConfig) @@ -143,8 +130,6 @@ class TeradataSource(TwoTierSQLAlchemySource): and "timestamp" < TIMESTAMP '{end_time}' """ - _view_definition_cache: MutableMapping[str, str] - def __init__(self, config: TeradataConfig, ctx: PipelineContext): super().__init__(config, ctx, "teradata") @@ -167,34 +152,11 @@ def __init__(self, config: TeradataConfig, ctx: PipelineContext): env=self.config.env, ) - if self.config.use_file_backed_cache: - self._view_definition_cache = FileBackedDict[str]() - else: - self._view_definition_cache = {} - @classmethod def create(cls, config_dict, ctx): config = TeradataConfig.parse_obj(config_dict) return cls(config, ctx) - def get_view_lineage(self) -> Iterable[MetadataWorkUnit]: - for key in self._view_definition_cache.keys(): - view_definition = self._view_definition_cache[key] - dataset_urn = DatasetUrn.create_from_string(key) - - db_name: Optional[str] = None - # We need to get the default db from the dataset urn otherwise the builder generates the wrong urns - if "." in dataset_urn.get_dataset_name(): - db_name = dataset_urn.get_dataset_name().split(".", 1)[0] - - self.report.num_view_ddl_parsed += 1 - if self.report.num_view_ddl_parsed % 1000 == 0: - logger.info(f"Parsed {self.report.num_queries_parsed} view ddl") - - yield from self.gen_lineage_from_query( - query=view_definition, default_database=db_name, is_view_ddl=True - ) - def get_audit_log_mcps(self) -> Iterable[MetadataWorkUnit]: engine = self.get_metadata_engine() for entry in engine.execute( @@ -252,19 +214,7 @@ def get_metadata_engine(self) -> Engine: def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: # Add all schemas to the schema resolver - for wu in super().get_workunits_internal(): - urn = wu.get_urn() - schema_metadata = wu.get_aspect_of_type(SchemaMetadataClass) - if schema_metadata: - self.schema_resolver.add_schema_metadata(urn, schema_metadata) - view_properties = wu.get_aspect_of_type(ViewPropertiesClass) - if view_properties and self.config.include_view_lineage: - self._view_definition_cache[urn] = view_properties.viewLogic - yield wu - - if self.config.include_view_lineage: - self.report.report_ingestion_stage_start("view lineage extraction") - yield from self.get_view_lineage() + yield from super().get_workunits_internal() if self.config.include_table_lineage or self.config.include_usage_statistics: self.report.report_ingestion_stage_start("audit log extraction") diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py index 7a49551dc1235..efb1d3ffe119f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py @@ -1,6 +1,6 @@ import typing import urllib.parse -from typing import Any, Dict, Iterable, Optional +from typing import Any, Dict, Iterable, Optional, Tuple from pydantic.fields import Field from sqlalchemy import create_engine, inspect @@ -71,6 +71,10 @@ def __init__(self, config, ctx, platform): super().__init__(config, ctx, platform) self.config: TwoTierSQLAlchemyConfig = config + def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]: + schema, _view = dataset_identifier.split(".", 1) + return None, schema + def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index a417cae2b1ab0..b89db755853bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -86,7 +86,7 @@ class VerticaConfig(BasicSQLAlchemyConfig): default=True, description="Whether Models should be ingested." ) - include_view_lineage: Optional[bool] = pydantic.Field( + include_view_lineage: bool = pydantic.Field( default=True, description="If the source supports it, include view lineage to the underlying storage location.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py index be97e9380f1f5..7fb2cf9813cab 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py @@ -11,7 +11,6 @@ ConfigModel, ConfigurationError, DynamicTypedConfig, - LineageConfig, ) from datahub.configuration.time_window_config import BaseTimeWindowConfig from datahub.configuration.validate_field_rename import pydantic_renamed_field @@ -100,7 +99,7 @@ class StatefulIngestionConfigBase(GenericModel, Generic[CustomConfig]): ) -class StatefulLineageConfigMixin(LineageConfig): +class StatefulLineageConfigMixin: enable_stateful_lineage_ingestion: bool = Field( default=True, description="Enable stateful lineage ingestion." diff --git a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py index 0d72fc52da0ca..c3e8c175f1de5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py +++ b/metadata-ingestion/src/datahub/ingestion/source_config/sql/snowflake.py @@ -166,13 +166,17 @@ def _check_oauth_config(oauth_config: Optional[OAuthConfiguration]) -> None: "but should be set when using use_certificate false for oauth_config" ) - @pydantic.validator("include_view_lineage") - def validate_include_view_lineage(cls, v, values): - if not values.get("include_table_lineage") and v: + @pydantic.root_validator() + def validate_include_view_lineage(cls, values): + if ( + "include_table_lineage" in values + and not values.get("include_table_lineage") + and values.get("include_view_lineage") + ): raise ValueError( "include_table_lineage must be True for include_view_lineage to be set." ) - return v + return values def get_sql_alchemy_url( self, diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 526d90b2a1bfa..1d74b20569814 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -623,9 +623,9 @@ def _schema_aware_fuzzy_column_resolve( statement = sqlglot.optimizer.annotate_types.annotate_types( statement, schema=sqlglot_db_schema ) - except sqlglot.errors.OptimizeError as e: + except (sqlglot.errors.OptimizeError, sqlglot.errors.ParseError) as e: # This is not a fatal error, so we can continue. - logger.debug("sqlglot failed to annotate types: %s", e) + logger.debug("sqlglot failed to annotate or parse types: %s", e) try: assert isinstance(statement, _SupportedColumnLineageTypesTuple) @@ -1156,3 +1156,20 @@ def create_lineage_sql_parsed_result( finally: if needs_close: schema_resolver.close() + + +def view_definition_lineage_helper( + result: SqlParsingResult, view_urn: str +) -> SqlParsingResult: + if result.query_type is QueryType.SELECT: + # Some platforms (e.g. postgres) store only ` . For such view definitions, `result.out_tables` and + # `result.column_lineage[].downstream` are empty in `sqlglot_lineage` response, whereas upstream + # details and downstream column details are extracted correctly. + # Here, we inject view V's urn in `result.out_tables` and `result.column_lineage[].downstream` + # to get complete lineage result. + result.out_tables = [view_urn] + if result.column_lineage: + for col_result in result.column_lineage: + col_result.downstream.table = view_urn + return result diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json index f3b6d2b8138cc..6774d4c7055b9 100644 --- a/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json +++ b/metadata-ingestion/tests/integration/hive/hive_mces_all_db_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -93,7 +98,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -111,7 +117,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", @@ -121,7 +127,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721972", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -187,7 +193,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -204,7 +211,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -224,7 +232,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -239,7 +248,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -257,17 +267,19 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", "Table Type:": "MANAGED_TABLE", "Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}", + "Table Parameters: another.comment": "This table has no partitions", + "Table Parameters: comment": "This table has array of structs", "Table Parameters: numFiles": "1", "Table Parameters: numRows": "1", "Table Parameters: rawDataSize": "32", "Table Parameters: totalSize": "33", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721976", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -278,6 +290,7 @@ "Storage Desc Params: serialization.format": "1" }, "name": "array_struct_test", + "description": "This table has array of structs", "tags": [] } }, @@ -304,6 +317,7 @@ { "fieldPath": "property_id", "nullable": true, + "description": "id of property", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -316,6 +330,7 @@ { "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", "nullable": true, + "description": "service types and providers", "type": { "type": { "com.linkedin.pegasus2avro.schema.ArrayType": { @@ -368,7 +383,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -385,7 +401,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -405,7 +422,189 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Table Type:": "VIRTUAL_VIEW", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "null", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.array_struct_test", + "View Expanded Text:": "select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "View Rewrite Enabled:": "No" + }, + "name": "array_struct_test_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test_view", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -420,7 +619,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -438,7 +638,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", @@ -448,7 +648,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721978", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -518,7 +718,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -535,7 +736,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -555,7 +757,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -570,7 +773,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -588,7 +792,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", @@ -598,7 +802,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721978", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -717,7 +921,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -734,7 +939,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -754,7 +960,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -769,7 +976,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -787,16 +995,17 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:22 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:08 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", "Table Type:": "MANAGED_TABLE", "Table Parameters: numFiles": "1", + "Table Parameters: numPartitions": "1", "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1688578704", + "Table Parameters: transient_lastDdlTime": "1697721968", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -853,6 +1062,18 @@ "nativeDataType": "string", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "baz", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false } ] } @@ -862,7 +1083,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -879,7 +1101,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +1122,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +1138,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -932,7 +1157,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", @@ -942,7 +1167,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721972", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1039,7 +1264,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1056,7 +1282,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1076,7 +1303,188 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test_view_materialized", + "Table Type:": "MATERIALIZED_VIEW", + "Table Parameters: numFiles": "0", + "Table Parameters: totalSize": "0", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", + "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.struct_test", + "View Expanded Text:": "null", + "View Rewrite Enabled:": "No" + }, + "name": "struct_test_view_materialized", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.struct_test_view_materialized", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1091,7 +1499,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1109,7 +1518,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", @@ -1119,10 +1528,10 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", - "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", - "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", - "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", + "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", "Compressed:": "No", "Num Buckets:": "-1", "Bucket Columns:": "[]", @@ -1285,7 +1694,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1302,7 +1712,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1322,7 +1733,26 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW `db1.array_struct_test_view` AS select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1342,7 +1772,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1357,7 +1788,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1372,7 +1804,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1389,7 +1822,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1404,7 +1838,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1419,7 +1854,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1437,7 +1873,7 @@ "customProperties": { "Database:": "db2", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:24 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:10 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db2.db/pokes", @@ -1446,7 +1882,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721971", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1454,10 +1890,7 @@ "Num Buckets:": "-1", "Bucket Columns:": "[]", "Sort Columns:": "[]", - "Storage Desc Params: serialization.format": "1", - "Table:": "db2.pokes", - "Constraint Name:": "pk_1173723383_1683022998392_0", - "Column Names:": "foo" + "Storage Desc Params: serialization.format": "1" }, "name": "pokes", "tags": [] @@ -1515,7 +1948,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1966,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1552,7 +1987,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1572,7 +2008,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1587,7 +2024,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1602,7 +2040,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1619,7 +2058,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1634,7 +2074,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json index 08f281f398909..e93924049f626 100644 --- a/metadata-ingestion/tests/integration/hive/hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/hive/hive_mces_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -93,7 +98,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -111,7 +117,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", @@ -121,7 +127,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721972", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -187,7 +193,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -204,7 +211,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -224,7 +232,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -239,7 +248,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -257,17 +267,19 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", "Table Type:": "MANAGED_TABLE", "Table Parameters: COLUMN_STATS_ACCURATE": "{\\\"BASIC_STATS\\\":\\\"true\\\"}", + "Table Parameters: another.comment": "This table has no partitions", + "Table Parameters: comment": "This table has array of structs", "Table Parameters: numFiles": "1", "Table Parameters: numRows": "1", "Table Parameters: rawDataSize": "32", "Table Parameters: totalSize": "33", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721976", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -278,6 +290,7 @@ "Storage Desc Params: serialization.format": "1" }, "name": "array_struct_test", + "description": "This table has array of structs", "tags": [] } }, @@ -304,6 +317,7 @@ { "fieldPath": "property_id", "nullable": true, + "description": "id of property", "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} @@ -316,6 +330,7 @@ { "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", "nullable": true, + "description": "service types and providers", "type": { "type": { "com.linkedin.pegasus2avro.schema.ArrayType": { @@ -368,7 +383,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -385,7 +401,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -405,7 +422,189 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Table Type:": "VIRTUAL_VIEW", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "null", + "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.array_struct_test", + "View Expanded Text:": "select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "View Rewrite Enabled:": "No" + }, + "name": "array_struct_test_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.array_struct_test_view", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -420,7 +619,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -438,7 +638,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", @@ -448,7 +648,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721978", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -518,7 +718,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -535,7 +736,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -555,7 +757,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -570,7 +773,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -588,7 +792,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", @@ -598,7 +802,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", + "Table Parameters: transient_lastDdlTime": "1697721978", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -717,7 +921,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -734,7 +939,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -754,7 +960,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -769,7 +976,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -787,16 +995,17 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:22 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:08 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", "Table Type:": "MANAGED_TABLE", "Table Parameters: numFiles": "1", + "Table Parameters: numPartitions": "1", "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "5812", - "Table Parameters: transient_lastDdlTime": "1688578704", + "Table Parameters: transient_lastDdlTime": "1697721968", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -853,6 +1062,18 @@ "nativeDataType": "string", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "baz", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false } ] } @@ -862,7 +1083,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -879,7 +1101,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +1122,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +1138,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -932,7 +1157,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:26 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:12 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", @@ -942,7 +1167,7 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578706", + "Table Parameters: transient_lastDdlTime": "1697721972", "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", @@ -1039,7 +1264,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1056,7 +1282,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1076,7 +1303,188 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "Database:": "db1", + "Owner:": "root", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", + "LastAccessTime:": "UNKNOWN", + "Retention:": "0", + "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test_view_materialized", + "Table Type:": "MATERIALIZED_VIEW", + "Table Parameters: numFiles": "0", + "Table Parameters: totalSize": "0", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", + "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", + "Compressed:": "No", + "Num Buckets:": "-1", + "Bucket Columns:": "[]", + "Sort Columns:": "[]", + "View Original Text:": "select * from db1.struct_test", + "View Expanded Text:": "null", + "View Rewrite Enabled:": "No" + }, + "name": "struct_test_view_materialized", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "db1.struct_test_view_materialized", + "platform": "urn:li:dataPlatform:hive", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "property_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "int", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.RecordType": {} + } + }, + "nativeDataType": "struct>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"struct>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=array].[type=int].provider", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "int" + ] + } + } + }, + "nativeDataType": "array", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array\"}" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test_view_materialized,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:ded36d15fcfbbb939830549697122661", + "urn": "urn:li:container:ded36d15fcfbbb939830549697122661" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1091,7 +1499,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1109,7 +1518,7 @@ "customProperties": { "Database:": "db1", "Owner:": "root", - "CreateTime:": "Wed Jul 05 17:38:30 UTC 2023", + "CreateTime:": "Thu Oct 19 13:26:18 UTC 2023", "LastAccessTime:": "UNKNOWN", "Retention:": "0", "Location:": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", @@ -1119,10 +1528,10 @@ "Table Parameters: numRows": "0", "Table Parameters: rawDataSize": "0", "Table Parameters: totalSize": "0", - "Table Parameters: transient_lastDdlTime": "1688578710", - "SerDe Library:": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", - "InputFormat:": "org.apache.hadoop.mapred.TextInputFormat", - "OutputFormat:": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Table Parameters: transient_lastDdlTime": "1697721978", + "SerDe Library:": "org.apache.hadoop.hive.ql.io.orc.OrcSerde", + "InputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat", + "OutputFormat:": "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat", "Compressed:": "No", "Num Buckets:": "-1", "Bucket Columns:": "[]", @@ -1285,7 +1694,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1302,7 +1712,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1322,7 +1733,26 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "hive-test" + "runId": "hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW `db1.array_struct_test_view` AS select `array_struct_test`.`property_id`, `array_struct_test`.`service` from `db1`.`array_struct_test`", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/hive/hive_setup.sql b/metadata-ingestion/tests/integration/hive/hive_setup.sql index 8fb8498894bc0..323a78e24d10b 100644 --- a/metadata-ingestion/tests/integration/hive/hive_setup.sql +++ b/metadata-ingestion/tests/integration/hive/hive_setup.sql @@ -1,10 +1,10 @@ CREATE DATABASE IF NOT EXISTS db1; CREATE DATABASE IF NOT EXISTS db2; -- Setup a "pokes" example table. -CREATE TABLE IF NOT EXISTS db1.pokes (foo INT, bar STRING); -LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db1.pokes; +CREATE TABLE IF NOT EXISTS db1.pokes (foo INT, bar STRING) PARTITIONED BY (baz STRING); +LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db1.pokes PARTITION (baz='dummy'); -CREATE TABLE IF NOT EXISTS db2.pokes (foo INT, bar STRING, CONSTRAINT pk_1173723383_1683022998392_0 primary key(foo) DISABLE NOVALIDATE NORELY); +CREATE TABLE IF NOT EXISTS db2.pokes (foo INT, bar STRING); LOAD DATA LOCAL INPATH '/opt/hive/examples/files/kv1.txt' OVERWRITE INTO TABLE db2.pokes; -- Setup a table with a special character. @@ -23,12 +23,12 @@ CREATE TABLE IF NOT EXISTS db1.struct_test CREATE TABLE IF NOT EXISTS db1.array_struct_test ( - property_id INT, + property_id INT COMMENT 'id of property', service array - >> -); + >> COMMENT 'service types and providers' +) TBLPROPERTIES ('comment' = 'This table has array of structs', 'another.comment' = 'This table has no partitions');; WITH test_data as ( @@ -39,6 +39,9 @@ test_data as ( INSERT INTO TABLE db1.array_struct_test select * from test_data; +CREATE MATERIALIZED VIEW db1.struct_test_view_materialized as select * from db1.struct_test; +CREATE VIEW db1.array_struct_test_view as select * from db1.array_struct_test; + CREATE TABLE IF NOT EXISTS db1.nested_struct_test ( property_id INT, @@ -50,9 +53,6 @@ CREATE TABLE IF NOT EXISTS db1.nested_struct_test CREATE TABLE db1.union_test( foo UNIONTYPE, struct, struct> -); +) STORED AS ORC ; -CREATE TABLE db1.map_test( - KeyValue String, - RecordId map -); \ No newline at end of file +CREATE TABLE db1.map_test(KeyValue String, RecordId map); \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json index 4aaefb48d33e1..38b03ce238d1c 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mces_no_db_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -93,7 +98,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -213,7 +219,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -230,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -250,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -265,7 +274,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -361,7 +371,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -378,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -398,7 +410,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +567,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -969,7 +983,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -989,7 +1004,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1004,7 +1020,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1019,7 +1036,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1036,7 +1054,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1053,7 +1072,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1068,7 +1088,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1083,7 +1104,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1215,7 +1237,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1232,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1249,7 +1273,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1269,7 +1294,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1284,7 +1310,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1445,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1435,7 +1463,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1452,7 +1481,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1472,7 +1502,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1487,7 +1518,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1586,7 +1618,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1603,7 +1636,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1620,7 +1654,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1637,7 +1672,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1657,7 +1693,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1677,7 +1714,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1692,7 +1730,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1707,7 +1746,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1724,7 +1764,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1739,7 +1780,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1754,7 +1796,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1874,7 +1917,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1891,7 +1935,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1911,7 +1956,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1926,7 +1972,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2022,7 +2069,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2039,7 +2087,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2059,7 +2108,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2182,7 +2232,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2233,7 +2284,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2253,7 +2305,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2268,7 +2321,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2283,7 +2337,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2300,7 +2355,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2315,7 +2371,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2330,7 +2387,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2390,7 +2448,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2407,7 +2466,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2427,7 +2487,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2442,7 +2503,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2502,7 +2564,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2519,7 +2582,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2539,7 +2603,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2568,7 +2633,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2597,7 +2663,79 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "mysql-test" + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),doubleVal)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),doubleVal)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),path)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),path)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD),urn)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json index 535ce964c6058..b9b2a3b2141a8 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -99,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +154,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -181,7 +191,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -201,7 +212,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -216,7 +228,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -231,7 +244,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -248,7 +262,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -263,7 +278,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -284,7 +300,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -299,7 +316,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -314,7 +332,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -331,7 +350,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -346,7 +366,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -366,7 +387,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -381,7 +403,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -537,7 +560,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +578,186 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", + "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + }, + { + "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", + "is_view": "True" + }, + "name": "metadata_aspect_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "postgrestest.public.metadata_aspect_view", + "platform": "urn:li:dataPlatform:postgres", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "urn", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=500)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "aspect", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=200)", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", + "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + }, + { + "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -634,31 +837,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", - "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" - }, - { - "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", - "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -675,29 +855,39 @@ "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", - "type": "TRANSFORMED" + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" + ], + "confidenceScore": 1.0 } ] } }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml b/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml index b390d9246677e..2bfa39a65363b 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml +++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_to_file_with_db_estimate_row_count.yml @@ -25,7 +25,7 @@ source: include_field_distinct_value_frequencies: false include_field_histogram: false catch_exceptions: true - include_views: false + include_views: true sink: type: file config: diff --git a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json index bf36a39a8c103..f6fa0a0ed032e 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json +++ b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -99,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +154,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -181,7 +191,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -196,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -352,7 +364,8 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +382,186 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", + "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + }, + { + "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", + "is_view": "True" + }, + "name": "metadata_aspect_view", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "postgrestest.public.metadata_aspect_view", + "platform": "urn:li:dataPlatform:postgres", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "urn", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=500)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "aspect", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=200)", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": " SELECT metadata_aspect_v2.urn,\n metadata_aspect_v2.aspect\n FROM metadata_aspect_v2\n WHERE (metadata_aspect_v2.version = 0);", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", + "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + }, + { + "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", + "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1646575200000, + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { @@ -391,31 +583,57 @@ }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "upstreamLineage", "aspect": { "json": { - "path": [ + "upstreams": [ { - "id": "urn:li:container:a6097853edba03be190d99ece4b307ff", - "urn": "urn:li:container:a6097853edba03be190d99ece4b307ff" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),aspect)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),aspect)" + ], + "confidenceScore": 1.0 }, { - "id": "urn:li:container:51904fc8cd5cc729bc630decff284525", - "urn": "urn:li:container:51904fc8cd5cc729bc630decff284525" + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_v2,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgrestest.public.metadata_aspect_view,PROD),urn)" + ], + "confidenceScore": 1.0 } ] } }, "systemMetadata": { "lastObserved": 1646575200000, - "runId": "postgres-test" + "runId": "postgres-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml b/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml index a489877d52a23..4a2cc543f2d01 100644 --- a/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml +++ b/metadata-ingestion/tests/integration/postgres/postgres_to_file_with_db_estimate_row_count.yml @@ -13,7 +13,7 @@ source: profile_table_row_count_estimate_only: true turn_off_expensive_profiling_metrics: true catch_exceptions: true - include_views: false + include_views: true sink: type: file config: diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py index cd53b8f7db4f6..4b0dd2b1045a3 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -287,8 +287,9 @@ def test_snowflake_unexpected_snowflake_view_lineage_error_causes_pipeline_warni SnowflakeV2Config, cast(PipelineConfig, snowflake_pipeline_config1).source.config, ) + config.include_table_lineage = True config.include_view_lineage = True - config.incremental_lineage = False + pipeline = Pipeline(snowflake_pipeline_config1) pipeline.run() pipeline.raise_from_status() # pipeline should not fail diff --git a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json index 19961e48b4a33..c43223c68a6b6 100644 --- a/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json +++ b/metadata-ingestion/tests/integration/trino/trino_hive_mces_golden.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -99,7 +104,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -114,7 +120,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -129,7 +136,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -146,7 +154,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -161,7 +170,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -181,7 +191,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -196,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -219,7 +231,7 @@ "numrows": "1", "rawdatasize": "32", "totalsize": "33", - "transient_lastddltime": "1688422059" + "transient_lastddltime": "1698223433" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -315,7 +327,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -332,7 +345,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -356,7 +370,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -371,7 +386,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -392,7 +408,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688422063" + "transient_lastddltime": "1698223435" }, "name": "map_test", "tags": [] @@ -454,7 +470,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -471,7 +488,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -495,7 +513,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -510,7 +529,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -531,7 +551,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688422062" + "transient_lastddltime": "1698223435" }, "name": "nested_struct_test", "tags": [] @@ -642,7 +662,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -659,7 +680,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -683,7 +705,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -698,7 +721,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -714,7 +738,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1688421792" + "transient_lastddltime": "1698223429" }, "name": "pokes", "tags": [] @@ -784,7 +808,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -801,7 +826,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -825,7 +851,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -840,7 +867,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -861,7 +889,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688421808" + "transient_lastddltime": "1698223431" }, "name": "struct_test", "tags": [] @@ -950,7 +978,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -967,7 +996,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -991,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1006,7 +1037,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1024,7 +1056,7 @@ "customProperties": { "numfiles": "0", "totalsize": "0", - "transient_lastddltime": "1688422062" + "transient_lastddltime": "1698223435" }, "name": "struct_test_view_materialized", "tags": [] @@ -1113,7 +1145,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1130,7 +1163,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1154,7 +1188,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1169,7 +1204,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1190,7 +1226,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688421807" + "transient_lastddltime": "1698223431" }, "name": "_test_table_underscore", "tags": [] @@ -1248,7 +1284,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1265,7 +1302,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1289,7 +1327,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1304,7 +1343,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1325,7 +1365,7 @@ "numrows": "0", "rawdatasize": "0", "totalsize": "0", - "transient_lastddltime": "1688422062" + "transient_lastddltime": "1698223435" }, "name": "union_test", "tags": [] @@ -1467,7 +1507,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1484,7 +1525,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1550,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1523,7 +1566,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1539,7 +1583,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastddltime": "1688422062", + "transient_lastddltime": "1698223435", "view_definition": "SELECT \"property_id\", \"service\"\nFROM \"db1\".\"array_struct_test\"", "is_view": "True" }, @@ -1634,7 +1678,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1651,7 +1696,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1668,7 +1714,57 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),property_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),property_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test,PROD),service)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:trino,hivedb.db1.array_struct_test_view,PROD),service)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1692,7 +1788,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "trino-hive-test" + "runId": "trino-hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py index 54a22d860285c..e8485106c6a81 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py @@ -104,6 +104,27 @@ def test_incremental_table_lineage(tmp_path, pytestconfig): ) +def test_incremental_table_lineage_empty_upstreams(tmp_path, pytestconfig): + + urn = make_dataset_urn(platform, "dataset1") + aspect = make_lineage_aspect( + "dataset1", + upstreams=[], + ) + + processed_wus = auto_incremental_lineage( + graph=None, + incremental_lineage=True, + stream=[ + MetadataChangeProposalWrapper( + entityUrn=urn, aspect=aspect, systemMetadata=system_metadata + ).as_workunit() + ], + ) + + assert [wu.metadata for wu in processed_wus] == [] + + @pytest.mark.parametrize( "gms_aspect,current_aspect,output_aspect", [ From a96a512166564cf9c40af4b83e7138dcb48c914d Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Thu, 26 Oct 2023 18:46:10 +0200 Subject: [PATCH 33/80] fix(ingest/bigquery): Fixing lineage filter query (#9114) --- .../ingestion/source/bigquery_v2/bigquery_config.py | 1 + .../datahub/ingestion/source/bigquery_v2/lineage.py | 8 ++++++-- .../src/datahub/ingestion/source/bigquery_v2/usage.py | 10 ++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 6203192769750..f762d451849ab 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -309,6 +309,7 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: "dataset_pattern is not set but schema_pattern is set, using schema_pattern as dataset_pattern. schema_pattern will be deprecated, please use dataset_pattern instead." ) values["dataset_pattern"] = schema_pattern + dataset_pattern = schema_pattern elif ( dataset_pattern != AllowDenyPattern.allow_all() and schema_pattern != AllowDenyPattern.allow_all() diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index aa462435b8105..e9acf5ea86044 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -20,6 +20,7 @@ from google.cloud.datacatalog import lineage_v1 from google.cloud.logging_v2.client import Client as GCPLoggingClient +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter import mce_builder from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit @@ -683,8 +684,11 @@ def _create_lineage_map( self.report.num_skipped_lineage_entries_missing_data[e.project_id] += 1 continue - if not self.config.dataset_pattern.allowed( - destination_table.table_identifier.dataset + if not is_schema_allowed( + self.config.dataset_pattern, + destination_table.table_identifier.dataset, + destination_table.table_identifier.project_id, + self.config.match_fully_qualified_names, ) or not self.config.table_pattern.allowed( destination_table.table_identifier.get_table_name() ): diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 7fc38991e5928..65b559550ffc5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -21,6 +21,7 @@ import humanfriendly +from datahub.configuration.pattern_utils import is_schema_allowed from datahub.configuration.time_window_config import ( BaseTimeWindowConfig, get_time_bucket, @@ -335,10 +336,11 @@ def get_time_window(self) -> Tuple[datetime, datetime]: def _is_table_allowed(self, table_ref: Optional[BigQueryTableRef]) -> bool: return ( table_ref is not None - and self.config.dataset_pattern.allowed( - f"{table_ref.table_identifier.project_id}.{table_ref.table_identifier.dataset}" - if self.config.match_fully_qualified_names - else table_ref.table_identifier.dataset + and is_schema_allowed( + self.config.dataset_pattern, + table_ref.table_identifier.dataset, + table_ref.table_identifier.project_id, + self.config.match_fully_qualified_names, ) and self.config.table_pattern.allowed(str(table_ref.table_identifier)) ) From 852267972c8efc1ceb5a0cbd71594d7ea2529d49 Mon Sep 17 00:00:00 2001 From: "nicholas.fwang" Date: Fri, 27 Oct 2023 01:57:43 +0900 Subject: [PATCH 34/80] refactor(ingestion/mongodb): Add platform_instance to mongodb (#8663) Co-authored-by: Harshal Sheth --- .../src/datahub/ingestion/source/mongodb.py | 16 +++++++++++++--- .../integration/mongodb/mongodb_mces_golden.json | 16 ++++++++-------- .../tests/integration/mongodb/test_mongodb.py | 1 + 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index f02b6845e40b5..890c5c64bd5e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -11,7 +11,11 @@ from pymongo.mongo_client import MongoClient from datahub.configuration.common import AllowDenyPattern -from datahub.configuration.source_common import EnvConfigMixin +from datahub.configuration.source_common import ( + EnvConfigMixin, + PlatformInstanceConfigMixin, +) +from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -55,7 +59,7 @@ DENY_DATABASE_LIST = set(["admin", "config", "local"]) -class MongoDBConfig(EnvConfigMixin): +class MongoDBConfig(PlatformInstanceConfigMixin, EnvConfigMixin): # See the MongoDB authentication docs for details and examples. # https://pymongo.readthedocs.io/en/stable/examples/authentication.html connect_uri: str = Field( @@ -199,6 +203,7 @@ def construct_schema_pymongo( @platform_name("MongoDB") @config_class(MongoDBConfig) @support_status(SupportStatus.CERTIFIED) +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @dataclass class MongoDBSource(Source): @@ -320,7 +325,12 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.report.report_dropped(dataset_name) continue - dataset_urn = f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{self.config.env})" + dataset_urn = make_dataset_urn_with_platform_instance( + platform=platform, + name=dataset_name, + env=self.config.env, + platform_instance=self.config.platform_instance, + ) dataset_snapshot = DatasetSnapshot( urn=dataset_urn, diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json index 1f662cfe514e2..e16101b137ac9 100644 --- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json @@ -2,7 +2,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.emptyCollection,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -41,7 +41,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.firstCollection,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -345,7 +345,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.largeCollection,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -3988,7 +3988,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.secondCollection,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { @@ -4135,7 +4135,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.emptyCollection,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4150,7 +4150,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.firstCollection,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4165,7 +4165,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.largeCollection,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4180,7 +4180,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,mngdb.secondCollection,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py index 5228c21223e24..56fb471d4c9f1 100644 --- a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py +++ b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py @@ -25,6 +25,7 @@ def test_mongodb_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time "username": "mongoadmin", "password": "examplepass", "maxDocumentSize": 25000, + "platform_instance": "instance", }, }, "sink": { From ce6f833be444497972f17fd8bfe170f00af4bca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mert=20Tun=C3=A7?= Date: Fri, 27 Oct 2023 01:06:37 +0300 Subject: [PATCH 35/80] fix(kafka-setup): Don't set truststore pass for PEM files (#8656) Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com> --- docker/kafka-setup/kafka-setup.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/kafka-setup/kafka-setup.sh b/docker/kafka-setup/kafka-setup.sh index 629e9bc9484ee..b5024e49e59f1 100755 --- a/docker/kafka-setup/kafka-setup.sh +++ b/docker/kafka-setup/kafka-setup.sh @@ -36,7 +36,9 @@ if [[ $KAFKA_PROPERTIES_SECURITY_PROTOCOL == "SSL" ]]; then fi if [[ -n $KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION ]]; then echo "ssl.truststore.location=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION" >> $CONNECTION_PROPERTIES_PATH - echo "ssl.truststore.password=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD" >> $CONNECTION_PROPERTIES_PATH + if [[ $KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE != "PEM" ]]; then + echo "ssl.truststore.password=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD" >> $CONNECTION_PROPERTIES_PATH + fi if [[ -n $KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE ]]; then echo "ssl.truststore.type=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE" >> $CONNECTION_PROPERTIES_PATH fi From 12f6fe0f906dd21fbc3985cfd13ceac4fc9ac8f0 Mon Sep 17 00:00:00 2001 From: Tony Ouyang Date: Thu, 26 Oct 2023 15:07:36 -0700 Subject: [PATCH 36/80] fix(ingest): Fix roll back failure when REST_API_AUTHORIZATION_ENABLED is set to true (#9092) --- metadata-service/war/src/main/resources/boot/policies.json | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json index 410596cc30cbe..18cb48bfcf1f0 100644 --- a/metadata-service/war/src/main/resources/boot/policies.json +++ b/metadata-service/war/src/main/resources/boot/policies.json @@ -64,7 +64,8 @@ "GET_TIMELINE_PRIVILEGE", "PRODUCE_PLATFORM_EVENT_PRIVILEGE", "MANAGE_DATA_PRODUCTS", - "MANAGE_GLOBAL_OWNERSHIP_TYPES" + "MANAGE_GLOBAL_OWNERSHIP_TYPES", + "DELETE_ENTITY" ], "displayName":"Root User - Edit and View All Resources", "description":"Grants full edit and view privileges for all resources to root 'datahub' root user.", @@ -263,7 +264,8 @@ "GET_ENTITY_PRIVILEGE", "GET_TIMELINE_PRIVILEGE", "PRODUCE_PLATFORM_EVENT_PRIVILEGE", - "MANAGE_DATA_PRODUCTS" + "MANAGE_DATA_PRODUCTS", + "DELETE_ENTITY" ], "displayName":"Admins - Metadata Policy", "description":"Admins have all metadata privileges.", From 1ac831f07aa2bdab555acf50431f6466bb291f61 Mon Sep 17 00:00:00 2001 From: Raj Tekal Date: Thu, 26 Oct 2023 19:33:09 -0400 Subject: [PATCH 37/80] (fix): Avoid java.util.ConcurrentModificationException (#9090) Co-authored-by: Pedro Silva --- .../authorization/DataHubAuthorizer.java | 105 +++++++++++------- 1 file changed, 64 insertions(+), 41 deletions(-) diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java index 4553139e3ca54..e30fb93109915 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authorization/DataHubAuthorizer.java @@ -19,6 +19,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.annotation.Nonnull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -53,6 +55,7 @@ public enum AuthorizationMode { // Maps privilege name to the associated set of policies for fast access. // Not concurrent data structure because writes are always against the entire thing. private final Map> _policyCache = new HashMap<>(); // Shared Policy Cache. + private final ReadWriteLock _lockPolicyCache = new ReentrantReadWriteLock(); private final ScheduledExecutorService _refreshExecutorService = Executors.newScheduledThreadPool(1); private final PolicyRefreshRunnable _policyRefreshRunnable; @@ -71,7 +74,7 @@ public DataHubAuthorizer( _systemAuthentication = Objects.requireNonNull(systemAuthentication); _mode = Objects.requireNonNull(mode); _policyEngine = new PolicyEngine(systemAuthentication, Objects.requireNonNull(entityClient)); - _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache); + _policyRefreshRunnable = new PolicyRefreshRunnable(systemAuthentication, new PolicyFetcher(entityClient), _policyCache, _lockPolicyCache); _refreshExecutorService.scheduleAtFixedRate(_policyRefreshRunnable, delayIntervalSeconds, refreshIntervalSeconds, TimeUnit.SECONDS); } @@ -90,31 +93,41 @@ public AuthorizationResult authorize(@Nonnull final AuthorizationRequest request Optional resolvedResourceSpec = request.getResourceSpec().map(_entitySpecResolver::resolve); - // 1. Fetch the policies relevant to the requested privilege. - final List policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>()); - - // 2. Evaluate each policy. - for (DataHubPolicyInfo policy : policiesToEvaluate) { - if (isRequestGranted(policy, request, resolvedResourceSpec)) { - // Short circuit if policy has granted privileges to this actor. - return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, - String.format("Granted by policy with type: %s", policy.getType())); + _lockPolicyCache.readLock().lock(); + try { + // 1. Fetch the policies relevant to the requested privilege. + final List policiesToEvaluate = _policyCache.getOrDefault(request.getPrivilege(), new ArrayList<>()); + + // 2. Evaluate each policy. + for (DataHubPolicyInfo policy : policiesToEvaluate) { + if (isRequestGranted(policy, request, resolvedResourceSpec)) { + // Short circuit if policy has granted privileges to this actor. + return new AuthorizationResult(request, AuthorizationResult.Type.ALLOW, + String.format("Granted by policy with type: %s", policy.getType())); + } } + return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null); + } finally { + _lockPolicyCache.readLock().unlock(); } - return new AuthorizationResult(request, AuthorizationResult.Type.DENY, null); } public List getGrantedPrivileges(final String actor, final Optional resourceSpec) { - // 1. Fetch all policies - final List policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>()); + _lockPolicyCache.readLock().lock(); + try { + // 1. Fetch all policies + final List policiesToEvaluate = _policyCache.getOrDefault(ALL, new ArrayList<>()); - Urn actorUrn = UrnUtils.getUrn(actor); - final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor)); + Urn actorUrn = UrnUtils.getUrn(actor); + final ResolvedEntitySpec resolvedActorSpec = _entitySpecResolver.resolve(new EntitySpec(actorUrn.getEntityType(), actor)); - Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); + Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); - return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec); + return _policyEngine.getGrantedPrivileges(policiesToEvaluate, resolvedActorSpec, resolvedResourceSpec); + } finally { + _lockPolicyCache.readLock().unlock(); + } } /** @@ -124,36 +137,42 @@ public List getGrantedPrivileges(final String actor, final Optional resourceSpec) { - // Step 1: Find policies granting the privilege. - final List policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>()); - - Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); final List authorizedUsers = new ArrayList<>(); final List authorizedGroups = new ArrayList<>(); boolean allUsers = false; boolean allGroups = false; - // Step 2: For each policy, determine whether the resource is a match. - for (DataHubPolicyInfo policy : policiesToEvaluate) { - if (!PoliciesConfig.ACTIVE_POLICY_STATE.equals(policy.getState())) { - // Policy is not active, skip. - continue; - } + _lockPolicyCache.readLock().lock(); + try { + // Step 1: Find policies granting the privilege. + final List policiesToEvaluate = _policyCache.getOrDefault(privilege, new ArrayList<>()); - final PolicyEngine.PolicyActors matchingActors = _policyEngine.getMatchingActors(policy, resolvedResourceSpec); + Optional resolvedResourceSpec = resourceSpec.map(_entitySpecResolver::resolve); - // Step 3: For each matching policy, add actors that are authorized. - authorizedUsers.addAll(matchingActors.getUsers()); - authorizedGroups.addAll(matchingActors.getGroups()); - if (matchingActors.allUsers()) { - allUsers = true; - } - if (matchingActors.allGroups()) { - allGroups = true; + + // Step 2: For each policy, determine whether the resource is a match. + for (DataHubPolicyInfo policy : policiesToEvaluate) { + if (!PoliciesConfig.ACTIVE_POLICY_STATE.equals(policy.getState())) { + // Policy is not active, skip. + continue; + } + + final PolicyEngine.PolicyActors matchingActors = _policyEngine.getMatchingActors(policy, resolvedResourceSpec); + + // Step 3: For each matching policy, add actors that are authorized. + authorizedUsers.addAll(matchingActors.getUsers()); + authorizedGroups.addAll(matchingActors.getGroups()); + if (matchingActors.allUsers()) { + allUsers = true; + } + if (matchingActors.allGroups()) { + allGroups = true; + } } + } finally { + _lockPolicyCache.readLock().unlock(); } - // Step 4: Return all authorized users and groups. return new AuthorizedActors(privilege, authorizedUsers, authorizedGroups, allUsers, allGroups); } @@ -228,6 +247,7 @@ static class PolicyRefreshRunnable implements Runnable { private final Authentication _systemAuthentication; private final PolicyFetcher _policyFetcher; private final Map> _policyCache; + private final ReadWriteLock _lockPolicyCache; @Override public void run() { @@ -253,10 +273,13 @@ public void run() { "Failed to retrieve policy urns! Skipping updating policy cache until next refresh. start: {}, count: {}", start, count, e); return; } - synchronized (_policyCache) { - _policyCache.clear(); - _policyCache.putAll(newCache); - } + } + _lockPolicyCache.writeLock().lock(); + try { + _policyCache.clear(); + _policyCache.putAll(newCache); + } finally { + _lockPolicyCache.writeLock().unlock(); } log.debug(String.format("Successfully fetched %s policies.", total)); } catch (Exception e) { From cf617d77f383a19bd6a9bce00bb2cfbd6a226e55 Mon Sep 17 00:00:00 2001 From: David Sanchez Date: Fri, 27 Oct 2023 07:01:30 +0200 Subject: [PATCH 38/80] Fix(ingest/bigquery): fix extracting comments from complex types (#8950) Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> --- .../datahub/ingestion/source/bigquery_v2/bigquery.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 692d8c4f81bb6..6959a48313010 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -1050,8 +1050,14 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: for idx, field in enumerate(schema_fields): # Remove all the [version=2.0].[type=struct]. tags to get the field path if ( - re.sub(r"\[.*?\]\.", "", field.fieldPath, 0, re.MULTILINE) - == col.field_path + re.sub( + r"\[.*?\]\.", + "", + field.fieldPath.lower(), + 0, + re.MULTILINE, + ) + == col.field_path.lower() ): field.description = col.comment schema_fields[idx] = field From cc7511501b051b9a9f66dbcc4dc4ab16ce2668e5 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 27 Oct 2023 19:42:53 +0900 Subject: [PATCH 39/80] docs: add versions 0.12.0 (#9125) --- docs-website/versions.json | 1 + 1 file changed, 1 insertion(+) diff --git a/docs-website/versions.json b/docs-website/versions.json index a5493c26a4c65..a66607b67ddd5 100644 --- a/docs-website/versions.json +++ b/docs-website/versions.json @@ -1,4 +1,5 @@ [ + "0.12.0", "0.11.0", "0.10.5" ] From 07a5e4c81b9e7b46faf9dbc830c9bac9648e5161 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Fri, 27 Oct 2023 07:52:24 -0400 Subject: [PATCH 40/80] fix(ui) Fix filtering logic for everwhere generating OR filters (#9116) --- .../src/app/search/useGetSearchQueryInputs.ts | 12 ++---- .../utils/__tests__/generateOrFilters.test.ts | 38 ++++++++++++++----- .../src/app/search/utils/generateOrFilters.ts | 16 +++++--- 3 files changed, 43 insertions(+), 23 deletions(-) diff --git a/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts b/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts index 05419e5abed35..9a3af8fb8d56c 100644 --- a/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts +++ b/datahub-web-react/src/app/search/useGetSearchQueryInputs.ts @@ -3,7 +3,7 @@ import { useLocation, useParams } from 'react-router'; import { useMemo } from 'react'; import { FacetFilterInput, EntityType } from '../../types.generated'; import { useEntityRegistry } from '../useEntityRegistry'; -import { ENTITY_FILTER_NAME, FILTER_DELIMITER, UnionType } from './utils/constants'; +import { ENTITY_FILTER_NAME, UnionType } from './utils/constants'; import { useUserContext } from '../context/useUserContext'; import useFilters from './utils/useFilters'; import { generateOrFilters } from './utils/generateOrFilters'; @@ -27,12 +27,6 @@ export default function useGetSearchQueryInputs(excludedFilterFields?: Array = useFilters(params); - const nonNestedFilters = filters.filter( - (f) => !f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field), - ); - const nestedFilters = filters.filter( - (f) => f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field), - ); const entityFilters: Array = useMemo( () => filters @@ -43,8 +37,8 @@ export default function useGetSearchQueryInputs(excludedFilterFields?: Array generateOrFilters(unionType, nonNestedFilters, nestedFilters), - [nonNestedFilters, nestedFilters, unionType], + () => generateOrFilters(unionType, filters, excludedFilterFields), + [filters, excludedFilterFields, unionType], ); return { entityFilters, query, unionType, filters, orFilters, viewUrn, page, activeType, sortInput }; diff --git a/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts b/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts index 505c50efb289f..fd5a5691b454e 100644 --- a/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts +++ b/datahub-web-react/src/app/search/utils/__tests__/generateOrFilters.test.ts @@ -1,7 +1,7 @@ import { DOMAINS_FILTER_NAME, ENTITY_SUB_TYPE_FILTER_NAME, - ENTITY_TYPE_FILTER_NAME, + ENTITY_FILTER_NAME, TAGS_FILTER_NAME, UnionType, } from '../constants'; @@ -10,7 +10,7 @@ import { generateOrFilters } from '../generateOrFilters'; describe('generateOrFilters', () => { it('should generate orFilters with UnionType.AND', () => { const filters = [ - { field: ENTITY_TYPE_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, + { field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, ]; const orFilters = generateOrFilters(UnionType.AND, filters); @@ -24,7 +24,7 @@ describe('generateOrFilters', () => { it('should generate orFilters with UnionType.OR', () => { const filters = [ - { field: ENTITY_TYPE_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, + { field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, ]; const orFilters = generateOrFilters(UnionType.OR, filters); @@ -43,17 +43,23 @@ describe('generateOrFilters', () => { const filters = [ { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] }, + { field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }, ]; - const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }]; - const orFilters = generateOrFilters(UnionType.AND, filters, nestedFilters); + // const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }]; + const orFilters = generateOrFilters(UnionType.AND, filters); expect(orFilters).toMatchObject([ { - and: [...filters, { field: '_entityType', values: ['CONTAINER'] }], + and: [ + { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, + { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] }, + { field: '_entityType', values: ['CONTAINER'] }, + ], }, { and: [ - ...filters, + { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, + { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] }, { field: '_entityType', values: ['DATASET'] }, { field: 'typeNames', values: ['table'] }, ], @@ -65,9 +71,9 @@ describe('generateOrFilters', () => { const filters = [ { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, { field: DOMAINS_FILTER_NAME, values: ['urn:li:domains:domain1'] }, + { field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }, ]; - const nestedFilters = [{ field: ENTITY_SUB_TYPE_FILTER_NAME, values: ['CONTAINER', 'DATASET␞table'] }]; - const orFilters = generateOrFilters(UnionType.OR, filters, nestedFilters); + const orFilters = generateOrFilters(UnionType.OR, filters); expect(orFilters).toMatchObject([ { @@ -87,4 +93,18 @@ describe('generateOrFilters', () => { }, ]); }); + + it('should generate orFilters and exclude filters with a provided exclude field', () => { + const filters = [ + { field: ENTITY_FILTER_NAME, values: ['DATASET', 'CONTAINER'] }, + { field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }, + ]; + const orFilters = generateOrFilters(UnionType.AND, filters, [ENTITY_FILTER_NAME]); + + expect(orFilters).toMatchObject([ + { + and: [{ field: TAGS_FILTER_NAME, values: ['urn:li:tag:tag1'] }], + }, + ]); + }); }); diff --git a/datahub-web-react/src/app/search/utils/generateOrFilters.ts b/datahub-web-react/src/app/search/utils/generateOrFilters.ts index b665a2e0f0495..fa2939b3436f5 100644 --- a/datahub-web-react/src/app/search/utils/generateOrFilters.ts +++ b/datahub-web-react/src/app/search/utils/generateOrFilters.ts @@ -26,20 +26,26 @@ function generateInputWithNestedFilters(filters: FacetFilterInput[], nestedFilte export function generateOrFilters( unionType: UnionType, filters: FacetFilterInput[], - nestedFilters: FacetFilterInput[] = [], + excludedFilterFields: string[] = [], ): AndFilterInput[] { - if ((filters?.length || 0) === 0 && nestedFilters.length === 0) { + if ((filters?.length || 0) === 0) { return []; } + const nonNestedFilters = filters.filter( + (f) => !f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field), + ); + const nestedFilters = filters.filter( + (f) => f.field.includes(FILTER_DELIMITER) && !excludedFilterFields?.includes(f.field), + ); if (unionType === UnionType.OR) { const orFiltersWithNestedFilters = generateInputWithNestedFilters([], nestedFilters); - const orFilters = filters.map((filter) => ({ + const orFilters = nonNestedFilters.map((filter) => ({ and: [filter], })); return [...orFilters, ...orFiltersWithNestedFilters]; } - const andFiltersWithNestedFilters = generateInputWithNestedFilters(filters, nestedFilters); + const andFiltersWithNestedFilters = generateInputWithNestedFilters(nonNestedFilters, nestedFilters); if (andFiltersWithNestedFilters.length) { return andFiltersWithNestedFilters; @@ -47,7 +53,7 @@ export function generateOrFilters( return [ { - and: filters, + and: nonNestedFilters, }, ]; } From 379ffc8d9457bb86029383e857aaa41eae40f329 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Fri, 27 Oct 2023 17:17:27 +0100 Subject: [PATCH 41/80] build(release): Update files for 0.12.0 release (#9130) --- .../src/app/ingest/source/builder/NameSourceStep.tsx | 2 +- gradle/versioning/versioning.gradle | 2 +- .../tests/cypress/cypress/e2e/mutations/managed_ingestion.js | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index f4c048bcaf0d2..3092364bb8bdd 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -190,7 +190,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) setVersion(event.target.value)} /> diff --git a/gradle/versioning/versioning.gradle b/gradle/versioning/versioning.gradle index 1fac894d165a8..39a8a3faf8011 100644 --- a/gradle/versioning/versioning.gradle +++ b/gradle/versioning/versioning.gradle @@ -21,7 +21,7 @@ Produces the following variables and supports token replacement import org.apache.tools.ant.filters.ReplaceTokens def detailedVersionString = "0.0.0-unknown-SNAPSHOT" -def cliMajorVersion = "0.10.5" // base default cli major version +def cliMajorVersion = "0.12.0" // base default cli major version def snapshotVersion = false if (project.hasProperty("releaseVersion")) { version = releaseVersion diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js index 24a24cc21138d..3d052695e818f 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js @@ -7,7 +7,7 @@ describe("run managed ingestion", () => { it("create run managed ingestion source", () => { let number = Math.floor(Math.random() * 100000); let testName = `cypress test source ${number}` - let cli_version = "0.10.5.4"; + let cli_version = "0.12.0"; cy.login(); cy.goToIngestionPage(); cy.clickOptionWithText("Create new source"); From 5166d90433123891bc8f9555d4c6660a2b5c1451 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Fri, 27 Oct 2023 22:49:51 +0530 Subject: [PATCH 42/80] fix(ingest/sql-server): update queries to use escaped procedure name (#9127) --- .../ingestion/source/sql/mssql/source.py | 4 +- .../golden_mces_mssql_no_db_to_file.json | 774 +++++++++++------- .../golden_mces_mssql_no_db_with_filter.json | 471 +++++++---- .../golden_mces_mssql_to_file.json | 471 +++++++---- ...golden_mces_mssql_with_lower_case_urn.json | 365 ++++++--- .../integration/sql_server/setup/setup.sql | 2 +- 6 files changed, 1324 insertions(+), 763 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 685d4fb3074c9..710825c8ba55d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -530,7 +530,7 @@ def _get_procedure_inputs( def _get_procedure_code( conn: Connection, procedure: StoredProcedure ) -> Tuple[Optional[str], Optional[str]]: - query = f"EXEC [{procedure.db}].dbo.sp_helptext '{procedure.full_name}'" + query = f"EXEC [{procedure.db}].dbo.sp_helptext '{procedure.escape_full_name}'" try: code_data = conn.execute(query) except ProgrammingError: @@ -567,7 +567,7 @@ def _get_procedure_properties( create_date as date_created, modify_date as date_modified FROM sys.procedures - WHERE object_id = object_id('{procedure.full_name}') + WHERE object_id = object_id('{procedure.escape_full_name}') """ ) properties = {} diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index a495d04c4e398..2fe7a76fd01ae 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -80,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -91,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362", + "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-03-10 16:27:54.970000", - "date_modified": "2023-03-10 16:27:55.097000", + "date_created": "2023-10-27 10:11:55.540000", + "date_modified": "2023-10-27 10:11:55.667000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -110,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -127,22 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -178,7 +187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -193,7 +203,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -245,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +312,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +330,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -348,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -399,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -431,7 +455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +476,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -502,7 +530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -590,7 +623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -605,7 +639,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +657,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -637,7 +673,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +694,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +716,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +748,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -725,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -740,7 +782,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -760,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -811,7 +857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -828,7 +875,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -843,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -863,7 +912,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -884,7 +934,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -931,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -946,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -966,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -987,7 +1043,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1002,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1017,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1034,7 +1093,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1109,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,7 +1130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1090,7 +1152,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1105,7 +1168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1120,7 +1184,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1137,7 +1202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1172,7 +1239,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1259,7 +1328,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1276,7 +1346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1321,7 +1393,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1336,7 +1409,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1351,7 +1425,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,7 +1443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1383,7 +1459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1403,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1496,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1491,7 +1570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1613,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1547,7 +1629,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1644,7 +1727,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1661,7 +1745,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1685,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1700,7 +1786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1796,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1813,7 +1901,33 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + }, + { + "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", + "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1830,12 +1944,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -1843,14 +1958,14 @@ "customProperties": { "procedure_depends_on": "{}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n", + "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-03-10 16:27:54.907000", - "date_modified": "2023-03-10 16:27:54.907000" + "date_created": "2023-10-27 10:11:55.460000", + "date_modified": "2023-10-27 10:11:55.460000" }, "externalUrl": "", - "name": "demodata.Foo.DBs", + "name": "demodata.Foo.Proc.With.SpecialChar", "type": { "string": "MSSQL_STORED_PROCEDURE" } @@ -1858,12 +1973,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -1875,31 +1991,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1920,7 +2013,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +2029,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2045,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1967,7 +2063,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1982,7 +2079,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2002,7 +2100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2023,7 +2122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2038,7 +2138,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2053,7 +2154,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2070,7 +2172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2085,7 +2188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2105,7 +2209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2126,7 +2231,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2141,7 +2247,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2156,7 +2263,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2173,7 +2281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,7 +2297,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2208,7 +2318,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2228,7 +2339,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2243,7 +2355,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2258,7 +2371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2275,7 +2389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2290,7 +2405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2311,7 +2427,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2326,7 +2443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2341,7 +2459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2358,7 +2477,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2373,7 +2493,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2393,7 +2514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2414,7 +2536,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2429,7 +2552,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2444,7 +2568,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2461,7 +2586,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2476,7 +2602,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2496,7 +2623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2517,7 +2645,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2532,7 +2661,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2547,7 +2677,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2564,7 +2695,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2579,7 +2711,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2599,7 +2732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2620,7 +2754,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2635,7 +2770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2650,7 +2786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2667,7 +2804,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2682,7 +2820,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2702,7 +2841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2723,7 +2863,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2738,7 +2879,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2753,7 +2895,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2770,7 +2913,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2785,7 +2929,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2805,7 +2950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2826,7 +2972,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2841,7 +2988,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2856,7 +3004,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2873,7 +3022,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2888,7 +3038,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2908,7 +3059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2929,7 +3081,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2944,7 +3097,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2959,7 +3113,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2976,7 +3131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2991,7 +3147,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3011,7 +3168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3032,7 +3190,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3047,7 +3206,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3062,7 +3222,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3079,7 +3240,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3094,7 +3256,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3114,7 +3277,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3135,7 +3299,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3150,7 +3315,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3165,7 +3331,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3182,7 +3349,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3197,7 +3365,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3217,7 +3386,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3238,7 +3408,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3253,7 +3424,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3268,7 +3440,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3285,7 +3458,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3300,7 +3474,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3320,7 +3495,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3335,7 +3511,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3407,7 +3584,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3424,7 +3602,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3448,7 +3627,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3469,7 +3649,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3484,7 +3665,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3499,7 +3681,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3516,7 +3699,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3531,7 +3715,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3551,7 +3736,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3566,7 +3752,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3638,7 +3825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3655,7 +3843,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3679,7 +3868,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3694,7 +3884,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3790,7 +3981,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3807,7 +3999,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3831,7 +4024,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3852,7 +4046,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3867,7 +4062,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3882,7 +4078,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3899,7 +4096,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3914,7 +4112,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3934,7 +4133,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3955,7 +4155,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3970,7 +4171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -3985,7 +4187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4002,7 +4205,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4017,7 +4221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4037,7 +4242,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4058,7 +4264,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4073,7 +4280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4088,7 +4296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4105,7 +4314,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4120,27 +4330,34 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:c6627af82d44de89492e1a9315ae9f4b", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", + "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4150,12 +4367,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4165,12 +4383,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4180,27 +4399,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:c6627af82d44de89492e1a9315ae9f4b", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59", - "urn": "urn:li:container:9447d283fb4f95ce7474f1db0179bb59" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 8277ff8bf7e89..c1984828750eb 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -80,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -91,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362", + "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-03-10 16:27:54.970000", - "date_modified": "2023-03-10 16:27:55.097000", + "date_created": "2023-10-27 10:11:55.540000", + "date_modified": "2023-10-27 10:11:55.667000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -110,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -127,22 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -178,7 +187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -193,7 +203,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -245,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +312,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +330,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -348,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -399,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -431,7 +455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +476,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -502,7 +530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -590,7 +623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -605,7 +639,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +657,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -637,7 +673,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +694,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +716,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +748,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -725,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -740,7 +782,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -760,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -811,7 +857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -828,7 +875,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -843,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -863,7 +912,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -884,7 +934,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -931,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -946,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -966,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -987,7 +1043,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1002,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1017,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1034,7 +1093,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1109,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,7 +1130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1090,7 +1152,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1105,7 +1168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1120,7 +1184,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1137,7 +1202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1172,7 +1239,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1259,7 +1328,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1276,7 +1346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1321,7 +1393,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1336,7 +1409,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1351,7 +1425,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,7 +1443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1383,7 +1459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1403,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1496,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1491,7 +1570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1613,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1547,7 +1629,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1644,7 +1727,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1661,7 +1745,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1685,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1700,7 +1786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1796,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1813,7 +1901,33 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + }, + { + "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", + "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1830,12 +1944,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -1843,14 +1958,14 @@ "customProperties": { "procedure_depends_on": "{}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n", + "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-03-10 16:27:54.907000", - "date_modified": "2023-03-10 16:27:54.907000" + "date_created": "2023-10-27 10:11:55.460000", + "date_modified": "2023-10-27 10:11:55.460000" }, "externalUrl": "", - "name": "demodata.Foo.DBs", + "name": "demodata.Foo.Proc.With.SpecialChar", "type": { "string": "MSSQL_STORED_PROCEDURE" } @@ -1858,12 +1973,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -1875,31 +1991,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1920,7 +2013,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +2029,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2045,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1967,7 +2063,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1982,7 +2079,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2002,7 +2100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2023,7 +2122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2038,7 +2138,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2053,7 +2154,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2070,7 +2172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2085,7 +2188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2105,7 +2209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2126,7 +2231,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2141,7 +2247,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2156,7 +2263,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2173,7 +2281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,27 +2297,34 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2218,12 +2334,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2233,12 +2350,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2248,27 +2366,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index f3714bba6364d..804a8d74d0d51 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -80,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -91,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "1df94c0f-15fd-4b68-8ca3-6053a0332362", + "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-03-10 16:27:54.970000", - "date_modified": "2023-03-10 16:27:55.097000", + "date_created": "2023-10-27 10:11:55.540000", + "date_modified": "2023-10-27 10:11:55.667000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -110,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -127,22 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -178,7 +187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -193,7 +203,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -245,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +312,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +330,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -348,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -399,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -431,7 +455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +476,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -502,7 +530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -590,7 +623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -605,7 +639,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +657,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -637,7 +673,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +694,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +716,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +748,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -725,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -740,7 +782,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -760,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -811,7 +857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -828,7 +875,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -843,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -863,7 +912,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -884,7 +934,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -931,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -946,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -966,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -987,7 +1043,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1002,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1017,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1034,7 +1093,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1109,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,7 +1130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1090,7 +1152,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1105,7 +1168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1120,7 +1184,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1137,7 +1202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1172,7 +1239,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1259,7 +1328,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1276,7 +1346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1321,7 +1393,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1336,7 +1409,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1351,7 +1425,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,7 +1443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1383,7 +1459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1403,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1496,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1491,7 +1570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1613,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1547,7 +1629,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1644,7 +1727,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1661,7 +1745,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1685,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1700,7 +1786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1796,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1813,7 +1901,33 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + }, + { + "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", + "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1830,12 +1944,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -1843,14 +1958,14 @@ "customProperties": { "procedure_depends_on": "{}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n", + "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-03-10 16:27:54.907000", - "date_modified": "2023-03-10 16:27:54.907000" + "date_created": "2023-10-27 10:11:55.460000", + "date_modified": "2023-10-27 10:11:55.460000" }, "externalUrl": "", - "name": "demodata.Foo.DBs", + "name": "demodata.Foo.Proc.With.SpecialChar", "type": { "string": "MSSQL_STORED_PROCEDURE" } @@ -1858,12 +1973,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -1875,31 +1991,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - }, - { - "id": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671", - "urn": "urn:li:container:6e5c6d608d0a2dcc4eb03591382e5671" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1920,7 +2013,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +2029,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2045,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1967,7 +2063,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1982,7 +2079,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2002,7 +2100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2023,7 +2122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2038,7 +2138,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2053,7 +2154,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2070,7 +2172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2085,7 +2188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2105,7 +2209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2126,7 +2231,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2141,7 +2247,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2156,7 +2263,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2173,7 +2281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,27 +2297,34 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", + "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" + } + ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2218,12 +2334,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2233,12 +2350,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.Weekly Demo Data Backup,PROD),localhost.Weekly Demo Data Backup)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2248,27 +2366,24 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { - "entityType": "container", - "entityUrn": "urn:li:container:3f157d8292fb473142f19e2250af537f", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5", - "urn": "urn:li:container:b7062d1c0c650d9de0f7a9a5de00b1b5" - } - ] + "removed": false } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index d25d23daae2ea..9d1b288057a16 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -16,7 +16,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -31,7 +32,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -46,7 +48,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -63,7 +66,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -78,7 +82,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -95,7 +100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -106,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "b6a0c1e2-f90a-4c86-a226-bf7ca59ad79f", + "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-08-06 21:01:05.157000", - "date_modified": "2023-08-06 21:01:05.283000", + "date_created": "2023-10-27 10:11:55.540000", + "date_modified": "2023-10-27 10:11:55.667000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -125,7 +131,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -142,7 +149,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -163,7 +171,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -178,7 +187,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -193,7 +203,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -210,7 +221,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -225,7 +237,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -245,7 +258,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -281,7 +296,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -296,7 +312,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -313,7 +330,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -328,7 +346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -348,7 +367,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -369,7 +389,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -384,7 +405,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -399,7 +421,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -416,7 +439,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -431,7 +455,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -451,7 +476,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -472,7 +498,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -487,7 +514,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -502,7 +530,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -519,7 +548,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,7 +564,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -554,7 +585,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -575,7 +607,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -590,7 +623,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -605,7 +639,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -622,7 +657,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -637,7 +673,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -657,7 +694,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -678,7 +716,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -693,7 +732,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -708,7 +748,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -725,7 +766,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -740,7 +782,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -760,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +825,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +841,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -811,7 +857,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -828,7 +875,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -843,7 +891,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -863,7 +912,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -884,7 +934,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -899,7 +950,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -914,7 +966,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -931,7 +984,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -946,7 +1000,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -966,7 +1021,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -987,7 +1043,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1002,7 +1059,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1017,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1034,7 +1093,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1049,7 +1109,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1069,7 +1130,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1090,7 +1152,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1105,7 +1168,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1120,7 +1184,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1137,7 +1202,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1152,7 +1218,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1172,7 +1239,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1187,7 +1255,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1259,7 +1328,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1276,7 +1346,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1300,7 +1371,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1321,7 +1393,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1336,7 +1409,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1351,7 +1425,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,7 +1443,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1383,7 +1459,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1403,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1418,7 +1496,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1491,7 +1570,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,7 +1588,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1532,7 +1613,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1547,7 +1629,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1644,7 +1727,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1661,7 +1745,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1685,7 +1770,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1700,7 +1786,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1796,7 +1883,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1813,7 +1901,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1837,7 +1926,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1854,12 +1944,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInfo", "aspect": { @@ -1867,14 +1958,14 @@ "customProperties": { "procedure_depends_on": "{}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE Foo.DBs @ID INT\nAS\n SELECT @ID AS ThatDB;\n", + "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-08-06 21:01:05.093000", - "date_modified": "2023-08-06 21:01:05.093000" + "date_created": "2023-10-27 10:11:55.460000", + "date_modified": "2023-10-27 10:11:55.460000" }, "externalUrl": "", - "name": "demodata.Foo.DBs", + "name": "demodata.Foo.Proc.With.SpecialChar", "type": { "string": "MSSQL_STORED_PROCEDURE" } @@ -1882,12 +1973,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { @@ -1899,7 +1991,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1920,7 +2013,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1935,7 +2029,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1950,7 +2045,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1967,7 +2063,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1982,7 +2079,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2002,7 +2100,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2023,7 +2122,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2038,7 +2138,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2053,7 +2154,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2070,7 +2172,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2085,7 +2188,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2105,7 +2209,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2126,7 +2231,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2141,7 +2247,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2156,7 +2263,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2173,7 +2281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2188,7 +2297,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2208,7 +2318,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2223,7 +2334,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2238,7 +2350,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2253,12 +2366,13 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),DBs)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,localhost.demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2268,7 +2382,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mssql-test" + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index c1347a7c8caca..a17d52f9a39b1 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -45,7 +45,7 @@ CREATE TABLE Foo.SalesReason ) ; GO -CREATE PROCEDURE Foo.DBs @ID INT +CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT AS SELECT @ID AS ThatDB; GO From 8b1d2094aa768d1f795a2d240de888e65c26f6fc Mon Sep 17 00:00:00 2001 From: richenc <125420929+richenc@users.noreply.github.com> Date: Fri, 27 Oct 2023 10:36:47 -0700 Subject: [PATCH 43/80] feat(airflow): retry callback, support ExternalTaskSensor subclasses (#8514) Co-authored-by: Richie Chen Co-authored-by: Harshal Sheth --- .../client/airflow_generator.py | 7 +++- .../datahub_plugin_v22.py | 36 ++++++++++++++++++- .../integration/goldens/v1_basic_iolets.json | 7 +++- .../integration/goldens/v1_simple_dag.json | 14 ++++++-- .../integration/goldens/v2_basic_iolets.json | 7 +++- .../v2_basic_iolets_no_dag_listener.json | 7 +++- .../integration/goldens/v2_simple_dag.json | 12 +++++-- .../v2_simple_dag_no_dag_listener.json | 14 ++++++-- .../goldens/v2_snowflake_operator.json | 7 +++- .../goldens/v2_sqlite_operator.json | 27 +++++++++++--- .../v2_sqlite_operator_no_dag_listener.json | 35 +++++++++++++++--- 11 files changed, 151 insertions(+), 22 deletions(-) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index 16585f70e820b..e1d53be7bae6b 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -98,7 +98,7 @@ def _get_dependencies( # It is possible to tie an external sensor to DAG if external_task_id is omitted but currently we can't tie # jobflow to anothet jobflow. external_task_upstreams = [] - if task.task_type == "ExternalTaskSensor": + if isinstance(task, ExternalTaskSensor): task = cast(ExternalTaskSensor, task) if hasattr(task, "external_task_id") and task.external_task_id is not None: external_task_upstreams = [ @@ -155,6 +155,8 @@ def generate_dataflow( "_concurrency", # "_default_view", "catchup", + "description", + "doc_md", "fileloc", "is_paused_upon_creation", "start_date", @@ -431,6 +433,9 @@ def run_datajob( job_property_bag["operator"] = str(ti.operator) job_property_bag["priority_weight"] = str(ti.priority_weight) job_property_bag["log_url"] = ti.log_url + job_property_bag["orchestrator"] = "airflow" + job_property_bag["dag_id"] = str(dag.dag_id) + job_property_bag["task_id"] = str(ti.task_id) dpi.properties.update(job_property_bag) dpi.url = ti.log_url diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index 046fbb5efaa03..f9a2119f51e32 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -23,6 +23,7 @@ TASK_ON_FAILURE_CALLBACK = "on_failure_callback" TASK_ON_SUCCESS_CALLBACK = "on_success_callback" +TASK_ON_RETRY_CALLBACK = "on_retry_callback" def get_task_inlets_advanced(task: BaseOperator, context: Any) -> Iterable[Any]: @@ -259,6 +260,28 @@ def custom_on_success_callback(context): return custom_on_success_callback +def _wrap_on_retry_callback(on_retry_callback): + def custom_on_retry_callback(context): + config = get_lineage_config() + if config.enabled: + context["_datahub_config"] = config + try: + datahub_task_status_callback( + context, status=InstanceRunResult.UP_FOR_RETRY + ) + except Exception as e: + if not config.graceful_exceptions: + raise e + else: + print(f"Exception: {traceback.format_exc()}") + + # Call original policy + if on_retry_callback: + on_retry_callback(context) + + return custom_on_retry_callback + + def task_policy(task: Union[BaseOperator, MappedOperator]) -> None: task.log.debug(f"Setting task policy for Dag: {task.dag_id} Task: {task.task_id}") # task.add_inlets(["auto"]) @@ -274,7 +297,14 @@ def task_policy(task: Union[BaseOperator, MappedOperator]) -> None: on_success_callback_prop: property = getattr( MappedOperator, TASK_ON_SUCCESS_CALLBACK ) - if not on_failure_callback_prop.fset or not on_success_callback_prop.fset: + on_retry_callback_prop: property = getattr( + MappedOperator, TASK_ON_RETRY_CALLBACK + ) + if ( + not on_failure_callback_prop.fset + or not on_success_callback_prop.fset + or not on_retry_callback_prop.fset + ): task.log.debug( "Using MappedOperator's partial_kwargs instead of callback properties" ) @@ -284,10 +314,14 @@ def task_policy(task: Union[BaseOperator, MappedOperator]) -> None: task.partial_kwargs[TASK_ON_SUCCESS_CALLBACK] = _wrap_on_success_callback( task.on_success_callback ) + task.partial_kwargs[TASK_ON_RETRY_CALLBACK] = _wrap_on_retry_callback( + task.on_retry_callback + ) return task.on_failure_callback = _wrap_on_failure_callback(task.on_failure_callback) # type: ignore task.on_success_callback = _wrap_on_success_callback(task.on_success_callback) # type: ignore + task.on_retry_callback = _wrap_on_retry_callback(task.on_retry_callback) # type: ignore # task.pre_execute = _wrap_pre_execution(task.pre_execute) diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json index 26aa2afaa831a..a4c17c73e9c7e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_basic_iolets.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -373,7 +375,10 @@ "state": "success", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets", + "orchestrator": "airflow", + "dag_id": "basic_iolets", + "task_id": "run_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets", "name": "basic_iolets_run_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json index b2e3a1fe47da7..a0a95716a0993 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v1_simple_dag.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -302,7 +304,10 @@ "state": "success", "operator": "BashOperator", "priority_weight": "2", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "task_1" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag", "name": "simple_dag_task_1_manual_run_test", @@ -433,6 +438,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -654,7 +661,10 @@ "state": "success", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "run_another_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag", "name": "simple_dag_run_another_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json index 2e733c2ad40a9..1974f1f085df0 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -224,7 +226,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", + "orchestrator": "airflow", + "dag_id": "basic_iolets", + "task_id": "run_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", "name": "basic_iolets_run_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json index 44b288efda954..d02951bc9e82d 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/basic_iolets.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -224,7 +226,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", + "orchestrator": "airflow", + "dag_id": "basic_iolets", + "task_id": "run_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_data_task&dag_id=basic_iolets&map_index=-1", "name": "basic_iolets_run_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json index 454c509279e11..9acc47ec1321e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -189,7 +191,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "2", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "task_1" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", "name": "simple_dag_task_1_manual_run_test", @@ -523,7 +528,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "run_another_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", "name": "simple_dag_run_another_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json index 73b5765e96b7d..03299c483f57f 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -189,7 +191,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "2", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "task_1" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=task_1&dag_id=simple_dag&map_index=-1", "name": "simple_dag_task_1_manual_run_test", @@ -435,6 +440,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "'A simple DAG that runs a few fake data tasks.'", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/simple_dag.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -579,7 +586,10 @@ "state": "running", "operator": "BashOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", + "orchestrator": "airflow", + "dag_id": "simple_dag", + "task_id": "run_another_data_task" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=run_another_data_task&dag_id=simple_dag&map_index=-1", "name": "simple_dag_run_another_data_task_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json index affc395d421da..11a0b17b45b95 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/snowflake_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -234,7 +236,10 @@ "state": "running", "operator": "SnowflakeOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "snowflake_operator", + "task_id": "transform_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=snowflake_operator&map_index=-1", "name": "snowflake_operator_transform_cost_table_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json index 81d0a71b651d9..19e4aac9fb95e 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -201,7 +203,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "5", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "create_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_create_cost_table_manual_run_test", @@ -562,7 +567,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "4", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "populate_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_populate_cost_table_manual_run_test", @@ -922,7 +930,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "3", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "transform_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_transform_cost_table_manual_run_test", @@ -1364,7 +1375,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "cleanup_costs" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_cleanup_costs_manual_run_test", @@ -1658,7 +1672,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "cleanup_processed_costs" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json index 96a0f02ccec17..b67464b385335 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json @@ -9,6 +9,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -201,7 +203,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "5", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "create_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=create_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_create_cost_table_manual_run_test", @@ -460,6 +465,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -617,7 +624,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "4", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "populate_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=populate_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_populate_cost_table_manual_run_test", @@ -805,6 +815,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -1032,7 +1044,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "3", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "transform_cost_table" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=transform_cost_table&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_transform_cost_table_manual_run_test", @@ -1370,6 +1385,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -1529,7 +1546,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "cleanup_costs" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_costs&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_cleanup_costs_manual_run_test", @@ -1719,6 +1739,8 @@ "customProperties": { "_access_control": "None", "catchup": "False", + "description": "None", + "doc_md": "None", "fileloc": "'/Users/hsheth/projects/datahub/metadata-ingestion-modules/airflow-plugin/tests/integration/dags/sqlite_operator.py'", "is_paused_upon_creation": "None", "start_date": "DateTime(2023, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))", @@ -1878,7 +1900,10 @@ "state": "running", "operator": "SqliteOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1" + "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", + "orchestrator": "airflow", + "dag_id": "sqlite_operator", + "task_id": "cleanup_processed_costs" }, "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=cleanup_processed_costs&dag_id=sqlite_operator&map_index=-1", "name": "sqlite_operator_cleanup_processed_costs_manual_run_test", From 649f6d031789252fb9ac97d932fd71396f4875f2 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Sat, 28 Oct 2023 04:02:43 +0900 Subject: [PATCH 44/80] docs: fix saasonly flags for some pages (#9124) --- docs-website/sidebars.js | 29 +++++++++++++++++++----- docs/managed-datahub/chrome-extension.md | 2 -- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 31d69aec46d8b..39eaea57444ed 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -442,11 +442,29 @@ module.exports = { }, "docs/act-on-metadata/impact-analysis", { - Observability: [ - "docs/managed-datahub/observe/freshness-assertions", - "docs/managed-datahub/observe/volume-assertions", - "docs/managed-datahub/observe/custom-sql-assertions", - "docs/managed-datahub/observe/column-assertions", + label: "Observability", + type: "category", + items: [ + { + type: "doc", + id: "docs/managed-datahub/observe/freshness-assertions", + className: "saasOnly", + }, + { + type: "doc", + id: "docs/managed-datahub/observe/volume-assertions", + className: "saasOnly", + }, + { + type: "doc", + id: "docs/managed-datahub/observe/custom-sql-assertions", + className: "saasOnly", + }, + { + type: "doc", + id: "docs/managed-datahub/observe/column-assertions", + className: "saasOnly", + }, ], }, { @@ -606,7 +624,6 @@ module.exports = { { type: "doc", id: "docs/managed-datahub/chrome-extension", - className: "saasOnly", }, { "Managed DataHub Release History": [ diff --git a/docs/managed-datahub/chrome-extension.md b/docs/managed-datahub/chrome-extension.md index 0aa0860d03b67..a4560bc8cc09b 100644 --- a/docs/managed-datahub/chrome-extension.md +++ b/docs/managed-datahub/chrome-extension.md @@ -1,10 +1,8 @@ --- description: Learn how to upload and use the Acryl DataHub Chrome extension (beta) locally before it's available on the Chrome store. --- -import FeatureAvailability from '@site/src/components/FeatureAvailability'; # Acryl DataHub Chrome Extension - ## Installing the Extension From e02b9096bd68c14944e640dbd3a235651ecebbaf Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Sat, 28 Oct 2023 00:33:43 +0530 Subject: [PATCH 45/80] fix(ingest/snowflake): missing view downstream cll if platform instance is set (#8966) --- .../source/snowflake/snowflake_lineage_v2.py | 75 +- .../tests/integration/snowflake/common.py | 2 +- .../snowflake/snowflake_golden.json | 706 ++++-- .../snowflake_privatelink_golden.json | 2075 ++++++++++++----- .../integration/snowflake/test_snowflake.py | 5 +- 5 files changed, 2055 insertions(+), 808 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 9649054dbe6cb..4219533dc217c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -20,12 +20,12 @@ import datahub.emitter.mce_builder as builder from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.sql_parsing_builder import SqlParsingBuilder from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.aws.s3_util import make_s3_urn_for_lineage from datahub.ingestion.source.snowflake.constants import ( LINEAGE_PERMISSION_ERROR, SnowflakeEdition, - SnowflakeObjectDomain, ) from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery @@ -53,7 +53,6 @@ sqlglot_lineage, ) from datahub.utilities.time import ts_millis_to_datetime -from datahub.utilities.urns.dataset_urn import DatasetUrn logger: logging.Logger = logging.getLogger(__name__) @@ -195,20 +194,6 @@ def get_table_upstream_workunits( f"Upstream lineage detected for {self.report.num_tables_with_upstreams} tables.", ) - def _gen_workunit_from_sql_parsing_result( - self, - dataset_identifier: str, - result: SqlParsingResult, - ) -> Iterable[MetadataWorkUnit]: - upstreams, fine_upstreams = self.get_upstreams_from_sql_parsing_result( - self.dataset_urn_builder(dataset_identifier), result - ) - if upstreams: - self.report.num_views_with_upstreams += 1 - yield self._create_upstream_lineage_workunit( - dataset_identifier, upstreams, fine_upstreams - ) - def _gen_workunits_from_query_result( self, discovered_assets: Collection[str], @@ -242,18 +227,31 @@ def get_view_upstream_workunits( schema_resolver: SchemaResolver, view_definitions: MutableMapping[str, str], ) -> Iterable[MetadataWorkUnit]: - views_processed = set() + views_failed_parsing = set() if self.config.include_view_column_lineage: with PerfTimer() as timer: + builder = SqlParsingBuilder( + generate_lineage=True, + generate_usage_statistics=False, + generate_operations=False, + ) for view_identifier, view_definition in view_definitions.items(): result = self._run_sql_parser( view_identifier, view_definition, schema_resolver ) - if result: - views_processed.add(view_identifier) - yield from self._gen_workunit_from_sql_parsing_result( - view_identifier, result + if result and result.out_tables: + self.report.num_views_with_upstreams += 1 + # This does not yield any workunits but we use + # yield here to execute this method + yield from builder.process_sql_parsing_result( + result=result, + query=view_definition, + is_view_ddl=True, ) + else: + views_failed_parsing.add(view_identifier) + + yield from builder.gen_workunits() self.report.view_lineage_parse_secs = timer.elapsed_seconds() with PerfTimer() as timer: @@ -261,7 +259,7 @@ def get_view_upstream_workunits( if results: yield from self._gen_workunits_from_query_result( - set(discovered_views) - views_processed, + views_failed_parsing, results, upstream_for_view=True, ) @@ -349,39 +347,6 @@ def get_upstreams_from_query_result_row( return upstreams, fine_upstreams - def get_upstreams_from_sql_parsing_result( - self, downstream_table_urn: str, result: SqlParsingResult - ) -> Tuple[List[UpstreamClass], List[FineGrainedLineage]]: - # Note: This ignores the out_tables section of the sql parsing result. - upstreams = [ - UpstreamClass(dataset=upstream_table_urn, type=DatasetLineageTypeClass.VIEW) - for upstream_table_urn in set(result.in_tables) - ] - - # Maps downstream_col -> [upstream_col] - fine_lineage: Dict[str, Set[SnowflakeColumnId]] = defaultdict(set) - for column_lineage in result.column_lineage or []: - out_column = column_lineage.downstream.column - for upstream_column_info in column_lineage.upstreams: - upstream_table_name = DatasetUrn.create_from_string( - upstream_column_info.table - ).get_dataset_name() - fine_lineage[out_column].add( - SnowflakeColumnId( - columnName=upstream_column_info.column, - objectName=upstream_table_name, - objectDomain=SnowflakeObjectDomain.VIEW.value, - ) - ) - fine_upstreams = [ - self.build_finegrained_lineage( - downstream_table_urn, downstream_col, upstream_cols - ) - for downstream_col, upstream_cols in fine_lineage.items() - ] - - return upstreams, list(filter(None, fine_upstreams)) - def _populate_external_lineage_map(self, discovered_tables: List[str]) -> None: with PerfTimer() as timer: self.report.num_external_table_edges_scanned = 0 diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index 81e307a78ae9e..ff448eca01071 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -94,7 +94,7 @@ def default_query_results( # noqa: C901 "name": "VIEW_{}".format(view_idx), "created_on": datetime(2021, 6, 8, 0, 0, 0, 0), "comment": "Comment for View", - "text": None, + "text": f"create view view_{view_idx} as select * from table_{view_idx}", } for view_idx in range(1, num_views + 1) ] diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json index a424b258e68ff..c7273fee5a2e5 100644 --- a/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_golden.json @@ -24,7 +24,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -39,7 +40,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -54,7 +56,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -71,7 +74,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -90,7 +94,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -105,7 +110,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -121,7 +127,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -150,7 +157,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -165,7 +173,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -180,7 +189,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -197,7 +207,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -216,7 +227,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -231,7 +243,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -251,7 +264,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -266,7 +280,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -462,7 +477,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -488,7 +504,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -503,7 +520,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -520,7 +538,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -544,7 +563,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -559,7 +579,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -755,7 +776,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -781,7 +803,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -796,7 +819,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -813,7 +837,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -837,7 +862,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -852,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1048,7 +1075,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1074,7 +1102,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -1089,7 +1118,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1106,7 +1136,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1130,7 +1161,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1145,7 +1177,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1341,7 +1374,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1367,7 +1401,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -1382,7 +1417,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1399,7 +1435,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1423,7 +1460,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1438,7 +1476,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1634,7 +1673,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1660,7 +1700,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -1675,7 +1716,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1692,7 +1734,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1716,7 +1759,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1731,7 +1775,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1927,7 +1972,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1953,7 +1999,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -1968,7 +2015,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -1985,7 +2033,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2009,7 +2058,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2024,7 +2074,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2220,7 +2271,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2246,7 +2298,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -2261,7 +2314,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2278,7 +2332,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2302,7 +2357,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2317,7 +2373,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2513,7 +2570,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2539,7 +2597,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -2554,7 +2613,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2571,7 +2631,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2595,7 +2656,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2610,7 +2672,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2806,7 +2869,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2832,7 +2896,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -2847,7 +2912,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2864,7 +2930,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2888,7 +2955,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -2903,7 +2971,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3099,7 +3168,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3125,7 +3195,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -3140,7 +3211,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3157,7 +3229,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3181,7 +3254,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3197,7 +3271,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3212,7 +3287,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3382,7 +3458,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3408,7 +3485,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -3423,7 +3501,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3440,7 +3519,26 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_1 as select * from table_1", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2023_10_06-17_59_03", + "lastRunId": "no-run-id-provided" } }, { @@ -3464,7 +3562,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3480,7 +3579,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3496,7 +3596,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3512,7 +3613,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3527,7 +3629,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3690,7 +3793,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3716,7 +3820,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "snowflake-2023_08_04-09_52_28" + "runId": "snowflake-2023_08_04-09_52_28", + "lastRunId": "no-run-id-provided" } }, { @@ -3731,7 +3836,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3748,7 +3854,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3773,7 +3880,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3797,7 +3905,26 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_2 as select * from table_2", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2023_10_06-17_59_03", + "lastRunId": "no-run-id-provided" } }, { @@ -3819,7 +3946,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3841,7 +3969,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3863,7 +3992,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3885,7 +4015,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3907,7 +4038,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3929,7 +4061,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3951,7 +4084,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3973,7 +4107,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -3995,7 +4130,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4017,7 +4153,145 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_10)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_4)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_5)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_6)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_7)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_8)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_1,PROD),col_9)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "snowflake-2023_10_06-17_59_03", + "lastRunId": "no-run-id-provided" } }, { @@ -4034,14 +4308,127 @@ "actor": "urn:li:corpuser:unknown" }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_10)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_4)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_5)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_6)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_7)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_8)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.view_2,PROD),col_9)" + ], + "confidenceScore": 1.0 } ] } }, "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "lastObserved": 1615443388097, + "runId": "snowflake-2023_10_06-17_59_03", + "lastRunId": "no-run-id-provided" } }, { @@ -4204,7 +4591,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4340,7 +4728,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4476,7 +4865,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4612,7 +5002,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4748,7 +5139,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -4884,7 +5276,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5020,7 +5413,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5156,7 +5550,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5292,7 +5687,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5428,7 +5824,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5456,7 +5853,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5484,7 +5882,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5512,7 +5911,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5540,7 +5940,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5568,7 +5969,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5596,7 +5998,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5624,7 +6027,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5652,7 +6056,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5680,7 +6085,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5708,7 +6114,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5736,7 +6143,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5764,7 +6172,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5786,7 +6195,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5808,7 +6218,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5830,7 +6241,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5852,7 +6264,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5874,7 +6287,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5896,7 +6310,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5918,7 +6333,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5940,7 +6356,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5962,7 +6379,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5984,7 +6402,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -5999,7 +6418,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6014,7 +6434,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6029,7 +6450,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6044,7 +6466,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } }, { @@ -6059,7 +6482,8 @@ }, "systemMetadata": { "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00" + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json index 5057dacd5b0c8..5e55860483d24 100644 --- a/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json +++ b/metadata-ingestion/tests/integration/snowflake/snowflake_privatelink_golden.json @@ -1,13 +1,14 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { "json": { "customProperties": { "platform": "snowflake", + "instance": "instance1", "env": "PROD", "database": "test_db" }, @@ -29,13 +30,14 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { @@ -46,12 +48,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:snowflake" + "removed": false } }, "systemMetadata": { @@ -61,15 +63,13 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "status", "aspect": { "json": { - "typeNames": [ - "Database" - ] + "removed": false } }, "systemMetadata": { @@ -80,12 +80,14 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "subTypes", "aspect": { "json": { - "path": [] + "typeNames": [ + "Database" + ] } }, "systemMetadata": { @@ -96,26 +98,17 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", - "aspectName": "containerProperties", + "aspectName": "browsePathsV2", "aspect": { "json": { - "customProperties": { - "platform": "snowflake", - "env": "PROD", - "database": "test_db", - "schema": "test_schema" - }, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/", - "name": "TEST_SCHEMA", - "description": "comment for TEST_DB.TEST_SCHEMA", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - } + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + ] } }, "systemMetadata": { @@ -126,12 +119,13 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityUrn": "urn:li:container:900b1327253068cb1537b1b3c807ddab", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "removed": false + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { @@ -141,13 +135,24 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:snowflake" + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/", + "name": "TABLE_3", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { @@ -157,14 +162,14 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Schema" + "Table" ] } }, @@ -175,32 +180,24 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } @@ -212,13 +209,28 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "containerProperties", "aspect": { "json": { - "removed": false + "customProperties": { + "platform": "snowflake", + "instance": "instance1", + "env": "PROD", + "database": "test_db", + "schema": "test_schema" + }, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/", + "name": "TEST_SCHEMA", + "description": "comment for TEST_DB.TEST_SCHEMA", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + } } }, "systemMetadata": { @@ -229,12 +241,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_1", + "schemaName": "test_db.test_schema.table_3", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -392,24 +404,13 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "status", "aspect": { "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/", - "name": "TABLE_1", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] + "removed": false } }, "systemMetadata": { @@ -420,12 +421,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_3,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { @@ -435,14 +436,14 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { "json": { "typeNames": [ - "Table" + "Schema" ] } }, @@ -453,20 +454,53 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" } ] } @@ -479,7 +513,85 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_1 as select * from table_1", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_1/", + "name": "VIEW_1", + "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_1", + "description": "Comment for View", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -495,12 +607,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_2", + "schemaName": "test_db.test_schema.table_1", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -659,23 +771,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/", - "name": "TABLE_2", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -686,12 +789,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { @@ -702,39 +805,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - ] + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_1/", + "name": "TABLE_1", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_1", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { @@ -745,12 +832,25 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] } }, "systemMetadata": { @@ -761,12 +861,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_3", + "schemaName": "test_db.test_schema.view_1", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -925,23 +1025,43 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "subTypes", "aspect": { "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_3/", - "name": "TABLE_3", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_3", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] } }, "systemMetadata": { @@ -952,12 +1072,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { @@ -968,14 +1088,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { @@ -986,19 +1105,131 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "upstreamLineage", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, + "upstreams": [ { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_10)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_4)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_5)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_6)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_7)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_8)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_1,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_1,PROD),col_9)" + ], + "confidenceScore": 1.0 } ] } @@ -1011,7 +1242,40 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1027,12 +1291,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_4", + "schemaName": "test_db.test_schema.table_2", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -1191,15 +1455,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/", - "name": "TABLE_4", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4", + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/", + "name": "TABLE_10", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10", "description": "Comment for Table", "created": { "time": 1623110400000 @@ -1218,66 +1482,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1293,7 +1498,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { @@ -1457,7 +1662,39 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { @@ -1484,12 +1721,41 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_2/", + "name": "TABLE_2", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_2", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "typeNames": [ + "Table" + ] } }, "systemMetadata": { @@ -1500,7 +1766,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1518,19 +1784,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } @@ -1543,12 +1813,25 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "browsePathsV2", "aspect": { "json": { - "removed": false + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] } }, "systemMetadata": { @@ -1559,12 +1842,76 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_5,PROD)", "changeType": "UPSERT", - "aspectName": "schemaMetadata", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_6", + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.table_10", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -1723,93 +2070,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/", - "name": "TABLE_6", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -1825,12 +2086,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_7", + "schemaName": "test_db.test_schema.table_6", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -1989,15 +2250,48 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/", - "name": "TABLE_7", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7", + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_6/", + "name": "TABLE_6", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_6", "description": "Comment for Table", "created": { "time": 1623110400000 @@ -2016,12 +2310,29 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_10,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { @@ -2032,7 +2343,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2050,19 +2361,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } @@ -2075,7 +2390,40 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_6,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2091,12 +2439,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_8", + "schemaName": "test_db.test_schema.table_4", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -2255,15 +2603,15 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", "aspectName": "datasetProperties", "aspect": { "json": { "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/", - "name": "TABLE_8", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8", + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_7/", + "name": "TABLE_7", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_7", "description": "Comment for Table", "created": { "time": 1623110400000 @@ -2282,12 +2630,39 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_4/", + "name": "TABLE_4", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_4", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { @@ -2298,7 +2673,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -2316,19 +2691,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" }, { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } @@ -2341,12 +2720,43 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", - "aspectName": "status", + "aspectName": "subTypes", "aspect": { "json": { - "removed": false + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + ] } }, "systemMetadata": { @@ -2357,12 +2767,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_9", + "schemaName": "test_db.test_schema.table_7", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -2521,93 +2931,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/", - "name": "TABLE_9", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Table" - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1654621200000, - "runId": "snowflake-2022_06_07-17_00_00", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2623,12 +2947,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", "aspectName": "schemaMetadata", "aspect": { "json": { - "schemaName": "test_db.test_schema.table_10", + "schemaName": "test_db.test_schema.table_8", "platform": "urn:li:dataPlatform:snowflake", "version": 0, "created": { @@ -2787,23 +3111,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_4,PROD)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": {}, - "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_10/", - "name": "TABLE_10", - "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_10", - "description": "Comment for Table", - "created": { - "time": 1623110400000 - }, - "lastModified": { - "time": 1623110400000 - }, - "tags": [] + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { @@ -2814,12 +3128,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { @@ -2830,14 +3144,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "datasetProperties", "aspect": { "json": { - "typeNames": [ - "Table" - ] + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_8/", + "name": "TABLE_8", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_8", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { @@ -2848,21 +3171,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_7,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:5e359958be02ce647cd9ac196dbd4585", - "urn": "urn:li:container:5e359958be02ce647cd9ac196dbd4585" - }, - { - "id": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c", - "urn": "urn:li:container:94c696a054bab40b73e640a7f82e3b1c" - } - ] + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } }, "systemMetadata": { @@ -2873,21 +3187,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_1,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { @@ -2898,20 +3204,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -2923,19 +3222,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_3,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } @@ -2948,21 +3251,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_4,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] + "removed": false } }, "systemMetadata": { @@ -2973,21 +3267,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_5,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_8,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" } }, "systemMetadata": { @@ -2998,21 +3284,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_6,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "datasetProperties", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } - ] + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/table/TABLE_9/", + "name": "TABLE_9", + "qualifiedName": "TEST_DB.TEST_SCHEMA.TABLE_9", + "description": "Comment for Table", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] } }, "systemMetadata": { @@ -3023,20 +3311,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_7,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "subTypes", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } + "typeNames": [ + "Table" ] } }, @@ -3048,19 +3329,23 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_8,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "browsePathsV2", "aspect": { "json": { - "upstreams": [ + "path": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" } ] } @@ -3073,20 +3358,300 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_9,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "schemaMetadata", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" - } + "schemaName": "test_db.test_schema.table_9", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_9,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "create view view_2 as select * from table_2", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:snowflake", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "externalUrl": "https://app.abc12345.ap-south-1.privatelink.snowflakecomputing.com/#/data/databases/TEST_DB/schemas/TEST_SCHEMA/view/VIEW_2/", + "name": "VIEW_2", + "qualifiedName": "TEST_DB.TEST_SCHEMA.VIEW_2", + "description": "Comment for View", + "created": { + "time": 1623110400000 + }, + "lastModified": { + "time": 1623110400000 + }, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:snowflake,instance1)" + }, + { + "id": "urn:li:container:900b1327253068cb1537b1b3c807ddab", + "urn": "urn:li:container:900b1327253068cb1537b1b3c807ddab" + }, + { + "id": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f", + "urn": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } ] } }, @@ -3098,7 +3663,187 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_10,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "test_db.test_schema.view_2", + "platform": "urn:li:dataPlatform:snowflake", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "col_1", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "NUMBER(38,0)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_2", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_3", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_4", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_5", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_6", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_7", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_8", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_9", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "col_10", + "nullable": false, + "description": "Comment for column", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255)", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:eac598ee71ef1b5e24448d650c08aa5f" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "snowflake-2022_06_07-17_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD)", "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { @@ -3109,8 +3854,120 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_db.test_schema.table_2,PROD)", - "type": "TRANSFORMED" + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_1)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_1)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_10)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_10)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_2)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_2)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_3)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_3)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_4)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_4)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_5)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_5)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_6)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_6)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_7)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_7)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_8)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_8)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.table_2,PROD),col_9)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,instance1.test_db.test_schema.view_2,PROD),col_9)" + ], + "confidenceScore": 1.0 } ] } diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index 3dafe85ef950a..4c00e48ede9fb 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -211,11 +211,12 @@ def test_snowflake_private_link(pytestconfig, tmp_path, mock_time, mock_datahub_ include_technical_schema=True, include_table_lineage=True, include_column_lineage=False, - include_views=False, - include_view_lineage=False, + include_views=True, + include_view_lineage=True, include_usage_stats=False, incremental_lineage=False, include_operational_stats=False, + platform_instance="instance1", start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace( tzinfo=timezone.utc ), From 4d2c009d400406b3cc41767864b07e9933dfe841 Mon Sep 17 00:00:00 2001 From: kushagra-apptware <81357546+kushagra-apptware@users.noreply.github.com> Date: Sat, 28 Oct 2023 01:32:11 +0530 Subject: [PATCH 46/80] feat: Add flag to hide/display the autocomplete query for search bar (#9104) Co-authored-by: John Joyce --- datahub-web-react/src/app/home/HomePageHeader.tsx | 1 + datahub-web-react/src/app/search/SearchBar.tsx | 6 ++++-- datahub-web-react/src/app/search/SearchHeader.tsx | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/datahub-web-react/src/app/home/HomePageHeader.tsx b/datahub-web-react/src/app/home/HomePageHeader.tsx index 5919d2dbf5b7e..e5c01252a865b 100644 --- a/datahub-web-react/src/app/home/HomePageHeader.tsx +++ b/datahub-web-react/src/app/home/HomePageHeader.tsx @@ -275,6 +275,7 @@ export const HomePageHeader = () => { viewsEnabled={viewsEnabled} combineSiblings showQuickFilters + showViewAllResults /> {searchResultsToShow && searchResultsToShow.length > 0 && ( diff --git a/datahub-web-react/src/app/search/SearchBar.tsx b/datahub-web-react/src/app/search/SearchBar.tsx index b4699994bc460..5f797e68fe0e8 100644 --- a/datahub-web-react/src/app/search/SearchBar.tsx +++ b/datahub-web-react/src/app/search/SearchBar.tsx @@ -119,6 +119,7 @@ interface Props { setIsSearchBarFocused?: (isSearchBarFocused: boolean) => void; onFocus?: () => void; onBlur?: () => void; + showViewAllResults?: boolean; } const defaultProps = { @@ -146,6 +147,7 @@ export const SearchBar = ({ setIsSearchBarFocused, onFocus, onBlur, + showViewAllResults = false, }: Props) => { const history = useHistory(); const [searchQuery, setSearchQuery] = useState(initialQuery); @@ -203,7 +205,7 @@ export const SearchBar = ({ const { quickFilters, selectedQuickFilter, setSelectedQuickFilter } = useQuickFiltersContext(); const autoCompleteQueryOptions = useMemo(() => { - if (effectiveQuery === '') return []; + if (effectiveQuery === '' || !showViewAllResults) return []; return [ { @@ -212,7 +214,7 @@ export const SearchBar = ({ type: EXACT_AUTOCOMPLETE_OPTION_TYPE, }, ]; - }, [effectiveQuery]); + }, [effectiveQuery, showViewAllResults]); const autoCompleteEntityOptions = useMemo(() => { return suggestions.map((suggestion: AutoCompleteResultForEntity) => { diff --git a/datahub-web-react/src/app/search/SearchHeader.tsx b/datahub-web-react/src/app/search/SearchHeader.tsx index 74bc562e275d1..91f9753a3d601 100644 --- a/datahub-web-react/src/app/search/SearchHeader.tsx +++ b/datahub-web-react/src/app/search/SearchHeader.tsx @@ -107,6 +107,7 @@ export const SearchHeader = ({ combineSiblings fixAutoComplete showQuickFilters + showViewAllResults /> From aceff13ebb2d6758a5e42b592f4b5eb7d5af29e3 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Sat, 28 Oct 2023 04:55:57 +0530 Subject: [PATCH 47/80] docs(timeline): correct markdown heading level (#9126) --- docs/dev-guides/timeline.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/dev-guides/timeline.md b/docs/dev-guides/timeline.md index 829aef1d3eefa..6a8e158d40ebf 100644 --- a/docs/dev-guides/timeline.md +++ b/docs/dev-guides/timeline.md @@ -228,7 +228,7 @@ http://localhost:8080/openapi/timeline/v1/urn%3Ali%3Adataset%3A%28urn%3Ali%3Adat REMOVE GLOSSARY_TERM dataset:hive:testTimelineDataset (urn:li:glossaryTerm:SavingsAccount): The GlossaryTerm 'SavingsAccount' for the entity 'urn:li:dataset:(urn:li:dataPlatform:hive,testTimelineDataset,PROD)' has been removed. ``` -# Explore the API +## Explore the API The API is browse-able via the UI through through the dropdown. Here are a few screenshots showing how to navigate to it. You can try out the API and send example requests. @@ -243,7 +243,7 @@ Here are a few screenshots showing how to navigate to it. You can try out the AP

-# Future Work +## Future Work - Supporting versions as start and end parameters as part of the call to the timeline API - Supporting entities beyond Datasets From 9ae0e93d82eac2040af2c3d23d52878e57e19df1 Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Fri, 27 Oct 2023 20:18:31 -0700 Subject: [PATCH 48/80] docs(graphql): Correct mutation -> query for searchAcrossLineage examples (#9134) --- docs/api/tutorials/lineage.md | 8 ++------ metadata-ingestion/examples/library/read_lineage_rest.py | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/docs/api/tutorials/lineage.md b/docs/api/tutorials/lineage.md index 4baad09099d07..13ec716b7870b 100644 --- a/docs/api/tutorials/lineage.md +++ b/docs/api/tutorials/lineage.md @@ -113,12 +113,10 @@ Expected Response: You can now see the lineage between `fct_users_deleted` and `logging_events`. -

- ## Add Column-level Lineage @@ -135,12 +133,10 @@ You can now see the lineage between `fct_users_deleted` and `logging_events`. You can now see the column-level lineage between datasets. Note that you have to enable `Show Columns` to be able to see the column-level lineage. -

- ## Read Lineage @@ -180,7 +176,7 @@ query searchAcrossLineage { } ``` -This example shows using lineage degrees as a filter, but additional search filters can be included here as well. +This example shows using lineage degrees as a filter, but additional search filters can be included here as well.
@@ -188,7 +184,7 @@ This example shows using lineage degrees as a filter, but additional search filt ```shell curl --location --request POST 'http://localhost:8080/api/graphql' \ --header 'Authorization: Bearer ' \ ---header 'Content-Type: application/json' --data-raw '{ { "query": "mutation searchAcrossLineage { searchAcrossLineage( input: { query: \"*\" urn: \"urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.adoption.human_profiles,PROD)\" start: 0 count: 10 direction: DOWNSTREAM orFilters: [ { and: [ { condition: EQUAL negated: false field: \"degree\" values: [\"1\", \"2\", \"3+\"] } ] } ] } ) { searchResults { degree entity { urn type } } }}" +--header 'Content-Type: application/json' --data-raw '{ { "query": "query searchAcrossLineage { searchAcrossLineage( input: { query: \"*\" urn: \"urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.adoption.human_profiles,PROD)\" start: 0 count: 10 direction: DOWNSTREAM orFilters: [ { and: [ { condition: EQUAL negated: false field: \"degree\" values: [\"1\", \"2\", \"3+\"] } ] } ] } ) { searchResults { degree entity { urn type } } }}" }}' ``` diff --git a/metadata-ingestion/examples/library/read_lineage_rest.py b/metadata-ingestion/examples/library/read_lineage_rest.py index 34437ed86280d..bd9b4e8651dba 100644 --- a/metadata-ingestion/examples/library/read_lineage_rest.py +++ b/metadata-ingestion/examples/library/read_lineage_rest.py @@ -6,7 +6,7 @@ # Query multiple aspects from entity query = """ -mutation searchAcrossLineage { +query searchAcrossLineage { searchAcrossLineage( input: { query: "*" From 3f4ab44a91bff734e0a0437622d7579410875ec5 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Sun, 29 Oct 2023 16:26:05 -0500 Subject: [PATCH 49/80] feat(kafka): increase kafka message size and enable compression (#9038) Co-authored-by: Pedro Silva --- .github/workflows/docker-unified.yml | 10 +++- build.gradle | 2 +- .../app/client/KafkaTrackingProducer.java | 10 ++-- .../app/config/ConfigurationProvider.java | 6 ++- docker/broker/env/docker.env | 4 +- docker/datahub-frontend/Dockerfile | 4 +- docker/datahub-gms/Dockerfile | 4 +- docker/datahub-mae-consumer/Dockerfile | 4 +- docker/datahub-mce-consumer/Dockerfile | 4 +- docker/datahub-upgrade/Dockerfile | 4 +- docker/kafka-setup/kafka-config.sh | 2 + docker/kafka-setup/kafka-setup.sh | 46 ++++++++++++------- docker/kafka-setup/kafka-topic-workers.sh | 10 +++- .../docker-compose-m1.quickstart.yml | 2 + ...er-compose-without-neo4j-m1.quickstart.yml | 2 + ...ocker-compose-without-neo4j.quickstart.yml | 2 + .../quickstart/docker-compose.quickstart.yml | 2 + docs/deploy/environment-vars.md | 22 +++++---- .../config/kafka/ConsumerConfiguration.java | 10 ++++ .../config/kafka/KafkaConfiguration.java | 2 + .../config/kafka/ProducerConfiguration.java | 4 ++ .../src/main/resources/application.yml | 4 ++ .../kafka/DataHubKafkaProducerFactory.java | 2 + .../kafka/KafkaEventConsumerFactory.java | 4 ++ .../kafka/SimpleKafkaConsumerFactory.java | 9 +++- 25 files changed, 135 insertions(+), 40 deletions(-) create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 8666a5e2e2171..5f5a62de6288c 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -851,8 +851,14 @@ jobs: if: failure() run: | docker ps -a - docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log - docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log + docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log || true + docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log || true + docker logs datahub-mae-consumer >& mae-${{ matrix.test_strategy }}.log || true + docker logs datahub-mce-consumer >& mce-${{ matrix.test_strategy }}.log || true + docker logs broker >& broker-${{ matrix.test_strategy }}.log || true + docker logs mysql >& mysql-${{ matrix.test_strategy }}.log || true + docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true + docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true - name: Upload logs uses: actions/upload-artifact@v3 if: failure() diff --git a/build.gradle b/build.gradle index cf55a59cfe694..bd282535fa13c 100644 --- a/build.gradle +++ b/build.gradle @@ -39,7 +39,7 @@ buildscript { plugins { id 'com.gorylenko.gradle-git-properties' version '2.4.0-rc2' id 'com.github.johnrengelman.shadow' version '6.1.0' - id 'com.palantir.docker' version '0.35.0' + id 'com.palantir.docker' version '0.35.0' apply false // https://blog.ltgt.net/javax-jakarta-mess-and-gradle-solution/ // TODO id "org.gradlex.java-ecosystem-capabilities" version "1.0" } diff --git a/datahub-frontend/app/client/KafkaTrackingProducer.java b/datahub-frontend/app/client/KafkaTrackingProducer.java index fab17f9215d4a..59e91a6d5a0f7 100644 --- a/datahub-frontend/app/client/KafkaTrackingProducer.java +++ b/datahub-frontend/app/client/KafkaTrackingProducer.java @@ -1,6 +1,8 @@ package client; +import com.linkedin.metadata.config.kafka.ProducerConfiguration; import com.typesafe.config.Config; +import config.ConfigurationProvider; import org.apache.kafka.clients.CommonClientConfigs; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerConfig; @@ -35,12 +37,12 @@ public class KafkaTrackingProducer { private final KafkaProducer _producer; @Inject - public KafkaTrackingProducer(@Nonnull Config config, ApplicationLifecycle lifecycle) { + public KafkaTrackingProducer(@Nonnull Config config, ApplicationLifecycle lifecycle, final ConfigurationProvider configurationProvider) { _isEnabled = !config.hasPath("analytics.enabled") || config.getBoolean("analytics.enabled"); if (_isEnabled) { _logger.debug("Analytics tracking is enabled"); - _producer = createKafkaProducer(config); + _producer = createKafkaProducer(config, configurationProvider.getKafka().getProducer()); lifecycle.addStopHook( () -> { @@ -62,13 +64,15 @@ public void send(ProducerRecord record) { _producer.send(record); } - private static KafkaProducer createKafkaProducer(Config config) { + private static KafkaProducer createKafkaProducer(Config config, ProducerConfiguration producerConfiguration) { final Properties props = new Properties(); props.put(ProducerConfig.CLIENT_ID_CONFIG, "datahub-frontend"); props.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, config.getString("analytics.kafka.delivery.timeout.ms")); props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getString("analytics.kafka.bootstrap.server")); props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); // Actor urn. props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer"); // JSON object. + props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, producerConfiguration.getMaxRequestSize()); + props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, producerConfiguration.getCompressionType()); final String securityProtocolConfig = "analytics.kafka.security.protocol"; if (config.hasPath(securityProtocolConfig) diff --git a/datahub-frontend/app/config/ConfigurationProvider.java b/datahub-frontend/app/config/ConfigurationProvider.java index 00a5472ec3476..8f526c831b5c9 100644 --- a/datahub-frontend/app/config/ConfigurationProvider.java +++ b/datahub-frontend/app/config/ConfigurationProvider.java @@ -1,6 +1,7 @@ package config; import com.linkedin.metadata.config.cache.CacheConfiguration; +import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.spring.YamlPropertySourceFactory; import lombok.Data; @@ -11,7 +12,6 @@ /** * Minimal sharing between metadata-service and frontend - * Initially for use of client caching configuration. * Does not use the factories module to avoid transitive dependencies. */ @EnableConfigurationProperties @@ -19,6 +19,10 @@ @ConfigurationProperties @Data public class ConfigurationProvider { + /** + * Kafka related configs. + */ + private KafkaConfiguration kafka; /** * Configuration for caching diff --git a/docker/broker/env/docker.env b/docker/broker/env/docker.env index 18115697c2832..6eb958609daf1 100644 --- a/docker/broker/env/docker.env +++ b/docker/broker/env/docker.env @@ -5,4 +5,6 @@ KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9 KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 KAFKA_HEAP_OPTS=-Xms256m -Xmx256m -KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false \ No newline at end of file +KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false +KAFKA_MESSAGE_MAX_BYTES=5242880 +KAFKA_MAX_MESSAGE_BYTES=5242880 \ No newline at end of file diff --git a/docker/datahub-frontend/Dockerfile b/docker/datahub-frontend/Dockerfile index 9efc0d2ce8753..9c13e73078042 100644 --- a/docker/datahub-frontend/Dockerfile +++ b/docker/datahub-frontend/Dockerfile @@ -8,10 +8,12 @@ RUN addgroup -S datahub && adduser -S datahub -G datahub # Upgrade Alpine and base packages # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ - && apk --no-cache add curl sqlite \ + && apk --no-cache add curl sqlite libc6-compat java-snappy \ && apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ && apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ +ENV LD_LIBRARY_PATH="/lib:/lib64" + FROM base as prod-install COPY ./datahub-frontend.zip / diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index f5428f7480403..e271188a703cc 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -18,7 +18,7 @@ FROM alpine:3 AS base ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ - && apk --no-cache add curl bash coreutils gcompat sqlite \ + && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \ && apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ && apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \ && curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \ @@ -29,6 +29,8 @@ RUN apk --no-cache --update-cache --available upgrade \ && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin +ENV LD_LIBRARY_PATH="/lib:/lib64" + FROM base as prod-install COPY war.war /datahub/datahub-gms/bin/war.war COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile index 4b321b1639c1b..ec3da4de71d15 100644 --- a/docker/datahub-mae-consumer/Dockerfile +++ b/docker/datahub-mae-consumer/Dockerfile @@ -18,7 +18,7 @@ FROM alpine:3 AS base ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ - && apk --no-cache add curl bash coreutils sqlite \ + && apk --no-cache add curl bash coreutils sqlite libc6-compat java-snappy \ && apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ && apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \ && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ @@ -26,6 +26,8 @@ RUN apk --no-cache --update-cache --available upgrade \ && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin +ENV LD_LIBRARY_PATH="/lib:/lib64" + FROM base as prod-install COPY mae-consumer-job.jar /datahub/datahub-mae-consumer/bin/ COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mae-consumer/resources/entity-registry.yml diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile index 4d38ee6daa235..f9c47f77a98f5 100644 --- a/docker/datahub-mce-consumer/Dockerfile +++ b/docker/datahub-mce-consumer/Dockerfile @@ -18,7 +18,7 @@ FROM alpine:3 AS base ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ - && apk --no-cache add curl bash sqlite \ + && apk --no-cache add curl bash sqlite libc6-compat java-snappy \ && apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ && apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \ && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ @@ -33,6 +33,8 @@ COPY docker/datahub-mce-consumer/start.sh /datahub/datahub-mce-consumer/scripts/ COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-mce-consumer/scripts/prometheus-config.yaml RUN chmod +x /datahub/datahub-mce-consumer/scripts/start.sh +ENV LD_LIBRARY_PATH="/lib:/lib64" + FROM base as dev-install # Dummy stage for development. Assumes code is built on your machine and mounted to this image. # See this excellent thread https://github.com/docker/cli/issues/1134 diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile index 945be54678a24..f08e7268e4018 100644 --- a/docker/datahub-upgrade/Dockerfile +++ b/docker/datahub-upgrade/Dockerfile @@ -18,7 +18,7 @@ FROM alpine:3 AS base ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ - && apk --no-cache add curl bash coreutils gcompat sqlite \ + && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \ && apk --no-cache add openjdk11-jre --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ && curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \ && curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \ @@ -28,6 +28,8 @@ RUN apk --no-cache --update-cache --available upgrade \ && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin +ENV LD_LIBRARY_PATH="/lib:/lib64" + FROM base as prod-install COPY datahub-upgrade.jar /datahub/datahub-upgrade/bin/ COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml diff --git a/docker/kafka-setup/kafka-config.sh b/docker/kafka-setup/kafka-config.sh index 2ba8e2d7c5d47..4d5698ccc3856 100644 --- a/docker/kafka-setup/kafka-config.sh +++ b/docker/kafka-setup/kafka-config.sh @@ -2,6 +2,7 @@ : ${PARTITIONS:=1} : ${REPLICATION_FACTOR:=1} +: ${MAX_MESSAGE_BYTES:=5242880} : ${KAFKA_PROPERTIES_SECURITY_PROTOCOL:=PLAINTEXT} @@ -12,3 +13,4 @@ export KAFKA_HEAP_OPTS="-Xmx64M" CONNECTION_PROPERTIES_PATH=/tmp/connection.properties WORKERS=4 +DELIMITER=";" diff --git a/docker/kafka-setup/kafka-setup.sh b/docker/kafka-setup/kafka-setup.sh index b5024e49e59f1..439ffb4d4d829 100755 --- a/docker/kafka-setup/kafka-setup.sh +++ b/docker/kafka-setup/kafka-setup.sh @@ -102,24 +102,43 @@ exec 4<&- send() { work_id=$1 topic_args=$2 - echo sending $work_id $topic_args - echo "$work_id" "$topic_args" 1>&3 ## the fifo is fd 3 + topic_config=$3 + + echo -e "sending $work_id\n worker_args: ${topic_args}${DELIMITER}${topic_config}" + echo "$work_id" "${topic_args}${DELIMITER}${topic_config}" 1>&3 ## the fifo is fd 3 } ## Produce the jobs to run. -send "$METADATA_AUDIT_EVENT_NAME" "--partitions $PARTITIONS --topic $METADATA_AUDIT_EVENT_NAME" -send "$METADATA_CHANGE_EVENT_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_EVENT_NAME" -send "$FAILED_METADATA_CHANGE_EVENT_NAME" "--partitions $PARTITIONS --topic $FAILED_METADATA_CHANGE_EVENT_NAME" -send "$METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME" +send "$METADATA_AUDIT_EVENT_NAME" "--partitions $PARTITIONS --topic $METADATA_AUDIT_EVENT_NAME" \ + "--entity-type topics --entity-name $METADATA_AUDIT_EVENT_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES" + +send "$METADATA_CHANGE_EVENT_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_EVENT_NAME" \ + "--entity-type topics --entity-name $METADATA_CHANGE_EVENT_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES" +send "$FAILED_METADATA_CHANGE_EVENT_NAME" "--partitions $PARTITIONS --topic $FAILED_METADATA_CHANGE_EVENT_NAME" \ + "--entity-type topics --entity-name $FAILED_METADATA_CHANGE_EVENT_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES" + +send "$METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME" \ + "--entity-type topics --entity-name $METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES" # Set retention to 90 days -send "$METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME" "--partitions $PARTITIONS --config retention.ms=7776000000 --topic $METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME" -send "$METADATA_CHANGE_PROPOSAL_TOPIC_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_PROPOSAL_TOPIC_NAME" -send "$FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME" "--partitions $PARTITIONS --topic $FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME" -send "$PLATFORM_EVENT_TOPIC_NAME" "--partitions $PARTITIONS --topic $PLATFORM_EVENT_TOPIC_NAME" +send "$METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME" "--partitions $PARTITIONS --config retention.ms=7776000000 --topic $METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME" \ + "--entity-type topics --entity-name $METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES" + +send "$METADATA_CHANGE_PROPOSAL_TOPIC_NAME" "--partitions $PARTITIONS --topic $METADATA_CHANGE_PROPOSAL_TOPIC_NAME" \ + "--entity-type topics --entity-name $METADATA_CHANGE_PROPOSAL_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES" +send "$FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME" "--partitions $PARTITIONS --topic $FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME" \ + "--entity-type topics --entity-name $FAILED_METADATA_CHANGE_PROPOSAL_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES" + +send "$PLATFORM_EVENT_TOPIC_NAME" "--partitions $PARTITIONS --topic $PLATFORM_EVENT_TOPIC_NAME" \ + "--entity-type topics --entity-name $PLATFORM_EVENT_TOPIC_NAME --alter --add-config max.message.bytes=$MAX_MESSAGE_BYTES" # Infinite retention upgrade topic -send "$DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" "--partitions 1 --config retention.ms=-1 --topic $DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" + # Make sure the retention.ms config for $DATAHUB_UPGRADE_HISTORY_TOPIC_NAME is configured to infinite + # Please see the bug report below for details + # https://github.com/datahub-project/datahub/issues/7882 +send "$DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" "--partitions 1 --config retention.ms=-1 --topic $DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" \ + "--entity-type topics --entity-name "$DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" --alter --add-config retention.ms=-1" + # Create topic for datahub usage event if [[ $DATAHUB_ANALYTICS_ENABLED == true ]]; then send "$DATAHUB_USAGE_EVENT_NAME" "--partitions $PARTITIONS --topic $DATAHUB_USAGE_EVENT_NAME" @@ -150,8 +169,3 @@ if [[ $USE_CONFLUENT_SCHEMA_REGISTRY == "TRUE" ]]; then --entity-name _schemas \ --alter --add-config cleanup.policy=compact fi - -# Make sure the retention.ms config for $DATAHUB_UPGRADE_HISTORY_TOPIC_NAME is configured to infinite -# Please see the bug report below for details -# https://github.com/datahub-project/datahub/issues/7882 -kafka-configs.sh --command-config $CONNECTION_PROPERTIES_PATH --bootstrap-server $KAFKA_BOOTSTRAP_SERVER --entity-type topics --entity-name "$DATAHUB_UPGRADE_HISTORY_TOPIC_NAME" --alter --add-config retention.ms=-1 diff --git a/docker/kafka-setup/kafka-topic-workers.sh b/docker/kafka-setup/kafka-topic-workers.sh index fd0d45c3f4611..3ddf41abbabf5 100644 --- a/docker/kafka-setup/kafka-topic-workers.sh +++ b/docker/kafka-setup/kafka-topic-workers.sh @@ -11,10 +11,18 @@ START_LOCK=$4 ## the queue workers are supposed to be doing job() { i=$1 - topic_args=$2 + worker_args=$2 + topic_args=$(echo $worker_args | cut -d "$DELIMITER" -f 1) + topic_config=$(echo $worker_args | cut -d "$DELIMITER" -f 2) + + echo " $i: kafka-topics.sh --create --if-not-exist $topic_args" kafka-topics.sh --create --if-not-exists --command-config $CONNECTION_PROPERTIES_PATH --bootstrap-server $KAFKA_BOOTSTRAP_SERVER \ --replication-factor $REPLICATION_FACTOR \ $topic_args + if [[ ! -z "$topic_config" ]]; then + echo " $i: kafka-configs.sh $topic_config" + kafka-configs.sh --command-config $CONNECTION_PROPERTIES_PATH --bootstrap-server $KAFKA_BOOTSTRAP_SERVER $topic_config + fi } ## This is the worker to read from the queue. diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 89e9aaa0defd6..c5de687d335b9 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -16,6 +16,8 @@ services: - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false + - KAFKA_MESSAGE_MAX_BYTES=5242880 + - KAFKA_MAX_MESSAGE_BYTES=5242880 healthcheck: interval: 1s retries: 5 diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index f6284edc83648..b6935f24c5ce2 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -16,6 +16,8 @@ services: - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false + - KAFKA_MESSAGE_MAX_BYTES=5242880 + - KAFKA_MAX_MESSAGE_BYTES=5242880 healthcheck: interval: 1s retries: 5 diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 4e3503e35c0db..4ff8bbd70da85 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -16,6 +16,8 @@ services: - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false + - KAFKA_MESSAGE_MAX_BYTES=5242880 + - KAFKA_MAX_MESSAGE_BYTES=5242880 healthcheck: interval: 1s retries: 5 diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index e2f52064389e0..f2950ebab2c9d 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -16,6 +16,8 @@ services: - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0 - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false + - KAFKA_MESSAGE_MAX_BYTES=5242880 + - KAFKA_MAX_MESSAGE_BYTES=5242880 healthcheck: interval: 1s retries: 5 diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index 779c3d3d7c432..4c7b249349ca0 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -67,15 +67,19 @@ In general, there are **lots** of Kafka configuration environment variables for These environment variables follow the standard Spring representation of properties as environment variables. Simply replace the dot, `.`, with an underscore, `_`, and convert to uppercase. -| Variable | Default | Unit/Type | Components | Description | -|-----------------------------------------------------|----------------------------------------------|-----------|-----------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `KAFKA_LISTENER_CONCURRENCY` | 1 | integer | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Number of Kafka consumer threads. Optimize throughput by matching to topic partitions. | -| `SPRING_KAFKA_PRODUCER_PROPERTIES_MAX_REQUEST_SIZE` | 1048576 | bytes | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Max produced message size. Note that the topic configuration is not controlled by this variable. | -| `SCHEMA_REGISTRY_TYPE` | `INTERNAL` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Schema registry implementation. One of `INTERNAL` or `KAFKA` or `AWS_GLUE` | -| `KAFKA_SCHEMAREGISTRY_URL` | `http://localhost:8080/schema-registry/api/` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Schema registry url. Used for `INTERNAL` and `KAFKA`. The default value is for the `GMS` component. The `MCE Consumer` and `MAE Consumer` should be the `GMS` hostname and port. | -| `AWS_GLUE_SCHEMA_REGISTRY_REGION` | `us-east-1` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry implementation. | -| `AWS_GLUE_SCHEMA_REGISTRY_NAME` | `` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry. | -| `USE_CONFLUENT_SCHEMA_REGISTRY` | `true` | boolean | [`kafka-setup`] | Enable Confluent schema registry configuration. | +| Variable | Default | Unit/Type | Components | Description | +|-----------------------------------------------------|----------------------------------------------|-----------|--------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `KAFKA_LISTENER_CONCURRENCY` | 1 | integer | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Number of Kafka consumer threads. Optimize throughput by matching to topic partitions. | +| `SPRING_KAFKA_PRODUCER_PROPERTIES_MAX_REQUEST_SIZE` | 1048576 | bytes | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Max produced message size. Note that the topic configuration is not controlled by this variable. | +| `SCHEMA_REGISTRY_TYPE` | `INTERNAL` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Schema registry implementation. One of `INTERNAL` or `KAFKA` or `AWS_GLUE` | +| `KAFKA_SCHEMAREGISTRY_URL` | `http://localhost:8080/schema-registry/api/` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Schema registry url. Used for `INTERNAL` and `KAFKA`. The default value is for the `GMS` component. The `MCE Consumer` and `MAE Consumer` should be the `GMS` hostname and port. | +| `AWS_GLUE_SCHEMA_REGISTRY_REGION` | `us-east-1` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry implementation. | +| `AWS_GLUE_SCHEMA_REGISTRY_NAME` | `` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry. | +| `USE_CONFLUENT_SCHEMA_REGISTRY` | `true` | boolean | [`kafka-setup`] | Enable Confluent schema registry configuration. | +| `KAFKA_PRODUCER_MAX_REQUEST_SIZE` | `5242880` | integer | [`Frontend`, `GMS`, `MCE Consumer`, `MAE Consumer`] | Max produced message size. Note that the topic configuration is not controlled by this variable. | +| `KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES` | `5242880` | integer | [`GMS`, `MCE Consumer`, `MAE Consumer`] | The maximum amount of data per-partition the server will return. Records are fetched in batches by the consumer. If the first record batch in the first non-empty partition of the fetch is larger than this limit, the batch will still be returned to ensure that the consumer can make progress. | +| `MAX_MESSAGE_BYTES` | `5242880` | integer | [`kafka-setup`] | Sets the max message size on the kakfa topics. | +| `KAFKA_PRODUCER_COMPRESSION_TYPE` | `snappy` | string | [`Frontend`, `GMS`, `MCE Consumer`, `MAE Consumer`] | The compression used by the producer. | ## Frontend diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java new file mode 100644 index 0000000000000..7a93119226a2d --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ConsumerConfiguration.java @@ -0,0 +1,10 @@ +package com.linkedin.metadata.config.kafka; + +import lombok.Data; + + +@Data +public class ConsumerConfiguration { + + private int maxPartitionFetchBytes; +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java index 2966abfc63396..2345f88352c17 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java @@ -12,4 +12,6 @@ public class KafkaConfiguration { private SchemaRegistryConfiguration schemaRegistry; private ProducerConfiguration producer; + + private ConsumerConfiguration consumer; } diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java index 2bf4cea3f0c18..26a8c6b649133 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java @@ -13,4 +13,8 @@ public class ProducerConfiguration { private int requestTimeout; private int backoffTimeout; + + private String compressionType; + + private int maxRequestSize; } diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index 5d72e24748072..b817208672e08 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -228,6 +228,10 @@ kafka: deliveryTimeout: ${KAFKA_PRODUCER_DELIVERY_TIMEOUT:30000} requestTimeout: ${KAFKA_PRODUCER_REQUEST_TIMEOUT:3000} backoffTimeout: ${KAFKA_PRODUCER_BACKOFF_TIMEOUT:500} + compressionType: ${KAFKA_PRODUCER_COMPRESSION_TYPE:snappy} # producer's compression algorithm + maxRequestSize: ${KAFKA_PRODUCER_MAX_REQUEST_SIZE:5242880} # the max bytes sent by the producer, also see kafka-setup MAX_MESSAGE_BYTES for matching value + consumer: + maxPartitionFetchBytes: ${KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES:5242880} # the max bytes consumed per partition schemaRegistry: type: ${SCHEMA_REGISTRY_TYPE:KAFKA} # INTERNAL or KAFKA or AWS_GLUE url: ${KAFKA_SCHEMAREGISTRY_URL:http://localhost:8081} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java index c67a2e704681f..78b3de501e0e5 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java @@ -59,6 +59,8 @@ public static Map buildProducerProperties(SchemaRegistryConfig s props.put(ProducerConfig.DELIVERY_TIMEOUT_MS_CONFIG, kafkaConfiguration.getProducer().getDeliveryTimeout()); props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, kafkaConfiguration.getProducer().getRequestTimeout()); props.put(ProducerConfig.RETRY_BACKOFF_MS_CONFIG, kafkaConfiguration.getProducer().getBackoffTimeout()); + props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, kafkaConfiguration.getProducer().getCompressionType()); + props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, kafkaConfiguration.getProducer().getMaxRequestSize()); // Override KafkaProperties with SchemaRegistryConfig only for non-empty values schemaRegistryConfig.getProperties().entrySet() diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java index ba18be6834d14..7a9e80781d639 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java @@ -70,6 +70,7 @@ private static Map buildCustomizedProperties(KafkaProperties bas consumerProps.setEnableAutoCommit(true); consumerProps.setAutoCommitInterval(Duration.ofSeconds(10)); + // KAFKA_BOOTSTRAP_SERVER has precedence over SPRING_KAFKA_BOOTSTRAP_SERVERS if (kafkaConfiguration.getBootstrapServers() != null && kafkaConfiguration.getBootstrapServers().length() > 0) { consumerProps.setBootstrapServers(Arrays.asList(kafkaConfiguration.getBootstrapServers().split(","))); @@ -84,6 +85,9 @@ private static Map buildCustomizedProperties(KafkaProperties bas .filter(entry -> entry.getValue() != null && !entry.getValue().toString().isEmpty()) .forEach(entry -> customizedProperties.put(entry.getKey(), entry.getValue())); + customizedProperties.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, + kafkaConfiguration.getConsumer().getMaxPartitionFetchBytes()); + return customizedProperties; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java index 05ebfdddf8b80..e12cbec87fe45 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java @@ -4,8 +4,11 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import java.time.Duration; import java.util.Arrays; +import java.util.Map; + import lombok.extern.slf4j.Slf4j; import org.apache.avro.generic.GenericRecord; +import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.common.serialization.StringDeserializer; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.autoconfigure.kafka.KafkaProperties; @@ -40,10 +43,14 @@ protected KafkaListenerContainerFactory createInstance(@Qualifier("configurat consumerProps.setBootstrapServers(Arrays.asList(kafkaConfiguration.getBootstrapServers().split(","))); } // else we rely on KafkaProperties which defaults to localhost:9092 + Map customizedProperties = consumerProps.buildProperties(); + customizedProperties.put(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, + kafkaConfiguration.getConsumer().getMaxPartitionFetchBytes()); + ConcurrentKafkaListenerContainerFactory factory = new ConcurrentKafkaListenerContainerFactory<>(); factory.setContainerCustomizer(new ThreadPoolContainerCustomizer()); - factory.setConsumerFactory(new DefaultKafkaConsumerFactory<>(properties.buildConsumerProperties())); + factory.setConsumerFactory(new DefaultKafkaConsumerFactory<>(customizedProperties)); log.info("Simple KafkaListenerContainerFactory built successfully"); From 758ed47644b330efbbee4e61dde71f6ff5808e23 Mon Sep 17 00:00:00 2001 From: Dmytro Kulyk <34435869+KulykDmytro@users.noreply.github.com> Date: Mon, 30 Oct 2023 06:14:32 +0200 Subject: [PATCH 50/80] feat(ingest/jsonschema) enable schema-aware `JsonSchemaTranslator` (#8971) Co-authored-by: Harshal Sheth --- .../src/datahub/ingestion/extractor/json_schema_util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py index c943b83a887ed..360ddf1129154 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/json_schema_util.py @@ -598,7 +598,8 @@ def get_fields_from_schema( jsonref_schema_dict = schema_dict else: # first validate the schema using a json validator - jsonschema.Draft7Validator.check_schema(schema_dict) + validator = jsonschema.validators.validator_for(schema_dict) + validator.check_schema(schema_dict) # then apply jsonref jsonref_schema_dict = jsonref.loads(schema_string) except Exception as e: From 2c019148ad451752eff582c3206df75c83fe2a63 Mon Sep 17 00:00:00 2001 From: Alex Klavens <123000295+alexklavensnyt@users.noreply.github.com> Date: Mon, 30 Oct 2023 04:43:52 -0400 Subject: [PATCH 51/80] =?UTF-8?q?fix(metadata-ingestion):=20adds=20default?= =?UTF-8?q?=20value=20to=20=5Fresolved=5Fdomain=5Furn=20i=E2=80=A6=20(#911?= =?UTF-8?q?5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Alex Klavens --- .../src/datahub/api/entities/dataproduct/dataproduct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 2d9b14ceb2d06..28e4a03b8f75f 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -104,7 +104,7 @@ class DataProduct(ConfigModel): id: str domain: str - _resolved_domain_urn: Optional[str] + _resolved_domain_urn: Optional[str] = None assets: Optional[List[str]] = None display_name: Optional[str] = None owners: Optional[List[Union[str, Ownership]]] = None From f5c8192cca6eacc7e21e62204883854f0f6bcbdb Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 30 Oct 2023 18:32:51 +0530 Subject: [PATCH 52/80] ci: tweak to only run relevant workflows (#9052) --- .github/workflows/airflow-plugin.yml | 2 +- .github/workflows/check-datahub-jars.yml | 12 ++++-------- .github/workflows/documentation.yml | 8 ++++++++ .github/workflows/metadata-ingestion.yml | 2 +- .github/workflows/metadata-model.yml | 5 ++--- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 54042d104d906..d0c0f52781b9a 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -12,7 +12,7 @@ on: branches: - "**" paths: - - ".github/**" + - ".github/workflows/airflow-plugin.yml" - "metadata-ingestion-modules/airflow-plugin/**" - "metadata-ingestion/**" - "metadata-models/**" diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml index 9a17a70e7f8d4..41f9ea91a94e2 100644 --- a/.github/workflows/check-datahub-jars.yml +++ b/.github/workflows/check-datahub-jars.yml @@ -4,17 +4,13 @@ on: push: branches: - master - paths-ignore: - - "docker/**" - - "docs/**" - - "**.md" + paths: + - "metadata-integration" pull_request: branches: - "**" - paths-ignore: - - "docker/**" - - "docs/**" - - "**.md" + paths: + - "metadata-integration" release: types: [published] diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index ebe2990f3a3cd..c94282938120e 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -4,9 +4,17 @@ on: pull_request: branches: - "**" + paths: + - "metadata-ingestion/**" + - "metadata-models/**" + - "docs-website/**" push: branches: - master + paths: + - "metadata-ingestion/**" + - "metadata-models/**" + - "docs-website/**" # release: # types: [published, edited] diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 699ca330ce0ac..ec6bd4141cc6f 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -11,7 +11,7 @@ on: branches: - "**" paths: - - ".github/**" + - ".github/workflows/metadata-ingestion.yml" - "metadata-ingestion/**" - "metadata-models/**" release: diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index 9d54c88eee591..4bae5ccc9a266 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -3,9 +3,8 @@ on: push: branches: - master - paths-ignore: - - "docs/**" - - "**.md" + paths: + - "metadata-models/**" release: types: [published] From 9c72bd9ed7f1c2b4228fc656c5f8b6f31bf0d431 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Mon, 30 Oct 2023 14:12:07 -0400 Subject: [PATCH 53/80] fix(test): Fix for flaky download_lineage_results cypress test (#9132) --- .../cypress/cypress/e2e/lineage/download_lineage_results.js | 3 +++ .../tests/cypress/cypress/e2e/mutations/dataset_ownership.js | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js index 315aa7b22b9da..dc6efc9f7df66 100644 --- a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js @@ -27,6 +27,9 @@ const downloadCsvFile = (filename) => { }; describe("download lineage results to .csv file", () => { + beforeEach(() => { + cy.on('uncaught:exception', (err, runnable) => { return false; }); + }); it("download and verify lineage results for 1st, 2nd and 3+ degree of dependencies", () => { cy.loginWithCredentials(); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js index 99ad9a68d35e1..465d7998b9f9a 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js @@ -29,6 +29,10 @@ const addOwner = (owner, type, elementId) => { } describe("add, remove ownership for dataset", () => { + beforeEach(() => { + cy.on('uncaught:exception', (err, runnable) => { return false; }); + }); + it("create test user and test group, add user to a group", () => { cy.loginWithCredentials(); cy.createUser(username, password, email); From 300cea373d6a94f05cf3bd95ab69bc503a28538e Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Mon, 30 Oct 2023 20:50:42 +0000 Subject: [PATCH 54/80] docs: Update updating-datahub.md (#9131) --- docs/how/updating-datahub.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 4d1535f28fa0a..28f11e4b6d707 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -4,10 +4,20 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next -- #9010 - In Redshift source's config `incremental_lineage` is set default to off. +### Breaking Changes + +### Potential Downtime + +### Deprecations + +### Other Notable Changes + +## 0.12.0 ### Breaking Changes +- #9044 - GraphQL APIs for adding ownership now expect either an `ownershipTypeUrn` referencing a customer ownership type or a (deprecated) `type`. Where before adding an ownership without a concrete type was allowed, this is no longer the case. For simplicity you can use the `type` parameter which will get translated to a custom ownership type internally if one exists for the type being added. +- #9010 - In Redshift source's config `incremental_lineage` is set default to off. - #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now. - #8942 - Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted by Looker and LookML source connectors. From 58bcedcd6a091263c6dc3e1181c260233a80575d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 30 Oct 2023 14:18:48 -0700 Subject: [PATCH 55/80] fix(ingest/clickhouse): pin version to solve column reflection regression (#9143) --- metadata-ingestion/setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 7f7826abe2095..b1c5510efd923 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -173,7 +173,9 @@ clickhouse_common = { # Clickhouse 0.2.0 adds support for SQLAlchemy 1.4.x - "clickhouse-sqlalchemy>=0.2.0", + # Disallow 0.2.5 because of https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/272. + # Note that there's also a known issue around nested map types: https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/269. + "clickhouse-sqlalchemy>=0.2.0,<0.2.5", } redshift_common = { From 51d6d1f4531dad133e06db75267fbea77e424d00 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 30 Oct 2023 14:19:52 -0700 Subject: [PATCH 56/80] feat(ingest/looker): cleanup error handling (#9135) --- .../src/datahub/ingestion/api/workunit.py | 6 +++++- .../ingestion/source/looker/looker_lib_wrapper.py | 8 ++++++-- .../ingestion/source/looker/looker_source.py | 14 ++------------ .../ingestion/source/looker/lookml_source.py | 5 +---- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/api/workunit.py b/metadata-ingestion/src/datahub/ingestion/api/workunit.py index 8eea3514a22af..b1c003ee27e12 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/workunit.py +++ b/metadata-ingestion/src/datahub/ingestion/api/workunit.py @@ -22,7 +22,11 @@ class MetadataWorkUnit(WorkUnit): metadata: Union[ MetadataChangeEvent, MetadataChangeProposal, MetadataChangeProposalWrapper ] - # A workunit creator can determine if this workunit is allowed to fail + + # A workunit creator can determine if this workunit is allowed to fail. + # TODO: This flag was initially added during the rollout of the subType aspect + # to improve backwards compatibility, but is not really needed anymore and so + # should be removed. treat_errors_as_warnings: bool = False # When this is set to false, this MWU will be ignored by automatic helpers diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py index cf132b7ef27f7..b00f74b71e792 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py @@ -123,8 +123,12 @@ def get_user(self, id_: str, user_fields: str) -> Optional[User]: transport_options=self.transport_options, ) except SDKError as e: - logger.warning(f"Could not find user with id {id_}") - logger.warning(f"Failure was {e}") + if "Looker Not Found (404)" in str(e): + # User not found + logger.info(f"Could not find user with id {id_}: 404 error") + else: + logger.warning(f"Could not find user with id {id_}") + logger.warning(f"Failure was {e}") # User not found return None diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index a3df977582ca4..09683d790c14c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -926,14 +926,7 @@ def process_metrics_dimensions_and_fields_for_dashboard( mcps = chart_mcps mcps.append(dashboard_mcp) - workunits = [ - MetadataWorkUnit( - id=f"looker-{mcp.aspectName}-{mcp.entityUrn}", - mcp=mcp, - treat_errors_as_warnings=True, - ) - for mcp in mcps - ] + workunits = [mcp.as_workunit() for mcp in mcps] return workunits @@ -1320,10 +1313,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: id=f"looker-{event.proposedSnapshot.urn}", mce=event ) elif isinstance(event, MetadataChangeProposalWrapper): - # We want to treat subtype aspects as optional, so allowing failures in this aspect to be treated as warnings rather than failures - yield event.as_workunit( - treat_errors_as_warnings=event.aspectName in ["subTypes"] - ) + yield event.as_workunit() else: raise Exception(f"Unexpected type of event {event}") self.reporter.report_stage_end("explore_metadata") diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index e69c3b6e601bd..e6b78cc7a7745 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -2171,10 +2171,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 for mcp in self._build_dataset_mcps( maybe_looker_view ): - # We want to treat mcp aspects as optional, so allowing failures in this aspect to be treated as warnings rather than failures - yield mcp.as_workunit( - treat_errors_as_warnings=True - ) + yield mcp.as_workunit() else: ( prev_model_name, From 0bd2d9a36cdf18575ac4e54126db5be33ec59d8a Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 30 Oct 2023 14:22:05 -0700 Subject: [PATCH 57/80] feat(ingest): add `entity_supports_aspect` helper (#9120) --- .../src/datahub/emitter/mcp_builder.py | 13 ++++++++++++- metadata-ingestion/tests/unit/test_mcp_builder.py | 9 +++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 65e0c0d6ba60d..d50feba8b119c 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -1,9 +1,10 @@ -from typing import Dict, Iterable, List, Optional, TypeVar +from typing import Dict, Iterable, List, Optional, Type, TypeVar from pydantic.fields import Field from pydantic.main import BaseModel from datahub.emitter.mce_builder import ( + Aspect, datahub_guid, make_container_urn, make_data_platform_urn, @@ -18,6 +19,7 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.container import ContainerProperties from datahub.metadata.schema_classes import ( + KEY_ASPECTS, ContainerClass, DomainsClass, EmbedClass, @@ -306,3 +308,12 @@ def create_embed_mcp(urn: str, embed_url: str) -> MetadataChangeProposalWrapper: entityUrn=urn, aspect=EmbedClass(renderUrl=embed_url), ) + + +def entity_supports_aspect(entity_type: str, aspect_type: Type[Aspect]) -> bool: + entity_key_aspect = KEY_ASPECTS[entity_type] + aspect_name = aspect_type.get_aspect_name() + + supported_aspects = entity_key_aspect.ASPECT_INFO["entityAspects"] + + return aspect_name in supported_aspects diff --git a/metadata-ingestion/tests/unit/test_mcp_builder.py b/metadata-ingestion/tests/unit/test_mcp_builder.py index 561b782ef9e46..e304edb24789c 100644 --- a/metadata-ingestion/tests/unit/test_mcp_builder.py +++ b/metadata-ingestion/tests/unit/test_mcp_builder.py @@ -1,4 +1,5 @@ import datahub.emitter.mcp_builder as builder +from datahub.metadata.schema_classes import StatusClass, TelemetryClientIdClass def test_guid_generator(): @@ -83,3 +84,11 @@ def test_guid_generators(): guid = key.guid() assert guid == guid_datahub + + +def test_entity_supports_aspect(): + assert builder.entity_supports_aspect("dataset", StatusClass) + assert not builder.entity_supports_aspect("telemetry", StatusClass) + + assert not builder.entity_supports_aspect("dataset", TelemetryClientIdClass) + assert builder.entity_supports_aspect("telemetry", TelemetryClientIdClass) From ce0f36b8bc74e3f0bab447408096347617804d92 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 30 Oct 2023 14:23:19 -0700 Subject: [PATCH 58/80] feat(sqlparser): support more update syntaxes + fix bug with subqueries (#9105) --- .../src/datahub/utilities/sqlglot_lineage.py | 57 ++++++++++- .../test_postgres_select_subquery.json | 64 ++++++++++++ .../test_snowflake_update_from_table.json | 1 + .../test_snowflake_update_hardcoded.json | 4 +- .../goldens/test_snowflake_update_self.json | 29 ++++++ .../unit/sql_parsing/test_sqlglot_lineage.py | 98 +++++++++++++++++++ 6 files changed, 247 insertions(+), 6 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 1d74b20569814..388388f9f4b38 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -12,8 +12,8 @@ import sqlglot.errors import sqlglot.lineage import sqlglot.optimizer.annotate_types +import sqlglot.optimizer.optimizer import sqlglot.optimizer.qualify -import sqlglot.optimizer.qualify_columns from pydantic import BaseModel from typing_extensions import TypedDict @@ -48,6 +48,19 @@ SQL_PARSE_RESULT_CACHE_SIZE = 1000 +RULES_BEFORE_TYPE_ANNOTATION: tuple = tuple( + filter( + # Skip pushdown_predicates because it sometimes throws exceptions, and we + # don't actually need it for anything. + lambda func: func.__name__ not in {"pushdown_predicates"}, + itertools.takewhile( + lambda func: func != sqlglot.optimizer.annotate_types.annotate_types, + sqlglot.optimizer.optimizer.RULES, + ), + ) +) + + class GraphQLSchemaField(TypedDict): fieldPath: str nativeDataType: str @@ -289,6 +302,10 @@ def _table_level_lineage( ) # TODO: If a CTAS has "LIMIT 0", it's not really lineage, just copying the schema. + # Update statements implicitly read from the table being updated, so add those back in. + if isinstance(statement, sqlglot.exp.Update): + tables = tables | modified + return tables, modified @@ -568,17 +585,20 @@ def _schema_aware_fuzzy_column_resolve( # - the select instead of the full outer statement # - schema info # - column qualification enabled + # - running the full pre-type annotation optimizer # logger.debug("Schema: %s", sqlglot_db_schema.mapping) - statement = sqlglot.optimizer.qualify.qualify( + statement = sqlglot.optimizer.optimizer.optimize( statement, dialect=dialect, schema=sqlglot_db_schema, + qualify_columns=True, validate_qualify_columns=False, identify=True, # sqlglot calls the db -> schema -> table hierarchy "catalog", "db", "table". catalog=default_db, db=default_schema, + rules=RULES_BEFORE_TYPE_ANNOTATION, ) except (sqlglot.errors.OptimizeError, ValueError) as e: raise SqlUnderstandingError( @@ -748,6 +768,7 @@ def _extract_select_from_create( _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT: Set[str] = set( sqlglot.exp.Update.arg_types.keys() ) - set(sqlglot.exp.Select.arg_types.keys()) +_UPDATE_FROM_TABLE_ARGS_TO_MOVE = {"joins", "laterals", "pivot"} def _extract_select_from_update( @@ -774,17 +795,43 @@ def _extract_select_from_update( # they'll get caught later. new_expressions.append(expr) - return sqlglot.exp.Select( + # Special translation for the `from` clause. + extra_args = {} + original_from = statement.args.get("from") + if original_from and isinstance(original_from.this, sqlglot.exp.Table): + # Move joins, laterals, and pivots from the Update->From->Table->field + # to the top-level Select->field. + + for k in _UPDATE_FROM_TABLE_ARGS_TO_MOVE: + if k in original_from.this.args: + # Mutate the from table clause in-place. + extra_args[k] = original_from.this.args.pop(k) + + select_statement = sqlglot.exp.Select( **{ **{ k: v for k, v in statement.args.items() if k not in _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT }, + **extra_args, "expressions": new_expressions, } ) + # Update statements always implicitly have the updated table in context. + # TODO: Retain table name alias. + if select_statement.args.get("from"): + # select_statement = sqlglot.parse_one(select_statement.sql(dialect=dialect)) + + select_statement = select_statement.join( + statement.this, append=True, join_kind="cross" + ) + else: + select_statement = select_statement.from_(statement.this) + + return select_statement + def _is_create_table_ddl(statement: sqlglot.exp.Expression) -> bool: return isinstance(statement, sqlglot.exp.Create) and isinstance( @@ -955,7 +1002,7 @@ def _sqlglot_lineage_inner( # Fetch schema info for the relevant tables. table_name_urn_mapping: Dict[_TableName, str] = {} table_name_schema_mapping: Dict[_TableName, SchemaInfo] = {} - for table in itertools.chain(tables, modified): + for table in tables | modified: # For select statements, qualification will be a no-op. For other statements, this # is where the qualification actually happens. qualified_table = table.qualified( @@ -971,7 +1018,7 @@ def _sqlglot_lineage_inner( # Also include the original, non-qualified table name in the urn mapping. table_name_urn_mapping[table] = urn - total_tables_discovered = len(tables) + len(modified) + total_tables_discovered = len(tables | modified) total_schemas_resolved = len(table_name_schema_mapping) debug_info = SqlParsingDebugInfo( confidence=0.9 if total_tables_discovered == total_schemas_resolved diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json new file mode 100644 index 0000000000000..0c40ce120c934 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_postgres_select_subquery.json @@ -0,0 +1,64 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table2,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "a", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table1,PROD)", + "column": "a" + } + ] + }, + { + "downstream": { + "table": null, + "column": "b", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "INT" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table1,PROD)", + "column": "b" + } + ] + }, + { + "downstream": { + "table": null, + "column": "c", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": {} + } + }, + "native_column_type": "INT[]" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table2,PROD)", + "column": "c" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json index e2baa34e7fe28..d51001f969799 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_from_table.json @@ -1,6 +1,7 @@ { "query_type": "UPDATE", "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table1,PROD)", "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.table2,PROD)" ], diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json index b41ed61b37cdb..f421b28530c64 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_hardcoded.json @@ -1,6 +1,8 @@ { "query_type": "UPDATE", - "in_tables": [], + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], "out_tables": [ "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" ], diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json new file mode 100644 index 0000000000000..c8cc32164a3eb --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_update_self.json @@ -0,0 +1,29 @@ +{ + "query_type": "UPDATE", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], + "out_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)" + ], + "column_lineage": [ + { + "downstream": { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "orderkey", + "column_type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "native_column_type": "DECIMAL" + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)", + "column": "orderkey" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index dfc5b486abd35..5559ebe1756a6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -768,3 +768,101 @@ def test_snowflake_update_from_table(): }, expected_file=RESOURCE_DIR / "test_snowflake_update_from_table.json", ) + + +def test_snowflake_update_self(): + assert_sql_result( + """ +UPDATE snowflake_sample_data.tpch_sf1.orders +SET orderkey = orderkey + 1 +""", + dialect="snowflake", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": { + "orderkey": "NUMBER(38,0)", + "totalprice": "NUMBER(12,2)", + }, + }, + expected_file=RESOURCE_DIR / "test_snowflake_update_self.json", + ) + + +def test_postgres_select_subquery(): + assert_sql_result( + """ +SELECT + a, + b, + (SELECT c FROM table2 WHERE table2.id = table1.id) as c +FROM table1 +""", + dialect="postgres", + default_db="my_db", + default_schema="my_schema", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table1,PROD)": { + "id": "INTEGER", + "a": "INTEGER", + "b": "INTEGER", + }, + "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.table2,PROD)": { + "id": "INTEGER", + "c": "INTEGER", + }, + }, + expected_file=RESOURCE_DIR / "test_postgres_select_subquery.json", + ) + + +@pytest.mark.skip(reason="We can't parse column-list syntax with sub-selects yet") +def test_postgres_update_subselect(): + assert_sql_result( + """ +UPDATE accounts SET sales_person_name = + (SELECT name FROM employees + WHERE employees.id = accounts.sales_person_id) +""", + dialect="postgres", + default_db="my_db", + default_schema="my_schema", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.accounts,PROD)": { + "id": "INTEGER", + "sales_person_id": "INTEGER", + "sales_person_name": "VARCHAR(16777216)", + }, + "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.employees,PROD)": { + "id": "INTEGER", + "name": "VARCHAR(16777216)", + }, + }, + expected_file=RESOURCE_DIR / "test_postgres_update_subselect.json", + ) + + +@pytest.mark.skip(reason="We can't parse column-list syntax with sub-selects yet") +def test_postgres_complex_update(): + # Example query from the postgres docs: + # https://www.postgresql.org/docs/current/sql-update.html + assert_sql_result( + """ +UPDATE accounts SET (contact_first_name, contact_last_name) = + (SELECT first_name, last_name FROM employees + WHERE employees.id = accounts.sales_person); +""", + dialect="postgres", + schemas={ + "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.accounts,PROD)": { + "id": "INTEGER", + "contact_first_name": "VARCHAR(16777216)", + "contact_last_name": "VARCHAR(16777216)", + "sales_person": "INTEGER", + }, + "urn:li:dataset:(urn:li:dataPlatform:postgres,my_db.my_schema.employees,PROD)": { + "id": "INTEGER", + "first_name": "VARCHAR(16777216)", + "last_name": "VARCHAR(16777216)", + }, + }, + expected_file=RESOURCE_DIR / "test_postgres_complex_update.json", + ) From 94d438d44f2d18def4a422cd60150d2c9a78be49 Mon Sep 17 00:00:00 2001 From: sachinsaju <33017477+sachinsaju@users.noreply.github.com> Date: Tue, 31 Oct 2023 08:54:56 +0530 Subject: [PATCH 59/80] docs: correct broken doc links (#9137) Co-authored-by: Hyejin Yoon <0327jane@gmail.com> --- docs/deploy/aws.md | 2 +- docs/what-is-datahub/datahub-concepts.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md index e0f57b4a0b0cb..6598b93c25e9a 100644 --- a/docs/deploy/aws.md +++ b/docs/deploy/aws.md @@ -15,7 +15,7 @@ This guide requires the following tools: - [kubectl](https://kubernetes.io/docs/tasks/tools/) to manage kubernetes resources - [helm](https://helm.sh/docs/intro/install/) to deploy the resources based on helm charts. Note, we only support Helm 3. -- [eksctl](https://eksctl.io/introduction/#installation) to create and manage clusters on EKS +- [eksctl](https://eksctl.io/installation/) to create and manage clusters on EKS - [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html) to manage AWS resources To use the above tools, you need to set up AWS credentials by following diff --git a/docs/what-is-datahub/datahub-concepts.md b/docs/what-is-datahub/datahub-concepts.md index 6328d97fa6a50..03b86fab0ede4 100644 --- a/docs/what-is-datahub/datahub-concepts.md +++ b/docs/what-is-datahub/datahub-concepts.md @@ -99,7 +99,7 @@ List of Data Platforms - Tableau - Vertica -Reference : [data_platforms.json](https://github.com/acryldata/datahub-fork/blob/acryl-main/metadata-service/war/src/main/resources/boot/data_platforms.json) +Reference : [data_platforms.json](https://github.com/datahub-project/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json) From ea1273281e3a65ab4d94d002ee19f91907a3eb84 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 30 Oct 2023 20:57:59 -0700 Subject: [PATCH 60/80] feat(ingest): sql parser perf + asyncio fixes (#9119) --- metadata-ingestion/setup.py | 2 +- .../src/datahub/cli/docker_cli.py | 5 ++ .../src/datahub/upgrade/upgrade.py | 12 ++--- .../src/datahub/utilities/sqlglot_lineage.py | 5 +- .../goldens/test_select_from_union.json | 2 +- .../test_teradata_strange_operators.json | 46 +++++++++++++++++++ .../unit/sql_parsing/test_sqlglot_lineage.py | 14 ++++++ 7 files changed, 73 insertions(+), 13 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index b1c5510efd923..151842bd84d0a 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -108,7 +108,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==18.5.2.dev45", + "acryl-sqlglot==18.17.1.dev16", } sql_common = ( diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 4afccfe711e34..77e3285d359ef 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -5,6 +5,7 @@ import os import pathlib import platform +import signal import subprocess import sys import tempfile @@ -770,6 +771,10 @@ def quickstart( # noqa: C901 logger.debug("docker compose up still running, sending SIGKILL") up_process.kill() up_process.wait() + else: + # If the docker process got a keyboard interrupt, raise one here. + if up_process.returncode in {128 + signal.SIGINT, -signal.SIGINT}: + raise KeyboardInterrupt # Check docker health every few seconds. status = check_docker_quickstart() diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py index 30f19b8b84f35..acc7954ad25a6 100644 --- a/metadata-ingestion/src/datahub/upgrade/upgrade.py +++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py @@ -1,6 +1,5 @@ import asyncio import contextlib -import functools import logging import sys from datetime import datetime, timedelta, timezone @@ -374,17 +373,14 @@ def check_upgrade(func: Callable[..., T]) -> Callable[..., T]: @wraps(func) def async_wrapper(*args: Any, **kwargs: Any) -> Any: async def run_inner_func(): - loop = asyncio.get_event_loop() - return await loop.run_in_executor( - None, functools.partial(func, *args, **kwargs) - ) + return func(*args, **kwargs) async def run_func_check_upgrade(): version_stats_future = asyncio.ensure_future(retrieve_version_stats()) - the_one_future = asyncio.ensure_future(run_inner_func()) - ret = await the_one_future + main_func_future = asyncio.ensure_future(run_inner_func()) + ret = await main_func_future - # the one future has returned + # the main future has returned # we check the other futures quickly try: version_stats = await asyncio.wait_for(version_stats_future, 0.5) diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 388388f9f4b38..6413275ac63a6 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -106,6 +106,7 @@ def get_query_type_of_sql(expression: sqlglot.exp.Expression) -> QueryType: sqlglot.exp.Update: QueryType.UPDATE, sqlglot.exp.Delete: QueryType.DELETE, sqlglot.exp.Merge: QueryType.MERGE, + sqlglot.exp.Subqueryable: QueryType.SELECT, # unions, etc. are also selects } for cls, query_type in mapping.items(): @@ -820,10 +821,8 @@ def _extract_select_from_update( ) # Update statements always implicitly have the updated table in context. - # TODO: Retain table name alias. + # TODO: Retain table name alias, if one was present. if select_statement.args.get("from"): - # select_statement = sqlglot.parse_one(select_statement.sql(dialect=dialect)) - select_statement = select_statement.join( statement.this, append=True, join_kind="cross" ) diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json index 902aa010c8afc..5d1d421f49a2a 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json @@ -1,5 +1,5 @@ { - "query_type": "UNKNOWN", + "query_type": "SELECT", "in_tables": [ "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf10.orders,PROD)", "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf100.orders,PROD)" diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json new file mode 100644 index 0000000000000..4b21a2512ccd1 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_teradata_strange_operators.json @@ -0,0 +1,46 @@ +{ + "query_type": "SELECT", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table1,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table2,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "col1", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table1,PROD)", + "column": "col1" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table2,PROD)", + "column": "col1" + } + ] + }, + { + "downstream": { + "table": null, + "column": "col2", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table1,PROD)", + "column": "col2" + }, + { + "table": "urn:li:dataset:(urn:li:dataPlatform:teradata,dbc.table2,PROD)", + "column": "col2" + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 5559ebe1756a6..3b9fa0d55f18d 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -675,6 +675,20 @@ def test_teradata_default_normalization(): ) +def test_teradata_strange_operators(): + assert_sql_result( + """ +select col1, col2 from dbc.table1 +where col1 eq 'value1' +minus +select col1, col2 from dbc.table2 +""", + dialect="teradata", + default_schema="dbc", + expected_file=RESOURCE_DIR / "test_teradata_strange_operators.json", + ) + + def test_snowflake_update_hardcoded(): assert_sql_result( """ From b565a657d2235b82e65dfbe0bfcc11c97c3d9b79 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 30 Oct 2023 23:35:12 -0700 Subject: [PATCH 61/80] feat(quickstart): fix broker InconsistentClusterIdException issues (#9148) --- docker/docker-compose-with-cassandra.yml | 6 +++++- docker/docker-compose-without-neo4j.yml | 6 +++++- docker/docker-compose.yml | 6 +++++- docker/quickstart/docker-compose-m1.quickstart.yml | 4 +++- .../docker-compose-without-neo4j-m1.quickstart.yml | 4 +++- .../quickstart/docker-compose-without-neo4j.quickstart.yml | 4 +++- docker/quickstart/docker-compose.quickstart.yml | 4 +++- 7 files changed, 27 insertions(+), 7 deletions(-) diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index 9543e67da07f2..39f4341600572 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -200,7 +200,10 @@ services: retries: 5 timeout: 5s volumes: - - zkdata:/var/lib/zookeeper + # See https://stackoverflow.com/a/61008432 for why we need two volumes. + # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk + - zkdata:/var/lib/zookeeper/data + - zklogs:/var/lib/zookeeper/log networks: default: name: datahub_network @@ -210,3 +213,4 @@ volumes: neo4jdata: broker: zkdata: + zklogs: diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index 022362782f742..235e89e340551 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -174,7 +174,10 @@ services: retries: 3 timeout: 5s volumes: - - zkdata:/var/lib/zookeeper + # See https://stackoverflow.com/a/61008432 for why we need two volumes. + # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk + - zkdata:/var/lib/zookeeper/data + - zklogs:/var/lib/zookeeper/log networks: default: name: datahub_network @@ -182,3 +185,4 @@ volumes: esdata: broker: zkdata: + zklogs: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index a486689e050a2..46da8c6fdbd2a 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -195,7 +195,10 @@ services: retries: 3 timeout: 5s volumes: - - zkdata:/var/lib/zookeeper + # See https://stackoverflow.com/a/61008432 for why we need two volumes. + # See also: https://docs.confluent.io/platform/current/installation/docker/operations/external-volumes.html#data-volumes-for-kafka-and-zk + - zkdata:/var/lib/zookeeper/data + - zklogs:/var/lib/zookeeper/log networks: default: name: datahub_network @@ -204,3 +207,4 @@ volumes: neo4jdata: broker: zkdata: + zklogs: diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index c5de687d335b9..3b6d02c83d0f0 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -300,7 +300,8 @@ services: ports: - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - - zkdata:/var/lib/zookeeper + - zkdata:/var/lib/zookeeper/data + - zklogs:/var/lib/zookeeper/log version: '3.9' volumes: broker: null @@ -308,3 +309,4 @@ volumes: mysqldata: null neo4jdata: null zkdata: null + zklogs: null diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index b6935f24c5ce2..e45bafc3da480 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -274,10 +274,12 @@ services: ports: - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - - zkdata:/var/lib/zookeeper + - zkdata:/var/lib/zookeeper/data + - zklogs:/var/lib/zookeeper/log version: '3.9' volumes: broker: null esdata: null mysqldata: null zkdata: null + zklogs: null diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 4ff8bbd70da85..020ef5e9a97b9 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -274,10 +274,12 @@ services: ports: - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - - zkdata:/var/lib/zookeeper + - zkdata:/var/lib/zookeeper/data + - zklogs:/var/lib/zookeeper/log version: '3.9' volumes: broker: null esdata: null mysqldata: null zkdata: null + zklogs: null diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index f2950ebab2c9d..8adc2b9063b84 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -300,7 +300,8 @@ services: ports: - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 volumes: - - zkdata:/var/lib/zookeeper + - zkdata:/var/lib/zookeeper/data + - zklogs:/var/lib/zookeeper/log version: '3.9' volumes: broker: null @@ -308,3 +309,4 @@ volumes: mysqldata: null neo4jdata: null zkdata: null + zklogs: null From 2e8954f33a10f3e11af22fe6198fea43d65d580c Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 31 Oct 2023 22:25:48 +0530 Subject: [PATCH 62/80] fix(policies): remove non-existent policies, fix name (#9150) --- .../war/src/main/resources/boot/policies.json | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json index 18cb48bfcf1f0..b7ffc11c08f05 100644 --- a/metadata-service/war/src/main/resources/boot/policies.json +++ b/metadata-service/war/src/main/resources/boot/policies.json @@ -56,7 +56,7 @@ "EDIT_ENTITY", "VIEW_ENTITY_PAGE", "EDIT_LINEAGE", - "EDIT_ENTITY_ASSERTIONS_PRIVILEGE", + "EDIT_ENTITY_ASSERTIONS", "SEARCH_PRIVILEGE", "GET_COUNTS_PRIVILEGE", "GET_TIMESERIES_ASPECT_PRIVILEGE", @@ -251,11 +251,6 @@ "EDIT_GROUP_MEMBERS", "EDIT_USER_PROFILE", "EDIT_CONTACT_INFO", - "MANAGE_ENTITY_TAGS_PRIVILEGE", - "MANAGE_ENTITY_GLOSSARY_TERMS_PRIVILEGE", - "MANAGE_DATASET_COL_GLOSSARY_TERMS_PRIVILEGE", - "MANAGE_DATASET_COL_TAGS_PRIVILEGE", - "EDIT_ENTITY_ASSERTIONS_PRIVILEGE", "EDIT_LINEAGE", "EDIT_ENTITY_QUERIES", "SEARCH_PRIVILEGE", @@ -336,11 +331,6 @@ "EDIT_GROUP_MEMBERS", "EDIT_USER_PROFILE", "EDIT_CONTACT_INFO", - "MANAGE_ENTITY_TAGS_PRIVILEGE", - "MANAGE_ENTITY_GLOSSARY_TERMS_PRIVILEGE", - "MANAGE_DATASET_COL_GLOSSARY_TERMS_PRIVILEGE", - "MANAGE_DATASET_COL_TAGS_PRIVILEGE", - "EDIT_ENTITY_ASSERTIONS_PRIVILEGE", "EDIT_LINEAGE", "EDIT_ENTITY_QUERIES", "SEARCH_PRIVILEGE", @@ -441,11 +431,6 @@ "EDIT_GROUP_MEMBERS", "EDIT_USER_PROFILE", "EDIT_CONTACT_INFO", - "MANAGE_ENTITY_TAGS_PRIVILEGE", - "MANAGE_ENTITY_GLOSSARY_TERMS_PRIVILEGE", - "MANAGE_DATASET_COL_GLOSSARY_TERMS_PRIVILEGE", - "MANAGE_DATASET_COL_TAGS_PRIVILEGE", - "EDIT_ENTITY_ASSERTIONS_PRIVILEGE", "EDIT_LINEAGE", "EDIT_ENTITY_QUERIES", "GET_TIMELINE_PRIVILEGE", From b8dcc86281d06fcde35773fd4ef4933f5b553fd7 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Tue, 31 Oct 2023 15:48:34 -0400 Subject: [PATCH 63/80] refactor(smoke): Fix for a test that passed on Oss and failed on Saas (#9147) --- .../cypress/cypress/e2e/lineage/download_lineage_results.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js index dc6efc9f7df66..ed4167b87c506 100644 --- a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js @@ -37,7 +37,7 @@ describe("download lineage results to .csv file", () => { cy.openEntityTab("Lineage"); // Verify 1st degree of dependencies - cy.contains(/1 - 3 of 3/); + cy.contains(/1 - [3-4] of [3-4]/); downloadCsvFile("first_degree_results.csv"); let first_degree_csv = cy.readFile('cypress/downloads/first_degree_results.csv'); first_degree.forEach(function (urn) { @@ -52,7 +52,7 @@ describe("download lineage results to .csv file", () => { // Verify 1st and 2nd degree of dependencies cy.get('[data-testid="facet-degree-2"]').click().wait(5000); - cy.contains(/1 - 7 of 7/); + cy.contains(/1 - [7-8] of [7-8]/); downloadCsvFile("second_degree_results.csv"); let second_degree_csv = cy.readFile('cypress/downloads/second_degree_results.csv'); first_degree.forEach(function (urn) { @@ -67,7 +67,7 @@ describe("download lineage results to .csv file", () => { // Verify 1st 2nd and 3+ degree of dependencies(Verify multi page download) cy.get('[data-testid="facet-degree-3+"]').click().wait(5000); - cy.contains(/1 - 10 of 13/); + cy.contains(/1 - 10 of 1[3-4]/); downloadCsvFile("third_plus_degree_results.csv"); let third_degree_csv = cy.readFile('cypress/downloads/third_plus_degree_results.csv'); first_degree.forEach(function (urn) { From dae320c9bc28b80c6110395092d4223e9a37258b Mon Sep 17 00:00:00 2001 From: sachinsaju <33017477+sachinsaju@users.noreply.github.com> Date: Wed, 1 Nov 2023 04:28:39 +0530 Subject: [PATCH 64/80] docs(teradata): teradata doc external link 404 fix (#9152) --- metadata-ingestion/docs/sources/teradata/teradata_pre.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/docs/sources/teradata/teradata_pre.md b/metadata-ingestion/docs/sources/teradata/teradata_pre.md index 7263a59f5ea3d..7b4da1255d575 100644 --- a/metadata-ingestion/docs/sources/teradata/teradata_pre.md +++ b/metadata-ingestion/docs/sources/teradata/teradata_pre.md @@ -25,4 +25,4 @@ will fit for your queries (the default query text size Teradata captures is max REPLACE QUERY LOGGING LIMIT SQLTEXT=2000 ON ALL; ``` See more here about query logging: - [https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs]() + [https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs](https://docs.teradata.com/r/Teradata-VantageCloud-Lake/Database-Reference/Database-Administration/Tracking-Query-Behavior-with-Database-Query-Logging-Operational-DBAs) From 7a31950f794b2b0527ad685cbd08e967b524bfec Mon Sep 17 00:00:00 2001 From: "Jia (Jason) Teoh" Date: Tue, 31 Oct 2023 18:41:16 -0700 Subject: [PATCH 65/80] fix(datahub-client): Include relocation for snakeyaml dependency. (#8911) Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com> --- metadata-integration/java/datahub-client/build.gradle | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index e6210f1f073f6..0bf6b18fa5073 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -97,6 +97,7 @@ shadowJar { // we can move to automatic relocation using ConfigureShadowRelocation after we get to a good place on these first relocate 'org.springframework', 'datahub.shaded.org.springframework' relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson' + relocate 'org.yaml', 'io.acryl.shaded.org.yaml' // Required for shading snakeyaml relocate 'net.jcip.annotations', 'datahub.shaded.annotations' relocate 'javassist', 'datahub.shaded.javassist' relocate 'edu.umd.cs.findbugs', 'datahub.shaded.findbugs' @@ -242,4 +243,4 @@ checkstyleMain.exclude '**/generated/**' clean { project.delete("$projectDir/generated") -} \ No newline at end of file +} From 73514ad9c5643cc5fbbb1edb0991d4aea0812459 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 31 Oct 2023 21:28:38 -0700 Subject: [PATCH 66/80] fix(ingest): cleanup large images in CI (#9153) --- .../tests/integration/sql_server/test_sql_server.py | 5 ++++- metadata-ingestion/tests/integration/vertica/test_vertica.py | 5 ++++- metadata-ingestion/tests/test_helpers/docker_helpers.py | 5 +++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index 099690fed34c2..f439a322c2677 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -6,7 +6,7 @@ from tests.test_helpers import mce_helpers from tests.test_helpers.click_helpers import run_datahub_cmd -from tests.test_helpers.docker_helpers import wait_for_port +from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port @pytest.fixture(scope="module") @@ -29,6 +29,9 @@ def mssql_runner(docker_compose_runner, pytestconfig): assert ret.returncode == 0 yield docker_services + # The image is pretty large, so we remove it after the test. + cleanup_image("mcr.microsoft.com/mssql/server") + SOURCE_FILES_PATH = "./tests/integration/sql_server/source_files" config_file = os.listdir(SOURCE_FILES_PATH) diff --git a/metadata-ingestion/tests/integration/vertica/test_vertica.py b/metadata-ingestion/tests/integration/vertica/test_vertica.py index 94ad33ba21ce4..d7b4c390f75d9 100644 --- a/metadata-ingestion/tests/integration/vertica/test_vertica.py +++ b/metadata-ingestion/tests/integration/vertica/test_vertica.py @@ -6,7 +6,7 @@ from tests.test_helpers import mce_helpers from tests.test_helpers.click_helpers import run_datahub_cmd -from tests.test_helpers.docker_helpers import wait_for_port +from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port FROZEN_TIME = "2020-04-14 07:00:00" @@ -49,6 +49,9 @@ def vertica_runner(docker_compose_runner, test_resources_dir): yield docker_services + # The image is pretty large, so we remove it after the test. + cleanup_image("vertica/vertica-ce") + @freeze_time(FROZEN_TIME) @pytest.mark.integration diff --git a/metadata-ingestion/tests/test_helpers/docker_helpers.py b/metadata-ingestion/tests/test_helpers/docker_helpers.py index 30157c3a78094..2eb61068196a2 100644 --- a/metadata-ingestion/tests/test_helpers/docker_helpers.py +++ b/metadata-ingestion/tests/test_helpers/docker_helpers.py @@ -1,5 +1,6 @@ import contextlib import logging +import os import subprocess from typing import Callable, Optional, Union @@ -78,6 +79,10 @@ def run( def cleanup_image(image_name: str) -> None: assert ":" not in image_name, "image_name should not contain a tag" + if not os.environ.get("CI"): + logger.debug("Not cleaning up images to speed up local development") + return + images_proc = subprocess.run( f"docker image ls --filter 'reference={image_name}*' -q", shell=True, From d2314976033e42c13b7897f46ea0f227afb7c90b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 31 Oct 2023 21:37:11 -0700 Subject: [PATCH 67/80] build: increase gradle retries (#9091) --- gradle.properties | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gradle.properties b/gradle.properties index 2b211e725359a..1cd349344b432 100644 --- a/gradle.properties +++ b/gradle.properties @@ -6,9 +6,16 @@ org.gradle.caching=false # Increase gradle JVM memory to 3GB to allow tests to run locally org.gradle.jvmargs=-Xmx3000m # Increase retries to 5 (from default of 3) and increase interval from 125ms to 1s. +# Based on this thread https://github.com/gradle/gradle/issues/4629, it's unclear +# if we should be using systemProp or not. We're using both for now. org.gradle.internal.repository.max.retries=5 org.gradle.internal.repository.max.tentatives=5 org.gradle.internal.repository.initial.backoff=1000 +systemProp.org.gradle.internal.http.connectionTimeout=120000 +systemProp.org.gradle.internal.http.socketTimeout=120000 +systemProp.org.gradle.internal.repository.max.retries=5 +systemProp.org.gradle.internal.repository.max.tentatives=5 +systemProp.org.gradle.internal.repository.initial.backoff=1000 # Needed to publish to Nexus from a sub-module gnsp.disableApplyOnlyOnRootProjectEnforcement=true From 55f14530a397f75a9201db11c13bd7bbbb25162c Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 1 Nov 2023 00:12:52 -0700 Subject: [PATCH 68/80] feat(ingest): bump sqlglot parser (#9155) --- metadata-ingestion/setup.py | 2 +- .../unit/sql_parsing/test_sqlglot_lineage.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 151842bd84d0a..afce8dcee840b 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -108,7 +108,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==18.17.1.dev16", + "acryl-sqlglot==19.0.2.dev10", } sql_common = ( diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index 3b9fa0d55f18d..c420f2b8438ce 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -676,9 +676,13 @@ def test_teradata_default_normalization(): def test_teradata_strange_operators(): + # This is a test for the following operators: + # - `SEL` (select) + # - `EQ` (equals) + # - `MINUS` (except) assert_sql_result( """ -select col1, col2 from dbc.table1 +sel col1, col2 from dbc.table1 where col1 eq 'value1' minus select col1, col2 from dbc.table2 @@ -689,6 +693,19 @@ def test_teradata_strange_operators(): ) +@pytest.mark.skip("sqlglot doesn't support this cast syntax yet") +def test_teradata_cast_syntax(): + assert_sql_result( + """ +SELECT my_table.date_col MONTH(4) AS month_col +FROM my_table +""", + dialect="teradata", + default_schema="dbc", + expected_file=RESOURCE_DIR / "test_teradata_cast_syntax.json", + ) + + def test_snowflake_update_hardcoded(): assert_sql_result( """ From 876de214c9a11f8928d8eafe5c7f658d5b9dc61f Mon Sep 17 00:00:00 2001 From: Tony Ouyang Date: Wed, 1 Nov 2023 00:13:17 -0700 Subject: [PATCH 69/80] feat(ingest/mongodb): support stateful ingestion (#9118) --- .../src/datahub/ingestion/source/mongodb.py | 74 +- .../mongodb/mongodb_mces_golden.json | 8320 +++++++++-------- 2 files changed, 4270 insertions(+), 4124 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 890c5c64bd5e6..ce2b9ce2981e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -15,7 +15,12 @@ EnvConfigMixin, PlatformInstanceConfigMixin, ) -from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance +from datahub.emitter.mce_builder import ( + make_data_platform_urn, + make_dataplatform_instance_urn, + make_dataset_urn_with_platform_instance, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -25,14 +30,21 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.schema_inference.object import ( SchemaDescription, construct_schema, ) -from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot -from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalHandler, + StaleEntityRemovalSourceReport, + StatefulIngestionConfigBase, + StatefulStaleMetadataRemovalConfig, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionSourceBase, +) from datahub.metadata.com.linkedin.pegasus2avro.schema import ( ArrayTypeClass, BooleanTypeClass, @@ -48,7 +60,10 @@ TimeTypeClass, UnionTypeClass, ) -from datahub.metadata.schema_classes import DatasetPropertiesClass +from datahub.metadata.schema_classes import ( + DataPlatformInstanceClass, + DatasetPropertiesClass, +) logger = logging.getLogger(__name__) @@ -59,7 +74,9 @@ DENY_DATABASE_LIST = set(["admin", "config", "local"]) -class MongoDBConfig(PlatformInstanceConfigMixin, EnvConfigMixin): +class MongoDBConfig( + PlatformInstanceConfigMixin, EnvConfigMixin, StatefulIngestionConfigBase +): # See the MongoDB authentication docs for details and examples. # https://pymongo.readthedocs.io/en/stable/examples/authentication.html connect_uri: str = Field( @@ -99,6 +116,8 @@ class MongoDBConfig(PlatformInstanceConfigMixin, EnvConfigMixin): default=AllowDenyPattern.allow_all(), description="regex patterns for collections to filter in ingestion.", ) + # Custom Stateful Ingestion settings + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None @validator("maxDocumentSize") def check_max_doc_size_filter_is_valid(cls, doc_size_filter_value): @@ -108,7 +127,7 @@ def check_max_doc_size_filter_is_valid(cls, doc_size_filter_value): @dataclass -class MongoDBSourceReport(SourceReport): +class MongoDBSourceReport(StaleEntityRemovalSourceReport): filtered: List[str] = field(default_factory=list) def report_dropped(self, name: str) -> None: @@ -129,6 +148,7 @@ def report_dropped(self, name: str) -> None: bson.timestamp.Timestamp: "timestamp", bson.dbref.DBRef: "dbref", bson.objectid.ObjectId: "oid", + bson.Decimal128: "numberDecimal", "mixed": "mixed", } @@ -145,6 +165,7 @@ def report_dropped(self, name: str) -> None: bson.timestamp.Timestamp: TimeTypeClass, bson.dbref.DBRef: BytesTypeClass, bson.objectid.ObjectId: BytesTypeClass, + bson.Decimal128: NumberTypeClass, dict: RecordTypeClass, "mixed": UnionTypeClass, } @@ -206,7 +227,7 @@ def construct_schema_pymongo( @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.SCHEMA_METADATA, "Enabled by default") @dataclass -class MongoDBSource(Source): +class MongoDBSource(StatefulIngestionSourceBase): """ This plugin extracts the following: @@ -227,7 +248,7 @@ class MongoDBSource(Source): mongo_client: MongoClient def __init__(self, ctx: PipelineContext, config: MongoDBConfig): - super().__init__(ctx) + super().__init__(config, ctx) self.config = config self.report = MongoDBSourceReport() @@ -254,6 +275,14 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> "MongoDBSource": config = MongoDBConfig.parse_obj(config_dict) return cls(ctx, config) + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + *super().get_workunit_processors(), + StaleEntityRemovalHandler.create( + self, self.config, self.ctx + ).workunit_processor, + ] + def get_pymongo_type_string( self, field_type: Union[Type, str], collection_name: str ) -> str: @@ -332,16 +361,18 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: platform_instance=self.config.platform_instance, ) - dataset_snapshot = DatasetSnapshot( - urn=dataset_urn, - aspects=[], - ) + if self.config.platform_instance: + data_platform_instance = DataPlatformInstanceClass( + platform=make_data_platform_urn(platform), + instance=make_dataplatform_instance_urn( + platform, self.config.platform_instance + ), + ) dataset_properties = DatasetPropertiesClass( tags=[], customProperties={}, ) - dataset_snapshot.aspects.append(dataset_properties) if self.config.enableSchemaInference: assert self.config.maxDocumentSize is not None @@ -412,13 +443,20 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: fields=canonical_schema, ) - dataset_snapshot.aspects.append(schema_metadata) - # TODO: use list_indexes() or index_information() to get index information # See https://pymongo.readthedocs.io/en/stable/api/pymongo/collection.html#pymongo.collection.Collection.list_indexes. - mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot) - yield MetadataWorkUnit(id=dataset_name, mce=mce) + yield from [ + mcp.as_workunit() + for mcp in MetadataChangeProposalWrapper.construct_many( + entityUrn=dataset_urn, + aspects=[ + schema_metadata, + dataset_properties, + data_platform_instance, + ], + ) + ] def is_server_version_gte_4_4(self) -> bool: try: diff --git a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json index e16101b137ac9..ec3fd80e6a6ea 100644 --- a/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json +++ b/metadata-ingestion/tests/integration/mongodb/mongodb_mces_golden.json @@ -1,4136 +1,4240 @@ [ { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "emptyCollection", - "platform": "urn:li:dataPlatform:mongodb", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.Schemaless": {} - }, - "fields": [] - } + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "emptyCollection", + "platform": "urn:li:dataPlatform:mongodb", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.emptyCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "firstCollection", + "platform": "urn:li:dataPlatform:mongodb", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [ + { + "fieldPath": "_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BytesType": {} + } + }, + "nativeDataType": "oid", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "age", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "float", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "canSwim", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "emptyObject", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "OBJECT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteColor", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "OBJECT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.calories", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.emptyObject", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.RecordType": {} + } + }, + "nativeDataType": "OBJECT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.ingredients", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "ARRAY", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.ingredients.color", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.ingredients.from", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.ingredients.name", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "favoriteFood.servings", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "float", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "legs", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "mixedType", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.UnionType": {} + } + }, + "nativeDataType": "mixed", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "seen", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "float", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "servings", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "sometimesNull", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "tags", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "ARRAY", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "type", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "largeCollection", + "platform": "urn:li:dataPlatform:mongodb", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [ + { + "fieldPath": "_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BytesType": {} + } + }, + "nativeDataType": "oid", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_200", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_201", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_202", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_203", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_204", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_205", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_206", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_207", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_208", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_209", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_210", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_211", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_212", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_213", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_214", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_215", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_216", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_217", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_218", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_219", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_220", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_221", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_222", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_223", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_224", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_225", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_226", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_227", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_228", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_229", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_230", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_231", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_232", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_233", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_234", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_235", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_236", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_237", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_238", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_239", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_240", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_241", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_242", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_243", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_244", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_245", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_246", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_247", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_248", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_249", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_250", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_251", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_252", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_253", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_254", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_255", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_256", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_257", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_258", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_259", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_260", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_261", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_262", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_263", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_264", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_265", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_266", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_267", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_268", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_269", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_270", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_271", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_272", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_273", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_274", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_275", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_276", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_277", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_278", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_279", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_280", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_281", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_282", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_283", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_284", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_285", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_286", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_287", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_288", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_289", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_290", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_291", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_292", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_293", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_294", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_295", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_296", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_297", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_298", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_299", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_300", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_301", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_302", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_303", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_304", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_305", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_306", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_307", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_308", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_309", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_310", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_311", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_312", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_313", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_314", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_315", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_316", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_317", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_318", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_319", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_320", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_321", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_322", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_323", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_324", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_325", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_326", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_327", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_328", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_329", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_330", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_331", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_332", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_333", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_334", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_335", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_336", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_337", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_338", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_339", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_340", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_341", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_342", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_343", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_344", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_345", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_346", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_347", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_348", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_349", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_350", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_351", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_352", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_353", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_354", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_355", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_356", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_357", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_358", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_359", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_360", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_361", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_362", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_363", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_364", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_365", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_366", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_367", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_368", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_369", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_370", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_371", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_372", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_374", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_375", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_376", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_377", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_378", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_379", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_380", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_381", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_382", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_383", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_384", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_385", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_386", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_387", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_388", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_389", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_390", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_391", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_392", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_393", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_394", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_395", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_396", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_397", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_398", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_399", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_400", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_401", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_402", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_403", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_404", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_405", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_406", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_407", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_408", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_409", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_410", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_411", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_412", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_413", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_414", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_415", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_416", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_417", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_418", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_419", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_420", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_421", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_422", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_423", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_424", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_425", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_426", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_427", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_428", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_429", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_430", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_431", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_432", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_433", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_434", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_435", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_436", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_437", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_438", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_439", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_440", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_441", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_442", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_443", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_444", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_445", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_446", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_447", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_448", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_449", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_450", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_451", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_452", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_453", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_454", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_455", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_456", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_457", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_458", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_459", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_460", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_461", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_462", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_463", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_464", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_465", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_466", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_467", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_468", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_469", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_470", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_471", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_472", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_473", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_474", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_475", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_476", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_477", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_478", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_479", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_480", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_481", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_482", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_483", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_484", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_485", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_486", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_487", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_488", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_489", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_490", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_491", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_492", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_493", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_494", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_495", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_496", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_497", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_498", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "field_499", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mongodb-test" + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.firstCollection,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "firstCollection", - "platform": "urn:li:dataPlatform:mongodb", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.Schemaless": {} - }, - "fields": [ - { - "fieldPath": "_id", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.BytesType": {} - } - }, - "nativeDataType": "oid", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "age", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "float", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "canSwim", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.BooleanType": {} - } - }, - "nativeDataType": "boolean", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "emptyObject", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.RecordType": {} - } - }, - "nativeDataType": "OBJECT", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteColor", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.RecordType": {} - } - }, - "nativeDataType": "OBJECT", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood.calories", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood.emptyObject", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.RecordType": {} - } - }, - "nativeDataType": "OBJECT", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood.ingredients", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.ArrayType": {} - } - }, - "nativeDataType": "ARRAY", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood.ingredients.color", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood.ingredients.from", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood.ingredients.name", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "favoriteFood.servings", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "float", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "legs", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "mixedType", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.UnionType": {} - } - }, - "nativeDataType": "mixed", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "seen", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "float", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "servings", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "sometimesNull", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "tags", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.ArrayType": {} - } - }, - "nativeDataType": "ARRAY", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "type", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - } - ] - } - } - ] + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "schema.downsampled": "True", + "schema.totalFields": "501" + }, + "tags": [] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mongodb-test" + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "schema.downsampled": "True", - "schema.totalFields": "501" - }, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "largeCollection", - "platform": "urn:li:dataPlatform:mongodb", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.Schemaless": {} - }, - "fields": [ - { - "fieldPath": "_id", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.BytesType": {} - } - }, - "nativeDataType": "oid", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_200", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_201", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_202", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_203", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_204", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_205", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_206", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_207", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_208", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_209", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_210", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_211", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_212", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_213", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_214", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_215", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_216", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_217", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_218", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_219", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_220", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_221", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_222", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_223", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_224", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_225", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_226", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_227", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_228", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_229", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_230", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_231", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_232", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_233", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_234", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_235", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_236", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_237", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_238", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_239", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_240", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_241", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_242", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_243", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_244", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_245", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_246", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_247", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_248", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_249", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_250", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_251", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_252", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_253", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_254", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_255", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_256", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_257", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_258", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_259", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_260", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_261", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_262", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_263", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_264", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_265", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_266", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_267", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_268", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_269", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_270", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_271", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_272", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_273", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_274", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_275", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_276", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_277", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_278", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_279", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_280", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_281", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_282", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_283", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_284", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_285", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_286", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_287", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_288", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_289", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_290", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_291", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_292", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_293", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_294", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_295", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_296", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_297", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_298", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_299", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_300", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_301", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_302", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_303", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_304", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_305", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_306", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_307", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_308", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_309", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_310", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_311", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_312", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_313", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_314", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_315", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_316", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_317", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_318", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_319", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_320", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_321", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_322", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_323", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_324", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_325", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_326", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_327", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_328", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_329", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_330", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_331", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_332", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_333", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_334", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_335", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_336", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_337", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_338", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_339", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_340", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_341", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_342", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_343", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_344", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_345", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_346", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_347", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_348", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_349", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_350", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_351", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_352", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_353", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_354", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_355", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_356", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_357", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_358", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_359", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_360", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_361", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_362", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_363", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_364", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_365", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_366", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_367", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_368", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_369", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_370", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_371", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_372", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_374", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_375", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_376", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_377", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_378", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_379", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_380", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_381", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_382", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_383", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_384", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_385", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_386", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_387", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_388", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_389", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_390", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_391", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_392", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_393", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_394", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_395", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_396", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_397", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_398", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_399", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_400", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_401", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_402", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_403", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_404", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_405", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_406", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_407", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_408", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_409", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_410", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_411", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_412", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_413", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_414", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_415", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_416", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_417", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_418", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_419", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_420", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_421", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_422", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_423", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_424", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_425", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_426", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_427", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_428", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_429", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_430", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_431", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_432", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_433", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_434", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_435", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_436", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_437", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_438", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_439", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_440", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_441", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_442", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_443", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_444", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_445", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_446", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_447", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_448", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_449", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_450", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_451", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_452", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_453", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_454", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_455", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_456", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_457", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_458", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_459", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_460", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_461", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_462", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_463", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_464", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_465", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_466", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_467", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_468", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_469", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_470", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_471", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_472", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_473", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_474", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_475", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_476", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_477", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_478", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_479", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_480", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_481", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_482", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_483", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_484", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_485", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_486", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_487", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_488", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_489", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_490", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_491", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_492", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_493", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_494", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_495", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_496", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_497", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_498", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "field_499", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - } - ] - } - } - ] + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.largeCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mongodb-test" + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "secondCollection", - "platform": "urn:li:dataPlatform:mongodb", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.Schemaless": {} - }, - "fields": [ - { - "fieldPath": "_id", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.BytesType": {} - } - }, - "nativeDataType": "oid", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "mixedType", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.UnionType": {} - } - }, - "nativeDataType": "mixed", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "mixedType.fieldA", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "mixedType.fieldTwo", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "nullableMixedType", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.UnionType": {} - } - }, - "nativeDataType": "mixed", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "rating", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "float", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "tasty", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.BooleanType": {} - } - }, - "nativeDataType": "boolean", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "varieties", - "nullable": true, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.ArrayType": {} - } - }, - "nativeDataType": "ARRAY", - "recursive": false, - "isPartOfKey": false - } - ] - } + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "secondCollection", + "platform": "urn:li:dataPlatform:mongodb", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [ + { + "fieldPath": "_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BytesType": {} + } + }, + "nativeDataType": "oid", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "mixedType", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.UnionType": {} + } + }, + "nativeDataType": "mixed", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "mixedType.fieldA", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "mixedType.fieldTwo", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "name", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "nullableMixedType", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.UnionType": {} + } + }, + "nativeDataType": "mixed", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rating", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "float", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "tasty", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "varieties", + "nullable": true, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "ARRAY", + "recursive": false, + "isPartOfKey": false } ] } }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mongodb-test" + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": {}, + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mongodb,instance.mngdb.secondCollection,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mongodb", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:mongodb,instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4145,7 +4249,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mongodb-test" + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4160,7 +4265,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mongodb-test" + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4175,7 +4281,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mongodb-test" + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" } }, { @@ -4190,7 +4297,8 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "mongodb-test" + "runId": "mongodb-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file From f2eb0cf3073967d505004e9f2df58d5475cadec1 Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Wed, 1 Nov 2023 15:41:02 -0400 Subject: [PATCH 70/80] smoke-test: API test for managing secrets privilege (#9121) --- .../tests/privileges/test_privileges.py | 241 ++++++++++++++++++ smoke-test/tests/privileges/utils.py | 218 ++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100644 smoke-test/tests/privileges/test_privileges.py create mode 100644 smoke-test/tests/privileges/utils.py diff --git a/smoke-test/tests/privileges/test_privileges.py b/smoke-test/tests/privileges/test_privileges.py new file mode 100644 index 0000000000000..13d6b6cf3415a --- /dev/null +++ b/smoke-test/tests/privileges/test_privileges.py @@ -0,0 +1,241 @@ +import pytest +import tenacity + +from tests.utils import (get_frontend_session, wait_for_writes_to_sync, wait_for_healthcheck_util, + get_frontend_url, get_admin_credentials,get_sleep_info) +from tests.privileges.utils import * + +sleep_sec, sleep_times = get_sleep_info() + +@pytest.fixture(scope="session") +def wait_for_healthchecks(): + wait_for_healthcheck_util() + yield + + +@pytest.mark.dependency() +def test_healthchecks(wait_for_healthchecks): + # Call to wait_for_healthchecks fixture will do the actual functionality. + pass + + +@pytest.fixture(scope="session") +def admin_session(wait_for_healthchecks): + yield get_frontend_session() + + +@pytest.mark.dependency(depends=["test_healthchecks"]) +@pytest.fixture(scope="module", autouse=True) +def privileges_and_test_user_setup(admin_session): + """Fixture to execute setup before and tear down after all tests are run""" + # Disable 'All users' privileges + set_base_platform_privileges_policy_status("INACTIVE", admin_session) + set_view_dataset_sensitive_info_policy_status("INACTIVE", admin_session) + set_view_entity_profile_privileges_policy_status("INACTIVE", admin_session) + # Sleep for eventual consistency + wait_for_writes_to_sync() + + # Create a new user + admin_session = create_user(admin_session, "user", "user") + + yield + + # Remove test user + remove_user(admin_session, "urn:li:corpuser:user") + + # Restore All users privileges + set_base_platform_privileges_policy_status("ACTIVE", admin_session) + set_view_dataset_sensitive_info_policy_status("ACTIVE", admin_session) + set_view_entity_profile_privileges_policy_status("ACTIVE", admin_session) + + # Sleep for eventual consistency + wait_for_writes_to_sync() + + +@tenacity.retry( + stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec) +) +def _ensure_can_create_secret(session, json, urn): + create_secret_success = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=json) + create_secret_success.raise_for_status() + secret_data = create_secret_success.json() + + assert secret_data + assert secret_data["data"] + assert secret_data["data"]["createSecret"] + assert secret_data["data"]["createSecret"] == urn + + +@tenacity.retry( + stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) +) +def _ensure_cant_create_secret(session, json): + create_secret_response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=json) + create_secret_response.raise_for_status() + create_secret_data = create_secret_response.json() + + assert create_secret_data["errors"][0]["extensions"]["code"] == 403 + assert create_secret_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED" + assert create_secret_data["data"]["createSecret"] == None + + +@tenacity.retry( + stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec) +) +def _ensure_can_create_ingestion_source(session, json): + create_ingestion_success = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=json) + create_ingestion_success.raise_for_status() + ingestion_data = create_ingestion_success.json() + + assert ingestion_data + assert ingestion_data["data"] + assert ingestion_data["data"]["createIngestionSource"] + assert ingestion_data["data"]["createIngestionSource"] is not None + + return ingestion_data["data"]["createIngestionSource"] + + +@tenacity.retry( + stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) +) +def _ensure_cant_create_ingestion_source(session, json): + create_source_response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=json) + create_source_response.raise_for_status() + create_source_data = create_source_response.json() + + assert create_source_data["errors"][0]["extensions"]["code"] == 403 + assert create_source_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED" + assert create_source_data["data"]["createIngestionSource"] == None + + +@pytest.mark.dependency(depends=["test_healthchecks"]) +def test_privilege_to_create_and_manage_secrets(): + + (admin_user, admin_pass) = get_admin_credentials() + admin_session = login_as(admin_user, admin_pass) + user_session = login_as("user", "user") + secret_urn = "urn:li:dataHubSecret:TestSecretName" + + # Verify new user can't create secrets + create_secret = { + "query": """mutation createSecret($input: CreateSecretInput!) {\n + createSecret(input: $input)\n}""", + "variables": { + "input":{ + "name":"TestSecretName", + "value":"Test Secret Value", + "description":"Test Secret Description" + } + }, + } + _ensure_cant_create_secret(user_session, create_secret) + + + # Assign privileges to the new user to manage secrets + policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_SECRETS"], admin_session) + + # Verify new user can create and manage secrets + # Create a secret + _ensure_can_create_secret(user_session, create_secret, secret_urn) + + + # Remove a secret + remove_secret = { + "query": """mutation deleteSecret($urn: String!) {\n + deleteSecret(urn: $urn)\n}""", + "variables": { + "urn": secret_urn + }, + } + + remove_secret_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_secret) + remove_secret_response.raise_for_status() + secret_data = remove_secret_response.json() + + assert secret_data + assert secret_data["data"] + assert secret_data["data"]["deleteSecret"] + assert secret_data["data"]["deleteSecret"] == secret_urn + + + # Remove the policy + remove_policy(policy_urn, admin_session) + + # Ensure user can't create secret after policy is removed + _ensure_cant_create_secret(user_session, create_secret) + + +@pytest.mark.dependency(depends=["test_healthchecks"]) +def test_privilege_to_create_and_manage_ingestion_source(): + + (admin_user, admin_pass) = get_admin_credentials() + admin_session = login_as(admin_user, admin_pass) + user_session = login_as("user", "user") + + # Verify new user can't create ingestion source + create_ingestion_source = { + "query": """mutation createIngestionSource($input: UpdateIngestionSourceInput!) {\n + createIngestionSource(input: $input)\n}""", + "variables": {"input":{"type":"snowflake","name":"test","config": + {"recipe": + "{\"source\":{\"type\":\"snowflake\",\"config\":{\"account_id\":null,\"include_table_lineage\":true,\"include_view_lineage\":true,\"include_tables\":true,\"include_views\":true,\"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},\"stateful_ingestion\":{\"enabled\":true}}}}", + "executorId":"default","debugMode":False,"extraArgs":[]}}}, + } + + _ensure_cant_create_ingestion_source(user_session, create_ingestion_source) + + + # Assign privileges to the new user to manage ingestion source + policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_INGESTION"], admin_session) + + # Verify new user can create and manage ingestion source(edit, delete) + ingestion_source_urn = _ensure_can_create_ingestion_source(user_session, create_ingestion_source) + + # Edit ingestion source + update_ingestion_source = { + "query": """mutation updateIngestionSource($urn: String!, $input: UpdateIngestionSourceInput!) {\n + updateIngestionSource(urn: $urn, input: $input)\n}""", + "variables": {"urn":ingestion_source_urn, + "input":{"type":"snowflake","name":"test updated", + "config":{"recipe":"{\"source\":{\"type\":\"snowflake\",\"config\":{\"account_id\":null,\"include_table_lineage\":true,\"include_view_lineage\":true,\"include_tables\":true,\"include_views\":true,\"profiling\":{\"enabled\":true,\"profile_table_level_only\":true},\"stateful_ingestion\":{\"enabled\":true}}}}", + "executorId":"default","debugMode":False,"extraArgs":[]}}} + } + + update_ingestion_success = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=update_ingestion_source) + update_ingestion_success.raise_for_status() + ingestion_data = update_ingestion_success.json() + + assert ingestion_data + assert ingestion_data["data"] + assert ingestion_data["data"]["updateIngestionSource"] + assert ingestion_data["data"]["updateIngestionSource"] == ingestion_source_urn + + + # Delete ingestion source + remove_ingestion_source = { + "query": """mutation deleteIngestionSource($urn: String!) {\n + deleteIngestionSource(urn: $urn)\n}""", + "variables": { + "urn": ingestion_source_urn + }, + } + + remove_ingestion_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_ingestion_source) + remove_ingestion_response.raise_for_status() + ingestion_data = remove_ingestion_response.json() + + assert ingestion_data + assert ingestion_data["data"] + assert ingestion_data["data"]["deleteIngestionSource"] + assert ingestion_data["data"]["deleteIngestionSource"] == ingestion_source_urn + + # Remove the policy + remove_policy(policy_urn, admin_session) + + # Ensure that user can't create ingestion source after policy is removed + _ensure_cant_create_ingestion_source(user_session, create_ingestion_source) \ No newline at end of file diff --git a/smoke-test/tests/privileges/utils.py b/smoke-test/tests/privileges/utils.py new file mode 100644 index 0000000000000..ea1f565f6f5ac --- /dev/null +++ b/smoke-test/tests/privileges/utils.py @@ -0,0 +1,218 @@ +import requests_wrapper as requests +from tests.consistency_utils import wait_for_writes_to_sync +from tests.utils import (get_frontend_url, wait_for_writes_to_sync, get_admin_credentials) + + +def set_base_platform_privileges_policy_status(status, session): + base_platform_privileges = { + "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n + updatePolicy(urn: $urn, input: $input) }""", + "variables": { + "urn": "urn:li:dataHubPolicy:7", + "input": { + "type": "PLATFORM", + "state": status, + "name": "All Users - Base Platform Privileges", + "description": "Grants base platform privileges to ALL users of DataHub. Change this policy to alter that behavior.", + "privileges": ["MANAGE_INGESTION", + "MANAGE_SECRETS", + "MANAGE_USERS_AND_GROUPS", + "VIEW_ANALYTICS", + "GENERATE_PERSONAL_ACCESS_TOKENS", + "MANAGE_DOMAINS", + "MANAGE_GLOBAL_ANNOUNCEMENTS", + "MANAGE_TESTS", + "MANAGE_GLOSSARIES", + "MANAGE_TAGS", + "MANAGE_GLOBAL_VIEWS", + "MANAGE_GLOBAL_OWNERSHIP_TYPES"], + "actors": { + "users": [], + "groups": None, + "resourceOwners": False, + "allUsers": True, + "allGroups": False, + "resourceOwnersTypes": None, + }, + }, + }, + } + base_privileges_response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=base_platform_privileges) + base_privileges_response.raise_for_status() + base_res_data = base_privileges_response.json() + assert base_res_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:7" + +def set_view_dataset_sensitive_info_policy_status(status, session): + dataset_sensitive_information = { + "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n + updatePolicy(urn: $urn, input: $input) }""", + "variables": { + "urn": "urn:li:dataHubPolicy:view-dataset-sensitive", + "input": { + "type": "METADATA", + "state": status, + "name": "All Users - View Dataset Sensitive Information", + "description": "Grants viewing privileges of usage and profile information of all datasets for all users", + "privileges": ["VIEW_DATASET_USAGE","VIEW_DATASET_PROFILE"], + "actors": { + "users": [], + "groups": None, + "resourceOwners": False, + "allUsers": True, + "allGroups": False, + "resourceOwnersTypes": None, + }, + }, + }, + } + sensitive_info_response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=dataset_sensitive_information) + sensitive_info_response.raise_for_status() + sens_info_data = sensitive_info_response.json() + assert sens_info_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:view-dataset-sensitive" + +def set_view_entity_profile_privileges_policy_status(status, session): + view_entity_page = { + "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n + updatePolicy(urn: $urn, input: $input) }""", + "variables": { + "urn": "urn:li:dataHubPolicy:view-entity-page-all", + "input": { + "type": "METADATA", + "state": status, + "name": "All Users - View Entity Page", + "description": "Grants entity view to all users", + "privileges": ["VIEW_ENTITY_PAGE", + "SEARCH_PRIVILEGE", + "GET_COUNTS_PRIVILEGE", + "GET_TIMESERIES_ASPECT_PRIVILEGE", + "GET_ENTITY_PRIVILEGE", + "GET_TIMELINE_PRIVILEGE"], + "actors": { + "users": [], + "groups": None, + "resourceOwners": False, + "allUsers": True, + "allGroups": False, + "resourceOwnersTypes": None, + }, + }, + }, + } + view_entity_response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=view_entity_page) + view_entity_response.raise_for_status() + view_entity_data = view_entity_response.json() + assert view_entity_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:view-entity-page-all" + +def create_user(session, email, password): + # Remove user if exists + res_data = remove_user(session, f"urn:li:corpuser:{email}") + assert res_data + assert "error" not in res_data + # Get the invite token + get_invite_token_json = { + "query": """query getInviteToken($input: GetInviteTokenInput!) {\n + getInviteToken(input: $input){\n + inviteToken\n + }\n + }""", + "variables": {"input": {}}, + } + get_invite_token_response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=get_invite_token_json + ) + get_invite_token_response.raise_for_status() + get_invite_token_res_data = get_invite_token_response.json() + invite_token = get_invite_token_res_data["data"]["getInviteToken"]["inviteToken"] + assert invite_token is not None + assert "error" not in invite_token + # Create a new user using the invite token + sign_up_json = { + "fullName": "Test User", + "email": email, + "password": password, + "title": "Data Engineer", + "inviteToken": invite_token, + } + sign_up_response = session.post( + f"{get_frontend_url()}/signUp", json=sign_up_json + ) + sign_up_response.raise_for_status() + assert sign_up_response + assert "error" not in sign_up_response + wait_for_writes_to_sync() + session.cookies.clear() + (admin_user, admin_pass) = get_admin_credentials() + admin_session = login_as(admin_user, admin_pass) + return admin_session + + +def login_as(username, password): + session = requests.Session() + headers = { + "Content-Type": "application/json", + } + data = '{"username":"' + username + '", "password":"' + password + '"}' + response = session.post(f"{get_frontend_url()}/logIn", headers=headers, data=data) + response.raise_for_status() + return session + +def remove_user(session, urn): + json = { + "query": """mutation removeUser($urn: String!) {\n + removeUser(urn: $urn) + }""", + "variables": {"urn": urn}, + } + response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response.raise_for_status() + return response.json() + +def create_user_policy(user_urn, privileges, session): + policy = { + "query": """mutation createPolicy($input: PolicyUpdateInput!) {\n + createPolicy(input: $input) }""", + "variables": { + "input": { + "type": "PLATFORM", + "name": "Policy Name", + "description": "Policy Description", + "state": "ACTIVE", + "resources": {"filter":{"criteria":[]}}, + "privileges": privileges, + "actors": { + "users": [user_urn], + "resourceOwners": False, + "allUsers": False, + "allGroups": False, + }, + } + }, + } + + response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=policy) + response.raise_for_status() + res_data = response.json() + + assert res_data + assert res_data["data"] + assert res_data["data"]["createPolicy"] + return res_data["data"]["createPolicy"] + +def remove_policy(urn, session): + remove_policy_json = { + "query": """mutation deletePolicy($urn: String!) {\n + deletePolicy(urn: $urn) }""", + "variables": {"urn": urn}, + } + + response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_policy_json) + response.raise_for_status() + res_data = response.json() + + assert res_data + assert res_data["data"] + assert res_data["data"]["deletePolicy"] + assert res_data["data"]["deletePolicy"] == urn \ No newline at end of file From 95d9ff2cc2b71c5062454f6da1eca5084d6dd6eb Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 2 Nov 2023 01:44:16 +0530 Subject: [PATCH 71/80] fix(ingest): handle exceptions in min, max, mean profiling (#9129) --- .../ingestion/source/ge_data_profiler.py | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 9f6ac9dd21164..6b97d2eb456da 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -406,22 +406,52 @@ def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None: def _get_dataset_column_min( self, column_profile: DatasetFieldProfileClass, column: str ) -> None: - if self.config.include_field_min_value: + if not self.config.include_field_min_value: + return + try: column_profile.min = str(self.dataset.get_column_min(column)) + except Exception as e: + logger.debug( + f"Caught exception while attempting to get column min for column {column}. {e}" + ) + self.report.report_warning( + "Profiling - Unable to get column min", + f"{self.dataset_name}.{column}", + ) @_run_with_query_combiner def _get_dataset_column_max( self, column_profile: DatasetFieldProfileClass, column: str ) -> None: - if self.config.include_field_max_value: + if not self.config.include_field_max_value: + return + try: column_profile.max = str(self.dataset.get_column_max(column)) + except Exception as e: + logger.debug( + f"Caught exception while attempting to get column max for column {column}. {e}" + ) + self.report.report_warning( + "Profiling - Unable to get column max", + f"{self.dataset_name}.{column}", + ) @_run_with_query_combiner def _get_dataset_column_mean( self, column_profile: DatasetFieldProfileClass, column: str ) -> None: - if self.config.include_field_mean_value: + if not self.config.include_field_mean_value: + return + try: column_profile.mean = str(self.dataset.get_column_mean(column)) + except Exception as e: + logger.debug( + f"Caught exception while attempting to get column mean for column {column}. {e}" + ) + self.report.report_warning( + "Profiling - Unable to get column mean", + f"{self.dataset_name}.{column}", + ) @_run_with_query_combiner def _get_dataset_column_median( From 932eebea353cf6f31bc489428feb54b43d647075 Mon Sep 17 00:00:00 2001 From: kushagra-apptware <81357546+kushagra-apptware@users.noreply.github.com> Date: Thu, 2 Nov 2023 01:50:15 +0530 Subject: [PATCH 72/80] feat: rename Assets tab to Owner Of (#9141) Co-authored-by: John Joyce --- datahub-web-react/src/app/entity/group/GroupProfile.tsx | 2 +- datahub-web-react/src/app/entity/user/UserProfile.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datahub-web-react/src/app/entity/group/GroupProfile.tsx b/datahub-web-react/src/app/entity/group/GroupProfile.tsx index 53d2062277dec..11ed31e00003f 100644 --- a/datahub-web-react/src/app/entity/group/GroupProfile.tsx +++ b/datahub-web-react/src/app/entity/group/GroupProfile.tsx @@ -16,7 +16,7 @@ import NonExistentEntityPage from '../shared/entity/NonExistentEntityPage'; const messageStyle = { marginTop: '10%' }; export enum TabType { - Assets = 'Assets', + Assets = 'Owner Of', Members = 'Members', } diff --git a/datahub-web-react/src/app/entity/user/UserProfile.tsx b/datahub-web-react/src/app/entity/user/UserProfile.tsx index 1d20072c4ea8f..e8284ba61afe4 100644 --- a/datahub-web-react/src/app/entity/user/UserProfile.tsx +++ b/datahub-web-react/src/app/entity/user/UserProfile.tsx @@ -17,7 +17,7 @@ export interface Props { } export enum TabType { - Assets = 'Assets', + Assets = 'Owner Of', Groups = 'Groups', } const ENABLED_TAB_TYPES = [TabType.Assets, TabType.Groups]; From 50789224a12e0f48d6b4ca2ef3876498f7738d9e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 1 Nov 2023 16:58:37 -0700 Subject: [PATCH 73/80] fix(ingest/mongodb): fix schema inference for lists of values (#9145) --- .../datahub/ingestion/source/schema_inference/object.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py index 5797d66aa4d19..b58bdf41ccaa5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py +++ b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py @@ -16,7 +16,7 @@ class SchemaDescription(BasicSchemaDescription): nullable: bool # if field is ever missing -def is_field_nullable(doc: Dict[str, Any], field_path: Tuple) -> bool: +def is_field_nullable(doc: Dict[str, Any], field_path: Tuple[str, ...]) -> bool: """ Check if a nested field is nullable in a document from a collection. @@ -54,7 +54,10 @@ def is_field_nullable(doc: Dict[str, Any], field_path: Tuple) -> bool: # count empty lists of nested objects as nullable if len(value) == 0: return True - return any(is_field_nullable(x, remaining_fields) for x in doc[field]) + return any( + isinstance(x, dict) and is_field_nullable(x, remaining_fields) + for x in doc[field] + ) # any other types to check? # raise ValueError("Nested type not 'list' or 'dict' encountered") From f7cd80283ad768afe14e3cf53b9c38fe912be570 Mon Sep 17 00:00:00 2001 From: deepgarg-visa <149145061+deepgarg-visa@users.noreply.github.com> Date: Thu, 2 Nov 2023 09:16:58 +0530 Subject: [PATCH 74/80] fix(ingest/db2): fix handling for table properties (#9128) Co-authored-by: Harshal Sheth --- .../src/datahub/ingestion/source/sql/sql_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 51909eaf4ed55..80f828e9ea2fd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -20,6 +20,7 @@ import sqlalchemy.dialects.postgresql.base from sqlalchemy import create_engine, inspect from sqlalchemy.engine.reflection import Inspector +from sqlalchemy.engine.row import LegacyRow from sqlalchemy.exc import ProgrammingError from sqlalchemy.sql import sqltypes as types from sqlalchemy.types import TypeDecorator, TypeEngine @@ -784,7 +785,7 @@ def get_table_properties( table_info: dict = inspector.get_table_comment(table, f'"{schema}"') # type: ignore description = table_info.get("text") - if type(description) is tuple: + if isinstance(description, LegacyRow): # Handling for value type tuple which is coming for dialect 'db2+ibm_db' description = table_info["text"][0] From bab9d1c93196b7a7181525609e854640c9a13712 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 1 Nov 2023 20:47:18 -0700 Subject: [PATCH 75/80] fix(ingest): fully support MCPs in urn_iter primitive (#9157) --- .../src/datahub/utilities/urns/urn_iter.py | 16 +++++++------- .../tests/unit/serde/test_urn_iterator.py | 21 +++++++++++++------ 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py index e13d439161064..169a4ac3649a3 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py @@ -117,17 +117,17 @@ def _modify_at_path( if isinstance(path[0], int): assert isinstance(model, list) model[path[0]] = new_value - elif isinstance(model, MetadataChangeProposalWrapper): - setattr(model, path[0], new_value) - else: - assert isinstance(model, DictWrapper) + elif isinstance(model, DictWrapper): model._inner_dict[path[0]] = new_value + else: # MCPW + setattr(model, path[0], new_value) elif isinstance(path[0], int): assert isinstance(model, list) - return _modify_at_path(model[path[0]], path[1:], new_value) - else: - assert isinstance(model, DictWrapper) - return _modify_at_path(model._inner_dict[path[0]], path[1:], new_value) + _modify_at_path(model[path[0]], path[1:], new_value) + elif isinstance(model, DictWrapper): + _modify_at_path(model._inner_dict[path[0]], path[1:], new_value) + else: # MCPW + _modify_at_path(getattr(model, path[0]), path[1:], new_value) def _lowercase_dataset_urn(dataset_urn: str) -> str: diff --git a/metadata-ingestion/tests/unit/serde/test_urn_iterator.py b/metadata-ingestion/tests/unit/serde/test_urn_iterator.py index 9657ac45068da..135580dcdff13 100644 --- a/metadata-ingestion/tests/unit/serde/test_urn_iterator.py +++ b/metadata-ingestion/tests/unit/serde/test_urn_iterator.py @@ -1,4 +1,5 @@ import datahub.emitter.mce_builder as builder +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageTypeClass, FineGrainedLineage, @@ -10,11 +11,11 @@ from datahub.utilities.urns.urn_iter import list_urns_with_path, lowercase_dataset_urns -def _datasetUrn(tbl): +def _datasetUrn(tbl: str) -> str: return builder.make_dataset_urn("bigquery", tbl, "PROD") -def _fldUrn(tbl, fld): +def _fldUrn(tbl: str, fld: str) -> str: return builder.make_schema_field_urn(_datasetUrn(tbl), fld) @@ -114,8 +115,10 @@ def test_upstream_lineage_urn_iterator(): ] -def _make_test_lineage_obj(upstream: str, downstream: str) -> UpstreamLineage: - return UpstreamLineage( +def _make_test_lineage_obj( + table: str, upstream: str, downstream: str +) -> MetadataChangeProposalWrapper: + lineage = UpstreamLineage( upstreams=[ Upstream( dataset=_datasetUrn(upstream), @@ -132,11 +135,17 @@ def _make_test_lineage_obj(upstream: str, downstream: str) -> UpstreamLineage: ], ) + return MetadataChangeProposalWrapper(entityUrn=_datasetUrn(table), aspect=lineage) + def test_dataset_urn_lowercase_transformer(): - original = _make_test_lineage_obj("upstreamTable", "downstreamTable") + original = _make_test_lineage_obj( + "mainTableName", "upstreamTable", "downstreamTable" + ) - expected = _make_test_lineage_obj("upstreamtable", "downstreamtable") + expected = _make_test_lineage_obj( + "maintablename", "upstreamtable", "downstreamtable" + ) assert original != expected # sanity check From 12b41713b46ab474f0d55ea81fe0e854526036a9 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 2 Nov 2023 10:05:24 +0530 Subject: [PATCH 76/80] =?UTF-8?q?fix(ingest/bigquery):=20use=20correct=20r?= =?UTF-8?q?ow=20count=20in=20null=20count=20profiling=20c=E2=80=A6=20(#912?= =?UTF-8?q?3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Harshal Sheth Co-authored-by: Aseem Bansal --- .../datahub/ingestion/source/ge_data_profiler.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 6b97d2eb456da..c334a97680e3e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -659,7 +659,16 @@ def generate_dataset_profile( # noqa: C901 (complexity) self.query_combiner.flush() assert profile.rowCount is not None - row_count: int = profile.rowCount + row_count: int # used for null counts calculation + if profile.partitionSpec and "SAMPLE" in profile.partitionSpec.partition: + # We can alternatively use `self._get_dataset_rows(profile)` to get + # exact count of rows in sample, as actual rows involved in sample + # may be slightly different (more or less) than configured `sample_size`. + # However not doing so to start with, as that adds another query overhead + # plus approximate metrics should work for sampling based profiling. + row_count = self.config.sample_size + else: + row_count = profile.rowCount for column_spec in columns_profiling_queue: column = column_spec.column @@ -811,7 +820,7 @@ def update_dataset_batch_use_sampling(self, profile: DatasetProfileClass) -> Non sample_pc = 100 * self.config.sample_size / profile.rowCount sql = ( f"SELECT * FROM {str(self.dataset._table)} " - + f"TABLESAMPLE SYSTEM ({sample_pc:.3f} percent)" + + f"TABLESAMPLE SYSTEM ({sample_pc:.8f} percent)" ) temp_table_name = create_bigquery_temp_table( self, From 11d8988868357b956e7b6ccfa905689d6163f814 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Thu, 2 Nov 2023 17:46:27 +0900 Subject: [PATCH 77/80] docs: add feature guides for subscriptions and notifications (#9122) --- docs-website/sidebars.js | 5 + docs/managed-datahub/saas-slack-setup.md | 4 +- .../subscription-and-notification.md | 130 ++++++++++++++++++ 3 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 docs/managed-datahub/subscription-and-notification.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 39eaea57444ed..ab4c1311d5fc7 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -625,6 +625,11 @@ module.exports = { type: "doc", id: "docs/managed-datahub/chrome-extension", }, + { + type: "doc", + id: "docs/managed-datahub/subscription-and-notification", + className: "saasOnly", + }, { "Managed DataHub Release History": [ "docs/managed-datahub/release-notes/v_0_2_12", diff --git a/docs/managed-datahub/saas-slack-setup.md b/docs/managed-datahub/saas-slack-setup.md index 68f947f171715..8d4519b878cd8 100644 --- a/docs/managed-datahub/saas-slack-setup.md +++ b/docs/managed-datahub/saas-slack-setup.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# Configure Slack Notifications +# Configure Slack For Notifications @@ -108,4 +108,4 @@ For now we support sending notifications to - Click on “More” - Click on “Copy member ID” -![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_user_id.png) +![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_user_id.png) \ No newline at end of file diff --git a/docs/managed-datahub/subscription-and-notification.md b/docs/managed-datahub/subscription-and-notification.md new file mode 100644 index 0000000000000..b30a03de16511 --- /dev/null +++ b/docs/managed-datahub/subscription-and-notification.md @@ -0,0 +1,130 @@ +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + +# Subscriptions & Notifications + + + +DataHub's Subscriptions and Notifications feature gives you real-time change alerts on data assets of your choice. +With this feature, you can set up subscriptions to specific changes for an Entity – and DataHub will notify you when those changes happen. Currently, DataHub supports notifications on Slack, with support for Microsoft Teams and email subscriptions forthcoming. + +

+ +

+ +This feature is especially useful in helping you stay on top of any upstream changes that could impact the assets you or your stakeholders rely on. It eliminates the need for you and your team to manually check for upstream changes, or for upstream stakeholders to identify and notify impacted users. +As a user, you can subscribe to and receive notifications about changes such as deprecations, schema changes, changes in ownership, assertions, or incidents. You’ll always been in the know about potential data quality issues so you can proactively manage your data resources. + +## Prerequisites + +Once you have [configured Slack within your DataHub instance](saas-slack-setup.md), you will be able to subscribe to any Entity in DataHub and begin recieving notifications via DM. +If you want to create and manage group-level Subscriptions for your team, you will need [the following privileges](../../docs/authorization/roles.md#role-privileges): + +- Manage Group Notification Settings +- Manage Group Subscriptions + +## Using DataHub’s Subscriptions and Notifications Feature + +The first step is identifying the assets you want to subscribe to. +DataHub’s [Lineage and Impact Analysis features](../../docs/act-on-metadata/impact-analysis.md#lineage-impact-analysis-setup-prerequisites-and-permissions) can help you identify upstream entities that could impact the assets you use and are responsible for. +You can use the Subscriptions and Notifications feature to sign up for updates for your entire team, or just for yourself. + +### Subscribing Your Team/Group to Notifications + +The dropdown menu next to the Subscribe button lets you choose who the subscription is for. To create a group subscription, click on Manage Group Subscriptions. + +

+ +

+ +Next, customize the group’s subscriptions by selecting the types of changes you want the group to be notified about. + +

+ +

+ +Connect to Slack. Currently, Acryl's Subscriptions and Notifications feature integrates only with Slack. Add your group’s Slack Channel ID to receive notifications on Slack. +(You can find your Channel ID in the About section of your channel on Slack.) + +

+ +

+ +### Individually Subscribing to an Entity + +Select the **Subscribe Me** option in the Subscriptions dropdown menu. + +

+ +

+ +Pick the updates you want to be notified about, and connect your Slack account by using your Slack Member ID. + +

+ +

+ +:::note +You can find your Slack Member ID in your profile settings. + +

+ +

+::: + +### Managing Your Subscriptions + +You can enable, disable, or manage notifications at any time to ensure that you receive relevant updates. + +Simply use the Dropdown menu next to the Subscribe button to unsubscribe from the asset, or to manage/modify your subscription (say, to modify the changes you want to be updated about). + +

+ +

+ +You can also view and manage your subscriptions in your DataHub settings page. + +

+ +

+ +You can view and manage the group’s subscriptions on the group’s page on DataHub. + +

+ +

+ +## FAQ + +
+ +What changes can I be notified about using this feature? + +You can subscribe to deprecations, Assertion status changes, Incident status changes, Schema changes, Ownership changes, Glossary Term changes, and Tag changes. +

+ +

+
+ +
+ +What if I no longer want to receive updates about a data asset? + +You can unsubscribe from any asset to stop receiving notifications about it. On the asset’s DataHub page, simply use the dropdown menu next to the Subscribe button to unsubscribe from the asset. + +

+ +

+
+ +
+ +What if I want to be notified about different changes? + +To modify your subscription, use the dropdown menu next to the Subscribe button to modify the changes you want to be notified about. +
+ +## Reference + +- [DataHub Blog - Simplifying Data Monitoring & Management with Subscriptions and Notifications with Acryl DataHub](https://www.acryldata.io/blog/simplifying-data-monitoring-and-management-with-subscriptions-and-notifications-with-acryl-datahub) +- Video Guide - Getting Started with Subscription & Notifications + From f42cb95b928c071b8309cf7c3e9a0fe8b41d3a90 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Thu, 2 Nov 2023 17:46:49 +0900 Subject: [PATCH 78/80] docs: unify oidc guides using tabs (#9068) Co-authored-by: Harshal Sheth --- docs-website/sidebars.js | 11 +- .../guides/sso/configure-oidc-behind-proxy.md | 18 +- .../guides/sso/configure-oidc-react-azure.md | 127 ------- .../guides/sso/configure-oidc-react-google.md | 118 ------ .../guides/sso/configure-oidc-react-okta.md | 124 ------ .../guides/sso/configure-oidc-react.md | 355 +++++++++++++----- 6 files changed, 263 insertions(+), 490 deletions(-) delete mode 100644 docs/authentication/guides/sso/configure-oidc-react-azure.md delete mode 100644 docs/authentication/guides/sso/configure-oidc-react-google.md delete mode 100644 docs/authentication/guides/sso/configure-oidc-react-okta.md diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index ab4c1311d5fc7..9cc035f3e29e0 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -171,15 +171,8 @@ module.exports = { { "Frontend Authentication": [ "docs/authentication/guides/jaas", - { - "OIDC Authentication": [ - "docs/authentication/guides/sso/configure-oidc-react", - "docs/authentication/guides/sso/configure-oidc-react-google", - "docs/authentication/guides/sso/configure-oidc-react-okta", - "docs/authentication/guides/sso/configure-oidc-react-azure", - "docs/authentication/guides/sso/configure-oidc-behind-proxy", - ], - }, + "docs/authentication/guides/sso/configure-oidc-react", + "docs/authentication/guides/sso/configure-oidc-behind-proxy", ], }, "docs/authentication/introducing-metadata-service-authentication", diff --git a/docs/authentication/guides/sso/configure-oidc-behind-proxy.md b/docs/authentication/guides/sso/configure-oidc-behind-proxy.md index c998816e04735..684bf768f2baf 100644 --- a/docs/authentication/guides/sso/configure-oidc-behind-proxy.md +++ b/docs/authentication/guides/sso/configure-oidc-behind-proxy.md @@ -1,8 +1,9 @@ -# Configuring Frontend to use a Proxy when communicating with SSO Provider -*Authored on 22/08/2023* +# OIDC Proxy Configuration -The `datahub-frontend-react` server can be configured to use an http proxy when retrieving the openid-configuration. -This can be needed if your infrastructure is locked down and disallows connectivity by default, using proxies for fine-grained egress control. +_Authored on 22/08/2023_ + +The `datahub-frontend-react` server can be configured to use an http proxy when retrieving the openid-configuration. +This can be needed if your infrastructure is locked down and disallows connectivity by default, using proxies for fine-grained egress control. ## Configure http proxy and non proxy hosts @@ -17,7 +18,8 @@ HTTP_NON_PROXY_HOSTS=localhost|datahub-gms (or any other hosts that you would li ``` ## Optional: provide custom truststore -If your upstream proxy performs SSL termination to inspect traffic, this will result in different (self-signed) certificates for HTTPS connections. + +If your upstream proxy performs SSL termination to inspect traffic, this will result in different (self-signed) certificates for HTTPS connections. The default truststore used in the `datahub-frontend-react` docker image will not trust these kinds of connections. To address this, you can copy or mount your own truststore (provided by the proxy or network administrators) into the docker container. @@ -36,8 +38,8 @@ FROM linkedin/datahub-frontend-react: COPY /truststore-directory /certificates ``` -Building this Dockerfile will result in your own custom docker image on your local machine. -You will then be able to tag it, publish it to your own registry, etc. +Building this Dockerfile will result in your own custom docker image on your local machine. +You will then be able to tag it, publish it to your own registry, etc. #### Option b) Mount truststore from your host machine using a docker volume @@ -51,7 +53,7 @@ Adapt your docker-compose.yml to include a new volume mount in the `datahub-fron - /truststore-directory:/certificates ``` -### Reference new truststore +### Reference new truststore Add the following environment values to the `datahub-frontend-react` container: diff --git a/docs/authentication/guides/sso/configure-oidc-react-azure.md b/docs/authentication/guides/sso/configure-oidc-react-azure.md deleted file mode 100644 index 177387327c0e8..0000000000000 --- a/docs/authentication/guides/sso/configure-oidc-react-azure.md +++ /dev/null @@ -1,127 +0,0 @@ -# Configuring Azure Authentication for React App (OIDC) -*Authored on 21/12/2021* - -`datahub-frontend` server can be configured to authenticate users over OpenID Connect (OIDC). As such, it can be configured to -delegate authentication responsibility to identity providers like Microsoft Azure. - -This guide will provide steps for configuring DataHub authentication using Microsoft Azure. - -:::caution -Even when OIDC is configured, the root user can still login without OIDC by going -to `/login` URL endpoint. It is recommended that you don't use the default -credentials by mounting a different file in the front end container. To do this -please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS authenticated deployment. -::: - -## Steps - -### 1. Create an application registration in Microsoft Azure portal - -a. Using an account linked to your organization, navigate to the [Microsoft Azure Portal](https://portal.azure.com). - -b. Select **App registrations**, then **New registration** to register a new app. - -c. Name your app registration and choose who can access your application. - -d. Select `Web` as the **Redirect URI** type and enter the following: -``` -https://your-datahub-domain.com/callback/oidc -``` -If you are just testing locally, the following can be used: `http://localhost:9002/callback/oidc`. -Azure supports more than one redirect URI, so both can be configured at the same time from the **Authentication** tab once the registration is complete. - -At this point, your app registration should look like the following: - - -

- -

- - -e. Click **Register**. - -### 2. Configure Authentication (optional) - -Once registration is done, you will land on the app registration **Overview** tab. On the left-side navigation bar, click on **Authentication** under **Manage** and add extra redirect URIs if need be (if you want to support both local testing and Azure deployments). - - -

- -

- - -Click **Save**. - -### 3. Configure Certificates & secrets - -On the left-side navigation bar, click on **Certificates & secrets** under **Manage**. -Select **Client secrets**, then **New client secret**. Type in a meaningful description for your secret and select an expiry. Click the **Add** button when you are done. - -**IMPORTANT:** Copy the `value` of your newly create secret since Azure will never display its value afterwards. - - -

- -

- - -### 4. Configure API permissions - -On the left-side navigation bar, click on **API permissions** under **Manage**. DataHub requires the following four Microsoft Graph APIs: - -1. `User.Read` *(should be already configured)* -2. `profile` -3. `email` -4. `openid` - -Click on **Add a permission**, then from the **Microsoft APIs** tab select **Microsoft Graph**, then **Delegated permissions**. From the **OpenId permissions** category, select `email`, `openid`, `profile` and click **Add permissions**. - -At this point, you should be looking at a screen like the following: - - -

- -

- - -### 5. Obtain Application (Client) ID - -On the left-side navigation bar, go back to the **Overview** tab. You should see the `Application (client) ID`. Save its value for the next step. - -### 6. Obtain Discovery URI - -On the same page, you should see a `Directory (tenant) ID`. Your OIDC discovery URI will be formatted as follows: - -``` -https://login.microsoftonline.com/{tenant ID}/v2.0/.well-known/openid-configuration -``` - -### 7. Configure `datahub-frontend` to enable OIDC authentication - -a. Open the file `docker/datahub-frontend/env/docker.env` - -b. Add the following configuration values to the file: - -``` -AUTH_OIDC_ENABLED=true -AUTH_OIDC_CLIENT_ID=your-client-id -AUTH_OIDC_CLIENT_SECRET=your-client-secret -AUTH_OIDC_DISCOVERY_URI=https://login.microsoftonline.com/{tenant ID}/v2.0/.well-known/openid-configuration -AUTH_OIDC_BASE_URL=your-datahub-url -AUTH_OIDC_SCOPE="openid profile email" -``` - -Replacing the placeholders above with the client id (step 5), client secret (step 3) and tenant ID (step 6) received from Microsoft Azure. - -### 9. Restart `datahub-frontend-react` docker container - -Now, simply restart the `datahub-frontend-react` container to enable the integration. - -``` -docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml up datahub-frontend-react -``` - -Navigate to your DataHub domain to see SSO in action. - -## Resources -- [Microsoft identity platform and OpenID Connect protocol](https://docs.microsoft.com/en-us/azure/active-directory/develop/v2-protocols-oidc/) \ No newline at end of file diff --git a/docs/authentication/guides/sso/configure-oidc-react-google.md b/docs/authentication/guides/sso/configure-oidc-react-google.md deleted file mode 100644 index af62185e6e787..0000000000000 --- a/docs/authentication/guides/sso/configure-oidc-react-google.md +++ /dev/null @@ -1,118 +0,0 @@ -# Configuring Google Authentication for React App (OIDC) -*Authored on 3/10/2021* - -`datahub-frontend` server can be configured to authenticate users over OpenID Connect (OIDC). As such, it can be configured to delegate -authentication responsibility to identity providers like Google. - -This guide will provide steps for configuring DataHub authentication using Google. - -:::caution -Even when OIDC is configured, the root user can still login without OIDC by going -to `/login` URL endpoint. It is recommended that you don't use the default -credentials by mounting a different file in the front end container. To do this -please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS authenticated deployment. -::: - -## Steps - -### 1. Create a project in the Google API Console - -Using an account linked to your organization, navigate to the [Google API Console](https://console.developers.google.com/) and select **New project**. -Within this project, we will configure the OAuth2.0 screen and credentials. - -### 2. Create OAuth2.0 consent screen - -a. Navigate to `OAuth consent screen`. This is where you'll configure the screen your users see when attempting to -log in to DataHub. - -b. Select `Internal` (if you only want your company users to have access) and then click **Create**. -Note that in order to complete this step you should be logged into a Google account associated with your organization. - -c. Fill out the details in the App Information & Domain sections. Make sure the 'Application Home Page' provided matches where DataHub is deployed -at your organization. - - -

- -

- - -Once you've completed this, **Save & Continue**. - -d. Configure the scopes: Next, click **Add or Remove Scopes**. Select the following scopes: - - - `.../auth/userinfo.email` - - `.../auth/userinfo.profile` - - `openid` - -Once you've selected these, **Save & Continue**. - -### 3. Configure client credentials - -Now navigate to the **Credentials** tab. This is where you'll obtain your client id & secret, as well as configure info -like the redirect URI used after a user is authenticated. - -a. Click **Create Credentials** & select `OAuth client ID` as the credential type. - -b. On the following screen, select `Web application` as your Application Type. - -c. Add the domain where DataHub is hosted to your 'Authorized Javascript Origins'. - -``` -https://your-datahub-domain.com -``` - -d. Add the domain where DataHub is hosted with the path `/callback/oidc` appended to 'Authorized Redirect URLs'. - -``` -https://your-datahub-domain.com/callback/oidc -``` - -e. Click **Create** - -f. You will now receive a pair of values, a client id and a client secret. Bookmark these for the next step. - -At this point, you should be looking at a screen like the following: - - -

- -

- - -Success! - -### 4. Configure `datahub-frontend` to enable OIDC authentication - -a. Open the file `docker/datahub-frontend/env/docker.env` - -b. Add the following configuration values to the file: - -``` -AUTH_OIDC_ENABLED=true -AUTH_OIDC_CLIENT_ID=your-client-id -AUTH_OIDC_CLIENT_SECRET=your-client-secret -AUTH_OIDC_DISCOVERY_URI=https://accounts.google.com/.well-known/openid-configuration -AUTH_OIDC_BASE_URL=your-datahub-url -AUTH_OIDC_SCOPE="openid profile email" -AUTH_OIDC_USER_NAME_CLAIM=email -AUTH_OIDC_USER_NAME_CLAIM_REGEX=([^@]+) -``` - -Replacing the placeholders above with the client id & client secret received from Google in Step 3f. - - -### 5. Restart `datahub-frontend-react` docker container - -Now, simply restart the `datahub-frontend-react` container to enable the integration. - -``` -docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml up datahub-frontend-react -``` - -Navigate to your DataHub domain to see SSO in action. - - -## References - -- [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect) \ No newline at end of file diff --git a/docs/authentication/guides/sso/configure-oidc-react-okta.md b/docs/authentication/guides/sso/configure-oidc-react-okta.md deleted file mode 100644 index 320b887a28f16..0000000000000 --- a/docs/authentication/guides/sso/configure-oidc-react-okta.md +++ /dev/null @@ -1,124 +0,0 @@ -# Configuring Okta Authentication for React App (OIDC) -*Authored on 3/10/2021* - -`datahub-frontend` server can be configured to authenticate users over OpenID Connect (OIDC). As such, it can be configured to -delegate authentication responsibility to identity providers like Okta. - -This guide will provide steps for configuring DataHub authentication using Okta. - -:::caution -Even when OIDC is configured, the root user can still login without OIDC by going -to `/login` URL endpoint. It is recommended that you don't use the default -credentials by mounting a different file in the front end container. To do this -please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS authenticated deployment. -::: - -## Steps - -### 1. Create an application in Okta Developer Console - -a. Log in to your Okta admin account & navigate to the developer console - -b. Select **Applications**, then **Add Application**, the **Create New App** to create a new app. - -c. Select `Web` as the **Platform**, and `OpenID Connect` as the **Sign on method** - -d. Click **Create** - -e. Under 'General Settings', name your application - -f. Below, add a **Login Redirect URI**. This should be formatted as - -``` -https://your-datahub-domain.com/callback/oidc -``` - -If you're just testing locally, this can be `http://localhost:9002/callback/oidc`. - -g. Below, add a **Logout Redirect URI**. This should be formatted as - -``` -https://your-datahub-domain.com -``` - -h. [Optional] If you're enabling DataHub login as an Okta tile, you'll need to provide the **Initiate Login URI**. You -can set if to - -``` -https://your-datahub-domain.com/authenticate -``` - -If you're just testing locally, this can be `http://localhost:9002`. - -i. Click **Save** - - -### 2. Obtain Client Credentials - -On the subsequent screen, you should see the client credentials. Bookmark the `Client id` and `Client secret` for the next step. - -### 3. Obtain Discovery URI - -On the same page, you should see an `Okta Domain`. Your OIDC discovery URI will be formatted as follows: - -``` -https://your-okta-domain.com/.well-known/openid-configuration -``` - -for example, `https://dev-33231928.okta.com/.well-known/openid-configuration`. - -At this point, you should be looking at a screen like the following: - - -

- -

- - -

- -

- - -Success! - -### 4. Configure `datahub-frontend` to enable OIDC authentication - -a. Open the file `docker/datahub-frontend/env/docker.env` - -b. Add the following configuration values to the file: - -``` -AUTH_OIDC_ENABLED=true -AUTH_OIDC_CLIENT_ID=your-client-id -AUTH_OIDC_CLIENT_SECRET=your-client-secret -AUTH_OIDC_DISCOVERY_URI=https://your-okta-domain.com/.well-known/openid-configuration -AUTH_OIDC_BASE_URL=your-datahub-url -AUTH_OIDC_SCOPE="openid profile email groups" -``` - -Replacing the placeholders above with the client id & client secret received from Okta in Step 2. - -> **Pro Tip!** You can easily enable Okta to return the groups that a user is associated with, which will be provisioned in DataHub, along with the user logging in. This can be enabled by setting the `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` flag to `true`. -> if they do not already exist in DataHub. You can enable your Okta application to return a 'groups' claim from the Okta Console at Applications > Your Application -> Sign On -> OpenID Connect ID Token Settings (Requires an edit). -> -> By default, we assume that the groups will appear in a claim named "groups". This can be customized using the `AUTH_OIDC_GROUPS_CLAIM` container configuration. -> -> -

- -

- - -### 5. Restart `datahub-frontend-react` docker container - -Now, simply restart the `datahub-frontend-react` container to enable the integration. - -``` -docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml up datahub-frontend-react -``` - -Navigate to your DataHub domain to see SSO in action. - -## Resources -- [OAuth 2.0 and OpenID Connect Overview](https://developer.okta.com/docs/concepts/oauth-openid/) diff --git a/docs/authentication/guides/sso/configure-oidc-react.md b/docs/authentication/guides/sso/configure-oidc-react.md index 1671673c09318..9b4af80bb0ccd 100644 --- a/docs/authentication/guides/sso/configure-oidc-react.md +++ b/docs/authentication/guides/sso/configure-oidc-react.md @@ -1,59 +1,201 @@ -# Overview +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# OIDC Authentication The DataHub React application supports OIDC authentication built on top of the [Pac4j Play](https://github.com/pac4j/play-pac4j) library. This enables operators of DataHub to integrate with 3rd party identity providers like Okta, Google, Keycloak, & more to authenticate their users. -When configured, OIDC auth will be enabled between clients of the DataHub UI & `datahub-frontend` server. Beyond this point is considered -to be a secure environment and as such authentication is validated & enforced only at the "front door" inside datahub-frontend. +## 1. Register an app with your Identity Provider -:::caution -Even if OIDC is configured the root user can still login without OIDC by going -to `/login` URL endpoint. It is recommended that you don't use the default -credentials by mounting a different file in the front end container. To do this -please see [this guide](../jaas.md) to mount a custom user.props file for a JAAS authenticated deployment. + + + +#### Create a project in the Google API Console + +Using an account linked to your organization, navigate to the [Google API Console](https://console.developers.google.com/) and select **New project**. +Within this project, we will configure the OAuth2.0 screen and credentials. + +#### Create OAuth2.0 consent screen + +Navigate to **OAuth consent screen**. This is where you'll configure the screen your users see when attempting to +log in to DataHub. Select **Internal** (if you only want your company users to have access) and then click **Create**. +Note that in order to complete this step you should be logged into a Google account associated with your organization. + +Fill out the details in the App Information & Domain sections. Make sure the 'Application Home Page' provided matches where DataHub is deployed +at your organization. Once you've completed this, **Save & Continue**. + +

+ +

+ +#### Configure the scopes + +Next, click **Add or Remove Scopes**. Select the following scope and click **Save & Continue**. + +- .../auth/userinfo.email +- .../auth/userinfo.profile +- openid + +
+ + +#### Create an application in Okta Developer Console + +Log in to your Okta admin account & navigate to the developer console. Select **Applications**, then **Add Application**, the **Create New App** to create a new app. +Select `Web` as the **Platform**, and `OpenID Connect` as the **Sign on method**. + +Click **Create** and name your application under **General Settings** and save. + +- **Login Redirect URI** : `https://your-datahub-domain.com/callback/oidc`. +- **Logout Redirect URI**. `https://your-datahub-domain.com` + +

+ +

+ +:::note Optional +If you're enabling DataHub login as an Okta tile, you'll need to provide the **Initiate Login URI**. You +can set if to `https://your-datahub-domain.com/authenticate`. If you're just testing locally, this can be `http://localhost:9002`. ::: -## Provider-Specific Guides +
+ -1. [Configuring OIDC using Google](configure-oidc-react-google.md) -2. [Configuring OIDC using Okta](configure-oidc-react-okta.md) -3. [Configuring OIDC using Azure](configure-oidc-react-azure.md) +#### Create an application registration in Microsoft Azure portal -## Configuring OIDC in React +Using an account linked to your organization, navigate to the [Microsoft Azure Portal](https://portal.azure.com). Select **App registrations**, then **New registration** to register a new app. -### 1. Register an app with your Identity Provider +Name your app registration and choose who can access your application. -To configure OIDC in React, you will most often need to register yourself as a client with your identity provider (Google, Okta, etc). Each provider may -have their own instructions. Provided below are links to examples for Okta, Google, Azure AD, & Keycloak. +- **Redirect URI** : Select **Web** as type and enter `https://your-datahub-domain.com/callback/oidc` -- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/openidconnect/main/) -- [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect) -- [OpenID Connect authentication with Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/auth-oidc) -- [Keycloak - Securing Applications and Services Guide](https://www.keycloak.org/docs/latest/securing_apps/) +Azure supports more than one redirect URI, so both can be configured at the same time from the **Authentication** tab once the registration is complete. +At this point, your app registration should look like the following. Finally, click **Register**. + +

+ +

-During the registration process, you'll need to provide a login redirect URI to the identity provider. This tells the identity provider -where to redirect to once they've authenticated the end user. +:::note Optional +Once registration is done, you will land on the app registration **Overview** tab. +On the left-side navigation bar, click on **Authentication** under **Manage** and add extra redirect URIs if need be (if you want to support both local testing and Azure deployments). Finally, click **Save**. -By default, the URL will be constructed as follows: +

+ +

-> "http://your-datahub-domain.com/callback/oidc" +::: + +#### Configure Certificates & secrets + +On the left-side navigation bar, click on **Certificates & secrets** under **Manage**. +Select **Client secrets**, then **New client secret**. Type in a meaningful description for your secret and select an expiry. Click the **Add** button when you are done. +Copy the value of your newly create secret since Azure will never display its value afterwards. + +

+ +

+ +#### Configure API permissions + +On the left-side navigation bar, click on **API permissions** under **Manage**. DataHub requires the following four Microsoft Graph APIs: -For example, if you're hosted DataHub at `datahub.myorg.com`, this -value would be `http://datahub.myorg.com/callback/oidc`. For testing purposes you can also specify localhost as the domain name -directly: `http://localhost:9002/callback/oidc` +- User.Read _(should be already configured)_ +- profile +- email +- openid + +Click on **Add a permission**, then from the **Microsoft APIs** tab select **Microsoft Graph**, then **Delegated permissions**. From the **OpenId permissions** category, select `email`, `openid`, `profile` and click **Add permissions**. + +At this point, you should be looking at a screen like the following: + +

+ +

+ +
+
+ +## 2. Obtain Client Credentials & Discovery URL The goal of this step should be to obtain the following values, which will need to be configured before deploying DataHub: -1. **Client ID** - A unique identifier for your application with the identity provider -2. **Client Secret** - A shared secret to use for exchange between you and your identity provider -3. **Discovery URL** - A URL where the OIDC API of your identity provider can be discovered. This should suffixed by - `.well-known/openid-configuration`. Sometimes, identity providers will not explicitly include this URL in their setup guides, though - this endpoint *will* exist as per the OIDC specification. For more info see http://openid.net/specs/openid-connect-discovery-1_0.html. +- **Client ID** - A unique identifier for your application with the identity provider +- **Client Secret** - A shared secret to use for exchange between you and your identity provider +- **Discovery URL** - A URL where the OIDC API of your identity provider can be discovered. This should suffixed by + `.well-known/openid-configuration`. Sometimes, identity providers will not explicitly include this URL in their setup guides, though + this endpoint _will_ exist as per the OIDC specification. For more info see http://openid.net/specs/openid-connect-discovery-1_0.html. + + + + + +**Obtain Client Credentials** + +Navigate to the **Credentials** tab. Click **Create Credentials** & select **OAuth client ID** as the credential type. + +On the following screen, select **Web application** as your Application Type. +Add the domain where DataHub is hosted to your 'Authorized Javascript Origins'. + +``` +https://your-datahub-domain.com +``` + +Add the domain where DataHub is hosted with the path `/callback/oidc` appended to 'Authorized Redirect URLs'. Finally, click **Create** + +``` +https://your-datahub-domain.com/callback/oidc +``` + +You will now receive a pair of values, a client id and a client secret. Bookmark these for the next step. + + + + +**Obtain Client Credentials** + +After registering the app, you should see the client credentials. Bookmark the `Client id` and `Client secret` for the next step. + +**Obtain Discovery URI** + +On the same page, you should see an `Okta Domain`. Your OIDC discovery URI will be formatted as follows: + +``` +https://your-okta-domain.com/.well-known/openid-configuration +``` + +For example, `https://dev-33231928.okta.com/.well-known/openid-configuration`. + +At this point, you should be looking at a screen like the following: + +

+ +

+
+ -### 2. Configure DataHub Frontend Server +**Obtain Application (Client) ID** -The second step to enabling OIDC involves configuring `datahub-frontend` to enable OIDC authentication with your Identity Provider. +On the left-side navigation bar, go back to the **Overview** tab. You should see the `Application (client) ID`. Save its value for the next step. + +**Obtain Discovery URI** + +On the same page, you should see a `Directory (tenant) ID`. Your OIDC discovery URI will be formatted as follows: + +``` +https://login.microsoftonline.com/{tenant ID}/v2.0/.well-known/openid-configuration +``` + + +
+ +## 3. Configure DataHub Frontend Server + +### Docker + +The next step to enabling OIDC involves configuring `datahub-frontend` to enable OIDC authentication with your Identity Provider. To do so, you must update the `datahub-frontend` [docker.env](../../../../docker/datahub-frontend/env/docker.env) file with the values received from your identity provider: @@ -67,22 +209,29 @@ AUTH_OIDC_DISCOVERY_URI=your-provider-discovery-url AUTH_OIDC_BASE_URL=your-datahub-url ``` -- `AUTH_OIDC_ENABLED`: Enable delegating authentication to OIDC identity provider -- `AUTH_OIDC_CLIENT_ID`: Unique client id received from identity provider -- `AUTH_OIDC_CLIENT_SECRET`: Unique client secret received from identity provider -- `AUTH_OIDC_DISCOVERY_URI`: Location of the identity provider OIDC discovery API. Suffixed with `.well-known/openid-configuration` -- `AUTH_OIDC_BASE_URL`: The base URL of your DataHub deployment, e.g. https://yourorgdatahub.com (prod) or http://localhost:9002 (testing) -- `AUTH_SESSION_TTL_HOURS`: The length of time in hours before a user will be prompted to login again. Controls the actor cookie expiration time in the browser. Numeric value converted to hours, default 24. -- `MAX_SESSION_TOKEN_AGE`: Determines the expiration time of a session token. Session tokens are stateless so this determines at what time a session token may no longer be used and a valid session token can be used until this time has passed. Accepts a valid relative Java date style String, default 24h. +| Configuration | Description | Default | +| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------- | +| AUTH_OIDC_ENABLED | Enable delegating authentication to OIDC identity provider | | +| AUTH_OIDC_CLIENT_ID | Unique client id received from identity provider | | +| AUTH_OIDC_CLIENT_SECRET | Unique client secret received from identity provider | | +| AUTH_OIDC_DISCOVERY_URI | Location of the identity provider OIDC discovery API. Suffixed with `.well-known/openid-configuration` | | +| AUTH_OIDC_BASE_URL | The base URL of your DataHub deployment, e.g. https://yourorgdatahub.com (prod) or http://localhost:9002 (testing) | | +| AUTH_SESSION_TTL_HOURS | The length of time in hours before a user will be prompted to login again. Controls the actor cookie expiration time in the browser. Numeric value converted to hours. | 24 | +| MAX_SESSION_TOKEN_AGE | Determines the expiration time of a session token. Session tokens are stateless so this determines at what time a session token may no longer be used and a valid session token can be used until this time has passed. Accepts a valid relative Java date style String. | 24h | Providing these configs will cause DataHub to delegate authentication to your identity provider, requesting the "oidc email profile" scopes and parsing the "preferred_username" claim from the authenticated profile as the DataHub CorpUser identity. +:::note + +By default, the login callback endpoint exposed by DataHub will be located at `${AUTH_OIDC_BASE_URL}/callback/oidc`. This must **exactly** match the login redirect URL you've registered with your identity provider in step 1. + +::: -> By default, the login callback endpoint exposed by DataHub will be located at `${AUTH_OIDC_BASE_URL}/callback/oidc`. This must **exactly** match the login redirect URL you've registered with your identity provider in step 1. +### Kubernetes -In kubernetes, you can add the above env variables in the values.yaml as follows. +In Kubernetes, you can add the above env variables in the `values.yaml` as follows. ```yaml datahub-frontend: @@ -102,20 +251,21 @@ datahub-frontend: You can also package OIDC client secrets into a k8s secret by running -```kubectl create secret generic datahub-oidc-secret --from-literal=secret=<>``` +``` +kubectl create secret generic datahub-oidc-secret --from-literal=secret=<> +``` Then set the secret env as follows. ```yaml - - name: AUTH_OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: datahub-oidc-secret - key: secret +- name: AUTH_OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: datahub-oidc-secret + key: secret ``` - -#### Advanced +### Advanced OIDC Configurations You can optionally customize the flow further using advanced configurations. These allow you to specify the OIDC scopes requested, how the DataHub username is parsed from the claims returned by the identity provider, and how users and groups are extracted and provisioned from the OIDC claim set. @@ -128,23 +278,15 @@ AUTH_OIDC_SCOPE=your-custom-scope AUTH_OIDC_CLIENT_AUTHENTICATION_METHOD=authentication-method ``` -- `AUTH_OIDC_USER_NAME_CLAIM`: The attribute that will contain the username used on the DataHub platform. By default, this is "email" provided - as part of the standard `email` scope. -- `AUTH_OIDC_USER_NAME_CLAIM_REGEX`: A regex string used for extracting the username from the userNameClaim attribute. For example, if - the userNameClaim field will contain an email address, and we want to omit the domain name suffix of the email, we can specify a custom - regex to do so. (e.g. `([^@]+)`) -- `AUTH_OIDC_SCOPE`: a string representing the scopes to be requested from the identity provider, granted by the end user. For more info, - see [OpenID Connect Scopes](https://auth0.com/docs/scopes/openid-connect-scopes). -- `AUTH_OIDC_CLIENT_AUTHENTICATION_METHOD`: a string representing the token authentication method to use with the identity provider. Default value - is `client_secret_basic`, which uses HTTP Basic authentication. Another option is `client_secret_post`, which includes the client_id and secret_id - as form parameters in the HTTP POST request. For more info, see [OAuth 2.0 Client Authentication](https://darutk.medium.com/oauth-2-0-client-authentication-4b5f929305d4) - -Additional OIDC Options: +| Configuration | Description | Default | +| -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------- | +| AUTH_OIDC_USER_NAME_CLAIM | The attribute that will contain the username used on the DataHub platform. By default, this is "email" providedas part of the standard `email` scope. | | +| AUTH_OIDC_USER_NAME_CLAIM_REGEX | A regex string used for extracting the username from the userNameClaim attribute. For example, if the userNameClaim field will contain an email address, and we want to omit the domain name suffix of the email, we can specify a customregex to do so. (e.g. `([^@]+)`) | | +| AUTH_OIDC_SCOPE | A string representing the scopes to be requested from the identity provider, granted by the end user. For more info, see [OpenID Connect Scopes](https://auth0.com/docs/scopes/openid-connect-scopes). | | +| AUTH_OIDC_CLIENT_AUTHENTICATION_METHOD | a string representing the token authentication method to use with the identity provider. Default value is `client_secret_basic`, which uses HTTP Basic authentication. Another option is `client_secret_post`, which includes the client_id and secret_id as form parameters in the HTTP POST request. For more info, see [OAuth 2.0 Client Authentication](https://darutk.medium.com/oauth-2-0-client-authentication-4b5f929305d4) | client_secret_basic | +| AUTH_OIDC_PREFERRED_JWS_ALGORITHM | Can be used to select a preferred signing algorithm for id tokens. Examples include: `RS256` or `HS256`. If your IdP includes `none` before `RS256`/`HS256` in the list of signing algorithms, then this value **MUST** be set. | | -- `AUTH_OIDC_PREFERRED_JWS_ALGORITHM` - Can be used to select a preferred signing algorithm for id tokens. Examples include: `RS256` or `HS256`. If -your IdP includes `none` before `RS256`/`HS256` in the list of signing algorithms, then this value **MUST** be set. - -##### User & Group Provisioning (JIT Provisioning) +### User & Group Provisioning (JIT Provisioning) By default, DataHub will optimistically attempt to provision users and groups that do not already exist at the time of login. For users, we extract information like first name, last name, display name, & email to construct a basic user profile. If a groups claim is present, @@ -160,26 +302,30 @@ AUTH_OIDC_EXTRACT_GROUPS_ENABLED=false AUTH_OIDC_GROUPS_CLAIM= ``` -- `AUTH_OIDC_JIT_PROVISIONING_ENABLED`: Whether DataHub users & groups should be provisioned on login if they do not exist. Defaults to true. -- `AUTH_OIDC_PRE_PROVISIONING_REQUIRED`: Whether the user should already exist in DataHub when they login, failing login if they are not. This is appropriate for situations in which users and groups are batch ingested and tightly controlled inside your environment. Defaults to false. -- `AUTH_OIDC_EXTRACT_GROUPS_ENABLED`: Only applies if `AUTH_OIDC_JIT_PROVISIONING_ENABLED` is set to true. This determines whether we should attempt to extract a list of group names from a particular claim in the OIDC attributes. Note that if this is enabled, each login will re-sync group membership with the groups in your Identity Provider, clearing the group membership that has been assigned through the DataHub UI. Enable with care! Defaults to false. -- `AUTH_OIDC_GROUPS_CLAIM`: Only applies if `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` is set to true. This determines which OIDC claims will contain a list of string group names. Accepts multiple claim names with comma-separated values. I.e: `groups, teams, departments`. Defaults to 'groups'. +| Configuration | Description | Default | +| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| AUTH_OIDC_JIT_PROVISIONING_ENABLED | Whether DataHub users & groups should be provisioned on login if they do not exist. | true | +| AUTH_OIDC_PRE_PROVISIONING_REQUIRED | Whether the user should already exist in DataHub when they login, failing login if they are not. This is appropriate for situations in which users and groups are batch ingested and tightly controlled inside your environment. | false | +| AUTH_OIDC_EXTRACT_GROUPS_ENABLED | Only applies if `AUTH_OIDC_JIT_PROVISIONING_ENABLED` is set to true. This determines whether we should attempt to extract a list of group names from a particular claim in the OIDC attributes. Note that if this is enabled, each login will re-sync group membership with the groups in your Identity Provider, clearing the group membership that has been assigned through the DataHub UI. Enable with care! | false | +| AUTH_OIDC_GROUPS_CLAIM | Only applies if `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` is set to true. This determines which OIDC claims will contain a list of string group names. Accepts multiple claim names with comma-separated values. I.e: `groups, teams, departments`. | groups | +## 4. Restart datahub-frontend-react -Once configuration has been updated, `datahub-frontend-react` will need to be restarted to pick up the new environment variables: +Once configured, restarting the `datahub-frontend-react` container will enable an indirect authentication flow in which DataHub delegates authentication to the specified identity provider. ``` docker-compose -p datahub -f docker-compose.yml -f docker-compose.override.yml up datahub-frontend-react ``` ->Note that by default, enabling OIDC will *not* disable the dummy JAAS authentication path, which can be reached at the `/login` -route of the React app. To disable this authentication path, additionally specify the following config: -> `AUTH_JAAS_ENABLED=false` +Navigate to your DataHub domain to see SSO in action. -### Summary +:::caution +By default, enabling OIDC will _not_ disable the dummy JAAS authentication path, which can be reached at the `/login` +route of the React app. To disable this authentication path, additionally specify the following config: +`AUTH_JAAS_ENABLED=false` +::: -Once configured, deploying the `datahub-frontend-react` container will enable an indirect authentication flow in which DataHub delegates -authentication to the specified identity provider. +## Summary Once a user is authenticated by the identity provider, DataHub will extract a username from the provided claims and grant DataHub access to the user by setting a pair of session cookies. @@ -196,44 +342,45 @@ A brief summary of the steps that occur when the user navigates to the React app 7. DataHub sets session cookies for the newly authenticated user 8. DataHub redirects the user to the homepage ("/") -## FAQ +## Troubleshooting -**No users can log in. Instead, I get redirected to the login page with an error. What do I do?** +
+No users can log in. Instead, I get redirected to the login page with an error. What do I do? This can occur for a variety of reasons, but most often it is due to misconfiguration of Single-Sign On, either on the DataHub -side or on the Identity Provider side. - -First, verify that all values are consistent across them (e.g. the host URL where DataHub is deployed), and that no values -are misspelled (client id, client secret). +side or on the Identity Provider side. -Next, verify that the scopes requested are supported by your Identity Provider -and that the claim (i.e. attribute) DataHub uses for uniquely identifying the user is supported by your Identity Provider (refer to Identity Provider OpenID Connect documentation). By default, this claim is `email`. +- Verify that all values are consistent across them (e.g. the host URL where DataHub is deployed), and that no values are misspelled (client id, client secret). +- Verify that the scopes requested are supported by your Identity Provider and that the claim (i.e. attribute) DataHub uses for uniquely identifying the user is supported by your Identity Provider (refer to Identity Provider OpenID Connect documentation). By default, this claim is `email`. +- Make sure the Discovery URI you've configured (`AUTH_OIDC_DISCOVERY_URI`) is accessible where the datahub-frontend container is running. You can do this by issuing a basic CURL to the address (**Pro-Tip**: you may also visit the address in your browser to check more specific details about your Identity Provider). +- Check the container logs for the `datahub-frontend` container. This should hopefully provide some additional context around why exactly the login handoff is not working. -Then, make sure the Discovery URI you've configured (`AUTH_OIDC_DISCOVERY_URI`) is accessible where the datahub-frontend container is running. You -can do this by issuing a basic CURL to the address (**Pro-Tip**: you may also visit the address in your browser to check more specific details about your Identity Provider). +If all else fails, feel free to reach out to the DataHub Community on Slack for real-time support. -Finally, check the container logs for the `datahub-frontend` container. This should hopefully provide some additional context -around why exactly the login handoff is not working. +
-If all else fails, feel free to reach out to the DataHub Community on Slack for -real-time support - - - -**I'm seeing an error in the `datahub-frontend` logs when a user tries to login** -```shell -Caused by: java.lang.RuntimeException: Failed to resolve user name claim from profile provided by Identity Provider. Missing attribute. Attribute: 'email', Regex: '(.*)', Profile: { ... -``` -**what do I do?** +
+ +I'm seeing an error in the `datahub-frontend` logs when a user tries to login: Caused by: java.lang.RuntimeException: Failed to resolve user name claim from profile provided by Identity Provider. Missing attribute. Attribute: 'email', Regex: '(.*)', Profile: { .... + This indicates that your Identity Provider does not provide the claim with name 'email', which DataHub uses by default to uniquely identify users within your organization. -To fix this, you may need to +To fix this, you may need to -1. Change the claim that is used as the unique user identifier to something else by changing the `AUTH_OIDC_USER_NAME_CLAIM` (e.g. to "name" or "preferred_username") _OR_ +1. Change the claim that is used as the unique user identifier to something else by changing the `AUTH_OIDC_USER_NAME_CLAIM` (e.g. to "name" or "preferred*username") \_OR* 2. Change the environment variable `AUTH_OIDC_SCOPE` to include the scope required to retrieve the claim with name "email" -For the `datahub-frontend` container / pod. +For the `datahub-frontend` container / pod. + +
+ +## Reference -**Pro-Tip**: Check the documentation for your Identity Provider to learn more about the scope claims supported. +Check the documentation for your Identity Provider to learn more about the scope claims supported. + +- [Registering an App in Okta](https://developer.okta.com/docs/guides/add-an-external-idp/openidconnect/main/) +- [OpenID Connect in Google Identity](https://developers.google.com/identity/protocols/oauth2/openid-connect) +- [OpenID Connect authentication with Azure Active Directory](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/auth-oidc) +- [Keycloak - Securing Applications and Services Guide](https://www.keycloak.org/docs/latest/securing_apps/) From ec9725026dca7b89d6a6464ea9b5c547debf42e5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 2 Nov 2023 09:39:08 -0700 Subject: [PATCH 79/80] chore(ingest): remove legacy memory_leak_detector (#9158) --- .../src/datahub/cli/ingest_cli.py | 4 - metadata-ingestion/src/datahub/entrypoints.py | 15 --- .../ingestion/source/looker/looker_config.py | 6 +- .../datahub/utilities/memory_leak_detector.py | 106 ------------------ .../tests/integration/snowflake/common.py | 3 +- .../tests/unit/test_snowflake_source.py | 15 +-- 6 files changed, 10 insertions(+), 139 deletions(-) delete mode 100644 metadata-ingestion/src/datahub/utilities/memory_leak_detector.py diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 9b5716408f3e4..dd0287004a368 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -27,7 +27,6 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.telemetry import telemetry from datahub.upgrade import upgrade -from datahub.utilities import memory_leak_detector logger = logging.getLogger(__name__) @@ -98,7 +97,6 @@ def ingest() -> None: @click.option( "--no-spinner", type=bool, is_flag=True, default=False, help="Turn off spinner" ) -@click.pass_context @telemetry.with_telemetry( capture_kwargs=[ "dry_run", @@ -109,9 +107,7 @@ def ingest() -> None: "no_spinner", ] ) -@memory_leak_detector.with_leak_detection def run( - ctx: click.Context, config: str, dry_run: bool, preview: bool, diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 5bfab3b841fa3..0cd37cc939854 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -70,21 +70,10 @@ version=datahub_package.nice_version_name(), prog_name=datahub_package.__package_name__, ) -@click.option( - "-dl", - "--detect-memory-leaks", - type=bool, - is_flag=True, - default=False, - help="Run memory leak detection.", -) -@click.pass_context def datahub( - ctx: click.Context, debug: bool, log_file: Optional[str], debug_vars: bool, - detect_memory_leaks: bool, ) -> None: if debug_vars: # debug_vars implies debug. This option isn't actually used here, but instead @@ -109,10 +98,6 @@ def datahub( _logging_configured = configure_logging(debug=debug, log_file=log_file) _logging_configured.__enter__() - # Setup the context for the memory_leak_detector decorator. - ctx.ensure_object(dict) - ctx.obj["detect_memory_leaks"] = detect_memory_leaks - @datahub.command() @telemetry.with_telemetry() diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 96c405f7257d0..98d58c9fc9d87 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -121,7 +121,10 @@ class LookerCommonConfig(DatasetSourceConfigMixin): "discoverable. When disabled, adds this information to the description of the column.", ) platform_name: str = Field( - "looker", description="Default platform name. Don't change." + # TODO: This shouldn't be part of the config. + "looker", + description="Default platform name.", + hidden_from_docs=True, ) extract_column_level_lineage: bool = Field( True, @@ -213,7 +216,6 @@ def external_url_defaults_to_api_config_base_url( def stateful_ingestion_should_be_enabled( cls, v: Optional[bool], *, values: Dict[str, Any], **kwargs: Dict[str, Any] ) -> Optional[bool]: - stateful_ingestion: StatefulStaleMetadataRemovalConfig = cast( StatefulStaleMetadataRemovalConfig, values.get("stateful_ingestion") ) diff --git a/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py b/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py deleted file mode 100644 index 85ad0fb4938eb..0000000000000 --- a/metadata-ingestion/src/datahub/utilities/memory_leak_detector.py +++ /dev/null @@ -1,106 +0,0 @@ -import fnmatch -import gc -import logging -import sys -import tracemalloc -from collections import defaultdict -from functools import wraps -from typing import Any, Callable, Dict, List, TypeVar, Union, cast - -import click -from typing_extensions import Concatenate, ParamSpec - -logger = logging.getLogger(__name__) -T = TypeVar("T") -P = ParamSpec("P") - - -def _trace_has_file(trace: tracemalloc.Traceback, file_pattern: str) -> bool: - for frame_index in range(len(trace)): - cur_frame = trace[frame_index] - if fnmatch.fnmatch(cur_frame.filename, file_pattern): - return True - return False - - -def _init_leak_detection() -> None: - # Initialize trace malloc to track up to 25 stack frames. - tracemalloc.start(25) - if sys.version_info >= (3, 9): - # Nice to reset peak to 0. Available for versions >= 3.9. - tracemalloc.reset_peak() - # Enable leak debugging in the garbage collector. - gc.set_debug(gc.DEBUG_LEAK) - - -def _perform_leak_detection() -> None: - # Log potentially useful memory usage metrics - logger.info(f"GC count before collect {gc.get_count()}") - traced_memory_size, traced_memory_peak = tracemalloc.get_traced_memory() - logger.info(f"Traced Memory: size={traced_memory_size}, peak={traced_memory_peak}") - num_unreacheable_objects = gc.collect() - logger.info(f"Number of unreachable objects = {num_unreacheable_objects}") - logger.info(f"GC count after collect {gc.get_count()}") - - # Collect unique traces of all live objects in the garbage - these have potential leaks. - unique_traces_to_objects: Dict[ - Union[tracemalloc.Traceback, int], List[object] - ] = defaultdict(list) - for obj in gc.garbage: - obj_trace = tracemalloc.get_object_traceback(obj) - if obj_trace is not None: - if _trace_has_file(obj_trace, "*datahub/*.py"): - # Leaking object - unique_traces_to_objects[obj_trace].append(obj) - else: - unique_traces_to_objects[id(obj)].append(obj) - logger.info("Potentially leaking objects start") - for key, obj_list in sorted( - unique_traces_to_objects.items(), - key=lambda item: sum( - [sys.getsizeof(o) for o in item[1]] - ), # TODO: add support for deep sizeof - reverse=True, - ): - if isinstance(key, tracemalloc.Traceback): - obj_traceback: tracemalloc.Traceback = cast(tracemalloc.Traceback, key) - logger.info( - f"#Objects:{len(obj_list)}; Total memory:{sum([sys.getsizeof(obj) for obj in obj_list])};" - + " Allocation Trace:\n\t" - + "\n\t".join(obj_traceback.format(limit=25)) - ) - else: - logger.info( - f"#Objects:{len(obj_list)}; Total memory:{sum([sys.getsizeof(obj) for obj in obj_list])};" - + " No Allocation Trace available!" - ) - logger.info("Potentially leaking objects end") - - tracemalloc.stop() - - -def with_leak_detection( - func: Callable[Concatenate[click.Context, P], T] -) -> Callable[Concatenate[click.Context, P], T]: - @wraps(func) - def wrapper(ctx: click.Context, *args: P.args, **kwargs: P.kwargs) -> Any: - detect_leaks: bool = ctx.obj.get("detect_memory_leaks", False) - if detect_leaks: - logger.info( - f"Initializing memory leak detection on command: {func.__module__}.{func.__name__}" - ) - _init_leak_detection() - - try: - return func(ctx, *args, **kwargs) - finally: - if detect_leaks: - logger.info( - f"Starting memory leak detection on command: {func.__module__}.{func.__name__}" - ) - _perform_leak_detection() - logger.info( - f"Finished memory leak detection on command: {func.__module__}.{func.__name__}" - ) - - return wrapper diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index ff448eca01071..78e5499697311 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -565,5 +565,4 @@ def default_query_results( # noqa: C901 "DOMAIN": "DATABASE", }, ] - # Unreachable code - raise Exception(f"Unknown query {query}") + raise ValueError(f"Unexpected query: {query}") diff --git a/metadata-ingestion/tests/unit/test_snowflake_source.py b/metadata-ingestion/tests/unit/test_snowflake_source.py index 888a7c0441554..aaff878b81eee 100644 --- a/metadata-ingestion/tests/unit/test_snowflake_source.py +++ b/metadata-ingestion/tests/unit/test_snowflake_source.py @@ -368,8 +368,7 @@ def default_query_results(query): return [('{"roles":"","value":""}',)] elif query == "select current_warehouse()": return [("TEST_WAREHOUSE")] - # Unreachable code - raise Exception() + raise ValueError(f"Unexpected query: {query}") connection_mock = MagicMock() cursor_mock = MagicMock() @@ -397,8 +396,7 @@ def query_results(query): ] elif query == 'show grants to role "PUBLIC"': return [] - # Unreachable code - raise Exception() + raise ValueError(f"Unexpected query: {query}") config = { "username": "user", @@ -441,8 +439,7 @@ def query_results(query): return [("", "USAGE", "DATABASE", "DB1")] elif query == 'show grants to role "PUBLIC"': return [] - # Unreachable code - raise Exception() + raise ValueError(f"Unexpected query: {query}") setup_mock_connect(mock_connect, query_results) @@ -485,8 +482,7 @@ def query_results(query): ] elif query == 'show grants to role "PUBLIC"': return [] - # Unreachable code - raise Exception() + raise ValueError(f"Unexpected query: {query}") setup_mock_connect(mock_connect, query_results) @@ -536,8 +532,7 @@ def query_results(query): ["", "USAGE", "VIEW", "SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY"], ["", "USAGE", "VIEW", "SNOWFLAKE.ACCOUNT_USAGE.OBJECT_DEPENDENCIES"], ] - # Unreachable code - raise Exception() + raise ValueError(f"Unexpected query: {query}") setup_mock_connect(mock_connect, query_results) From 148ad1ad9f00d6eb43d6acb270b9a90a745c8af3 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 2 Nov 2023 09:44:35 -0700 Subject: [PATCH 80/80] feat(ingest/looker): support emitting unused explores (#9159) --- .../ingestion/source/looker/looker_common.py | 2 +- .../ingestion/source/looker/looker_config.py | 4 ++ .../source/looker/looker_lib_wrapper.py | 7 +++ .../ingestion/source/looker/looker_source.py | 46 +++++++++++++------ 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 30c38720dd96c..7ca5ce49019ab 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -388,7 +388,7 @@ def _get_field_type( # if still not found, log and continue if type_class is None: - logger.info( + logger.debug( f"The type '{native_type}' is not recognized for field type, setting as NullTypeClass.", ) type_class = NullTypeClass diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 98d58c9fc9d87..e6ddea9a30489 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -205,6 +205,10 @@ class LookerDashboardSourceConfig( False, description="Extract looks which are not part of any Dashboard. To enable this flag the stateful_ingestion should also be enabled.", ) + emit_used_explores_only: bool = Field( + True, + description="When enabled, only explores that are used by a Dashboard/Look will be ingested.", + ) @validator("external_base_url", pre=True, always=True) def external_url_defaults_to_api_config_base_url( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py index b00f74b71e792..988caba1c0d74 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py @@ -59,6 +59,7 @@ class LookerAPIStats(BaseModel): lookml_model_calls: int = 0 all_dashboards_calls: int = 0 all_looks_calls: int = 0 + all_models_calls: int = 0 get_query_calls: int = 0 search_looks_calls: int = 0 search_dashboards_calls: int = 0 @@ -155,6 +156,12 @@ def dashboard(self, dashboard_id: str, fields: Union[str, List[str]]) -> Dashboa transport_options=self.transport_options, ) + def all_lookml_models(self) -> Sequence[LookmlModel]: + self.client_stats.all_models_calls += 1 + return self.client.all_lookml_models( + transport_options=self.transport_options, + ) + def lookml_model_explore(self, model: str, explore_name: str) -> LookmlModelExplore: self.client_stats.explore_calls += 1 return self.client.lookml_model_explore( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 09683d790c14c..4a98e8874bca0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -147,9 +147,12 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext): ) self.reporter._looker_explore_registry = self.explore_registry self.reporter._looker_api = self.looker_api + self.reachable_look_registry = set() - self.explores_to_fetch_set: Dict[Tuple[str, str], List[str]] = {} + # (model, explore) -> list of charts/looks/dashboards that reference this explore + # The list values are used purely for debugging purposes. + self.reachable_explores: Dict[Tuple[str, str], List[str]] = {} # Keep stat generators to generate entity stat aspect later stat_generator_config: looker_usage.StatGeneratorConfig = ( @@ -378,11 +381,11 @@ def _get_input_fields_from_query( return result - def add_explore_to_fetch(self, model: str, explore: str, via: str) -> None: - if (model, explore) not in self.explores_to_fetch_set: - self.explores_to_fetch_set[(model, explore)] = [] + def add_reachable_explore(self, model: str, explore: str, via: str) -> None: + if (model, explore) not in self.reachable_explores: + self.reachable_explores[(model, explore)] = [] - self.explores_to_fetch_set[(model, explore)].append(via) + self.reachable_explores[(model, explore)].append(via) def _get_looker_dashboard_element( # noqa: C901 self, element: DashboardElement @@ -403,7 +406,7 @@ def _get_looker_dashboard_element( # noqa: C901 f"Element {element.title}: Explores added via query: {explores}" ) for exp in explores: - self.add_explore_to_fetch( + self.add_reachable_explore( model=element.query.model, explore=exp, via=f"look:{element.look_id}:query:{element.dashboard_id}", @@ -439,7 +442,7 @@ def _get_looker_dashboard_element( # noqa: C901 explores = [element.look.query.view] logger.debug(f"Element {title}: Explores added via look: {explores}") for exp in explores: - self.add_explore_to_fetch( + self.add_reachable_explore( model=element.look.query.model, explore=exp, via=f"Look:{element.look_id}:query:{element.dashboard_id}", @@ -483,7 +486,7 @@ def _get_looker_dashboard_element( # noqa: C901 ) for exp in explores: - self.add_explore_to_fetch( + self.add_reachable_explore( model=element.result_maker.query.model, explore=exp, via=f"Look:{element.look_id}:resultmaker:query", @@ -495,7 +498,7 @@ def _get_looker_dashboard_element( # noqa: C901 if filterable.view is not None and filterable.model is not None: model = filterable.model explores.append(filterable.view) - self.add_explore_to_fetch( + self.add_reachable_explore( model=filterable.model, explore=filterable.view, via=f"Look:{element.look_id}:resultmaker:filterable", @@ -694,20 +697,26 @@ def _make_dashboard_metadata_events( def _make_explore_metadata_events( self, ) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]: + if self.source_config.emit_used_explores_only: + explores_to_fetch = list(self.reachable_explores.keys()) + else: + explores_to_fetch = list(self.list_all_explores()) + explores_to_fetch.sort() + with concurrent.futures.ThreadPoolExecutor( max_workers=self.source_config.max_threads ) as async_executor: - self.reporter.total_explores = len(self.explores_to_fetch_set) + self.reporter.total_explores = len(explores_to_fetch) explore_futures = { async_executor.submit(self.fetch_one_explore, model, explore): ( model, explore, ) - for (model, explore) in self.explores_to_fetch_set + for (model, explore) in explores_to_fetch } - for future in concurrent.futures.as_completed(explore_futures): + for future in concurrent.futures.wait(explore_futures).done: events, explore_id, start_time, end_time = future.result() del explore_futures[future] self.reporter.explores_scanned += 1 @@ -717,6 +726,17 @@ def _make_explore_metadata_events( f"Running time of fetch_one_explore for {explore_id}: {(end_time - start_time).total_seconds()}" ) + def list_all_explores(self) -> Iterable[Tuple[str, str]]: + # returns a list of (model, explore) tuples + + for model in self.looker_api.all_lookml_models(): + if model.name is None or model.explores is None: + continue + for explore in model.explores: + if explore.name is None: + continue + yield (model.name, explore.name) + def fetch_one_explore( self, model: str, explore: str ) -> Tuple[ @@ -954,7 +974,7 @@ def _input_fields_from_dashboard_element( ) if explore is not None: # add this to the list of explores to finally generate metadata for - self.add_explore_to_fetch( + self.add_reachable_explore( input_field.model, input_field.explore, entity_urn ) entity_urn = explore.get_explore_urn(self.source_config)