From bf5f6f57f4c40d610a106238103170fce78376cb Mon Sep 17 00:00:00 2001
From: zheyu001 <89370467+zheyu001@users.noreply.github.com>
Date: Wed, 2 Aug 2023 12:23:27 +0800
Subject: [PATCH] feat(ingest/presto-on-hive): enable partition key for
presto-on-hive (#8380)
---
.../dataset/mappers/SchemaFieldMapper.java | 1 +
.../src/main/resources/entity.graphql | 5 +++++
datahub-web-react/src/Mocks.tsx | 2 ++
.../schema/utils/schemaTitleRenderer.tsx | 2 ++
.../components/PartitioningKeyLabel.tsx | 21 +++++++++++++++++++
.../src/graphql/fragments.graphql | 1 +
.../src/graphql/versionedDataset.graphql | 1 +
.../ingestion/source/sql/presto_on_hive.py | 14 +++++++++++++
.../presto_on_hive_mces_golden_1.json | 1 +
.../presto_on_hive_mces_golden_2.json | 1 +
.../presto_on_hive_mces_golden_3.json | 1 +
.../presto_on_hive_mces_golden_4.json | 1 +
12 files changed, 51 insertions(+)
create mode 100644 datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PartitioningKeyLabel.tsx
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java
index b472688085fa6..f05a1adb6b443 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/SchemaFieldMapper.java
@@ -34,6 +34,7 @@ public SchemaField apply(@Nonnull final com.linkedin.schema.SchemaField input, @
result.setGlossaryTerms(GlossaryTermsMapper.map(input.getGlossaryTerms(), entityUrn));
}
result.setIsPartOfKey(input.isIsPartOfKey());
+ result.setIsPartitioningKey(input.isIsPartitioningKey());
return result;
}
diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql
index d4745af33e0ce..b1f9d57300177 100644
--- a/datahub-graphql-core/src/main/resources/entity.graphql
+++ b/datahub-graphql-core/src/main/resources/entity.graphql
@@ -2872,6 +2872,11 @@ type SchemaField {
Whether the field is part of a key schema
"""
isPartOfKey: Boolean
+
+ """
+ Whether the field is part of a partitioning key schema
+ """
+ isPartitioningKey: Boolean
}
"""
diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx
index be853361574ab..dcefc7f70d785 100644
--- a/datahub-web-react/src/Mocks.tsx
+++ b/datahub-web-react/src/Mocks.tsx
@@ -549,6 +549,7 @@ export const dataset3WithSchema = {
type: SchemaFieldDataType.String,
nativeDataType: 'varchar(100)',
isPartOfKey: false,
+ isPartitioningKey: false,
jsonPath: null,
globalTags: null,
glossaryTerms: null,
@@ -563,6 +564,7 @@ export const dataset3WithSchema = {
type: SchemaFieldDataType.String,
nativeDataType: 'boolean',
isPartOfKey: false,
+ isPartitioningKey: false,
jsonPath: null,
globalTags: null,
glossaryTerms: null,
diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/utils/schemaTitleRenderer.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/utils/schemaTitleRenderer.tsx
index bd587e300d057..3d03b6306454d 100644
--- a/datahub-web-react/src/app/entity/dataset/profile/schema/utils/schemaTitleRenderer.tsx
+++ b/datahub-web-react/src/app/entity/dataset/profile/schema/utils/schemaTitleRenderer.tsx
@@ -7,6 +7,7 @@ import { ExtendedSchemaFields } from './types';
import TypeLabel from '../../../../shared/tabs/Dataset/Schema/components/TypeLabel';
import { ForeignKeyConstraint, SchemaMetadata } from '../../../../../../types.generated';
import PrimaryKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/PrimaryKeyLabel';
+import PartitioningKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/PartitioningKeyLabel';
import NullableLabel from '../../../../shared/tabs/Dataset/Schema/components/NullableLabel';
import ForeignKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/ForeignKeyLabel';
@@ -62,6 +63,7 @@ export default function useSchemaTitleRenderer(
{(schemaMetadata?.primaryKeys?.includes(fieldPath) || record.isPartOfKey) && }
+ {record.isPartitioningKey && }
{record.nullable && }
{schemaMetadata?.foreignKeys
?.filter(
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PartitioningKeyLabel.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PartitioningKeyLabel.tsx
new file mode 100644
index 0000000000000..dbf259aa4cdc5
--- /dev/null
+++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PartitioningKeyLabel.tsx
@@ -0,0 +1,21 @@
+import React from 'react';
+import { Badge } from 'antd';
+import styled from 'styled-components';
+import { blue } from '@ant-design/colors';
+import { ANTD_GRAY } from '../../../../constants';
+
+const PartitioningKeyBadge = styled(Badge)`
+ margin-left: 4px;
+ &&& .ant-badge-count {
+ background-color: ${ANTD_GRAY[1]};
+ color: ${blue[5]};
+ border: 1px solid ${blue[2]};
+ font-size: 12px;
+ font-weight: 400;
+ height: 22px;
+ }
+`;
+
+export default function PartitioningKeyLabel() {
+    return <PartitioningKeyBadge count="Partition Key" />; // static label badge for partitioning-key schema fields
+}
diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql
index 219722ad1645a..c3ac2139e687b 100644
--- a/datahub-web-react/src/graphql/fragments.graphql
+++ b/datahub-web-react/src/graphql/fragments.graphql
@@ -678,6 +678,7 @@ fragment schemaFieldFields on SchemaField {
nativeDataType
recursive
isPartOfKey
+ isPartitioningKey
globalTags {
...globalTagsFields
}
diff --git a/datahub-web-react/src/graphql/versionedDataset.graphql b/datahub-web-react/src/graphql/versionedDataset.graphql
index d61139927e14b..bebfb102f8e1b 100644
--- a/datahub-web-react/src/graphql/versionedDataset.graphql
+++ b/datahub-web-react/src/graphql/versionedDataset.graphql
@@ -10,6 +10,7 @@ query getVersionedDataset($urn: String!, $versionStamp: String) {
nativeDataType
recursive
isPartOfKey
+ isPartitioningKey
}
lastObserved
}
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py
index a54cb9d50e2ae..1f3092888054e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py
@@ -524,6 +524,8 @@ def loop_tables(
# add table schema fields
schema_fields = self.get_schema_fields(dataset_name, columns)
+ self._set_partition_key(columns, schema_fields)
+
schema_metadata = get_schema_metadata(
self.report,
dataset_name,
@@ -888,6 +890,18 @@ def get_schema_fields_for_column(
default_nullable=True,
)
+    def _set_partition_key(self, columns, schema_fields):
+        """Flag schema fields whose leaf name matches a partition column."""
+        if len(columns) == 0:
+            return
+        partition_cols = {
+            col["col_name"] for col in columns if col["is_partition_col"]
+        }
+        for field in schema_fields:
+            # Only the last dot-separated segment of fieldPath is compared.
+            if field.fieldPath.split(".")[-1] in partition_cols:
+                field.isPartitioningKey = True
+
class SQLAlchemyClient:
def __init__(self, config: SQLAlchemyConfig):
diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json
index 0953b6738fdcb..45d13229b2d85 100644
--- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json
+++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json
@@ -1277,6 +1277,7 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false,
+ "isPartitioningKey": true,
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
},
{
diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json
index e5ac05f979368..4ec71eb8c39c6 100644
--- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json
+++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json
@@ -1277,6 +1277,7 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false,
+ "isPartitioningKey": true,
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
},
{
diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_3.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_3.json
index a9cf64c0a43b4..824524782a8e3 100644
--- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_3.json
+++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_3.json
@@ -1277,6 +1277,7 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false,
+ "isPartitioningKey": true,
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
},
{
diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_4.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_4.json
index a050f418371c6..3f2980457daa4 100644
--- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_4.json
+++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_4.json
@@ -1277,6 +1277,7 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false,
+ "isPartitioningKey": true,
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
},
{