Skip to content

Commit

Permalink
feat(ingest/presto-on-hive): enable partition key for presto-on-hive (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
zheyu001 authored Aug 2, 2023
1 parent 4f9fc67 commit bf47d65
Show file tree
Hide file tree
Showing 12 changed files with 51 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public SchemaField apply(@Nonnull final com.linkedin.schema.SchemaField input, @
result.setGlossaryTerms(GlossaryTermsMapper.map(input.getGlossaryTerms(), entityUrn));
}
result.setIsPartOfKey(input.isIsPartOfKey());
result.setIsPartitioningKey(input.isIsPartitioningKey());
return result;
}

Expand Down
5 changes: 5 additions & 0 deletions datahub-graphql-core/src/main/resources/entity.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -2872,6 +2872,11 @@ type SchemaField {
Whether the field is part of a key schema
"""
isPartOfKey: Boolean

"""
Whether the field is part of a partitioning key schema
"""
isPartitioningKey: Boolean
}

"""
Expand Down
2 changes: 2 additions & 0 deletions datahub-web-react/src/Mocks.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ export const dataset3WithSchema = {
type: SchemaFieldDataType.String,
nativeDataType: 'varchar(100)',
isPartOfKey: false,
isPartitioningKey: false,
jsonPath: null,
globalTags: null,
glossaryTerms: null,
Expand All @@ -563,6 +564,7 @@ export const dataset3WithSchema = {
type: SchemaFieldDataType.String,
nativeDataType: 'boolean',
isPartOfKey: false,
isPartitioningKey: false,
jsonPath: null,
globalTags: null,
glossaryTerms: null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { ExtendedSchemaFields } from './types';
import TypeLabel from '../../../../shared/tabs/Dataset/Schema/components/TypeLabel';
import { ForeignKeyConstraint, SchemaMetadata } from '../../../../../../types.generated';
import PrimaryKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/PrimaryKeyLabel';
import PartitioningKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/PartitioningKeyLabel';
import NullableLabel from '../../../../shared/tabs/Dataset/Schema/components/NullableLabel';
import ForeignKeyLabel from '../../../../shared/tabs/Dataset/Schema/components/ForeignKeyLabel';

Expand Down Expand Up @@ -62,6 +63,7 @@ export default function useSchemaTitleRenderer(
</FieldPathText>
<TypeLabel type={record.type} nativeDataType={record.nativeDataType} />
{(schemaMetadata?.primaryKeys?.includes(fieldPath) || record.isPartOfKey) && <PrimaryKeyLabel />}
{record.isPartitioningKey && <PartitioningKeyLabel />}
{record.nullable && <NullableLabel />}
{schemaMetadata?.foreignKeys
?.filter(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import React from 'react';
import { Badge } from 'antd';
import styled from 'styled-components';
import { blue } from '@ant-design/colors';
import { ANTD_GRAY } from '../../../../constants';

/**
 * Ant Design `Badge` restyled for the "Partition Key" marker shown next to a schema field.
 *
 * The badge gets a 4px left margin. Its `.ant-badge-count` child is overridden with a
 * background taken from `ANTD_GRAY[1]`, text coloured `blue[5]`, a 1px solid `blue[2]`
 * border, 12px / weight-400 text and a fixed 22px height.
 *
 * The tripled `&&&` selector repeats the generated class name to raise specificity,
 * presumably so these rules win over Ant Design's own `.ant-badge-count` styles.
 */
const PartitioningKeyBadge = styled(Badge)`
margin-left: 4px;
&&& .ant-badge-count {
background-color: ${ANTD_GRAY[1]};
color: ${blue[5]};
border: 1px solid ${blue[2]};
font-size: 12px;
font-weight: 400;
height: 22px;
}
`;

/**
 * Renders the "Partition Key" badge for a schema field.
 *
 * Takes no props; it always renders a `PartitioningKeyBadge` whose `count`
 * is the fixed text "Partition Key".
 */
export default function PartitioningKeyLabel() {
    const badgeText = 'Partition Key';

    return <PartitioningKeyBadge count={badgeText} />;
}
1 change: 1 addition & 0 deletions datahub-web-react/src/graphql/fragments.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,7 @@ fragment schemaFieldFields on SchemaField {
nativeDataType
recursive
isPartOfKey
isPartitioningKey
globalTags {
...globalTagsFields
}
Expand Down
1 change: 1 addition & 0 deletions datahub-web-react/src/graphql/versionedDataset.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ query getVersionedDataset($urn: String!, $versionStamp: String) {
nativeDataType
recursive
isPartOfKey
isPartitioningKey
}
lastObserved
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,8 @@ def loop_tables(
# add table schema fields
schema_fields = self.get_schema_fields(dataset_name, columns)

self._set_partition_key(columns, schema_fields)

schema_metadata = get_schema_metadata(
self.report,
dataset_name,
Expand Down Expand Up @@ -888,6 +890,18 @@ def get_schema_fields_for_column(
default_nullable=True,
)

def _set_partition_key(self, columns, schema_fields):
if len(columns) > 0:
partition_key_names = set()
for column in columns:
if column["is_partition_col"]:
partition_key_names.add(column["col_name"])

for schema_field in schema_fields:
name = schema_field.fieldPath.split(".")[-1]
if name in partition_key_names:
schema_field.isPartitioningKey = True


class SQLAlchemyClient:
def __init__(self, config: SQLAlchemyConfig):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,7 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false,
"isPartitioningKey": true,
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,7 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false,
"isPartitioningKey": true,
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,7 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false,
"isPartitioningKey": true,
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1277,6 +1277,7 @@
"nativeDataType": "string",
"recursive": false,
"isPartOfKey": false,
"isPartitioningKey": true,
"jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}"
},
{
Expand Down

0 comments on commit bf47d65

Please sign in to comment.