Skip to content

Commit

Permalink
fix(sql-parser): convert platform instance to lowercase when building table urns (#9181)
Browse files: browse the repository at this point in the history.
  • Loading branch information
Starkie authored Nov 16, 2023
1 parent 417ffb1 commit d0fa5de
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
6 changes: 5 additions & 1 deletion metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,12 @@ def get_urn_for_table(self, table: _TableName, lower: bool = False) -> str:
table_name = ".".join(
filter(None, [table.database, table.db_schema, table.table])
)

platform_instance = self.platform_instance

if lower:
table_name = table_name.lower()
platform_instance = platform_instance.lower() if platform_instance else None

if self.platform == "bigquery":
# Normalize shard numbers and other BigQuery weirdness.
Expand All @@ -372,7 +376,7 @@ def get_urn_for_table(self, table: _TableName, lower: bool = False) -> str:

urn = make_dataset_urn_with_platform_instance(
platform=self.platform,
platform_instance=self.platform_instance,
platform_instance=platform_instance,
env=self.env,
name=table_name,
)
Expand Down
33 changes: 33 additions & 0 deletions metadata-ingestion/tests/unit/sql_parsing/test_schemaresolver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from datahub.utilities.sqlglot_lineage import SchemaResolver, _TableName


def test_get_urn_for_table_lowercase():
    """With lower=True, the platform instance and table name are lowercased in the URN."""
    resolver = SchemaResolver(
        platform="mssql",
        platform_instance="Uppercased-Instance",
        env="PROD",
        graph=None,
    )
    mixed_case_table = _TableName(
        database="Database", db_schema="DataSet", table="Table"
    )

    # Every name component, including the instance prefix, is expected in lowercase.
    expected_urn = "urn:li:dataset:(urn:li:dataPlatform:mssql,uppercased-instance.database.dataset.table,PROD)"
    actual_urn = resolver.get_urn_for_table(table=mixed_case_table, lower=True)

    assert actual_urn == expected_urn


def test_get_urn_for_table_not_lower_should_keep_capital_letters():
    """With lower=False, the URN keeps the original casing of the instance and table name."""
    resolver = SchemaResolver(
        platform="mssql",
        platform_instance="Uppercased-Instance",
        env="PROD",
        graph=None,
    )
    mixed_case_table = _TableName(
        database="Database", db_schema="DataSet", table="Table"
    )

    # Casing of the instance prefix and each name component must be left untouched.
    expected_urn = "urn:li:dataset:(urn:li:dataPlatform:mssql,Uppercased-Instance.Database.DataSet.Table,PROD)"
    actual_urn = resolver.get_urn_for_table(table=mixed_case_table, lower=False)

    assert actual_urn == expected_urn

0 comments on commit d0fa5de

Please sign in to comment.