Skip to content

Commit

Permalink
dialect-specific column types
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 committed Oct 12, 2023
1 parent a8f0080 commit 7f7d226
Show file tree
Hide file tree
Showing 11 changed files with 29 additions and 24 deletions.
15 changes: 9 additions & 6 deletions metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ class SqlParsingResult(_ParserBaseModel):
)


def _parse_statement(sql: str, dialect: str) -> sqlglot.Expression:
statement = sqlglot.parse_one(
sql, read=dialect, error_level=sqlglot.ErrorLevel.RAISE
def _parse_statement(sql: sqlglot.exp.ExpOrStr, dialect: str) -> sqlglot.Expression:
statement: sqlglot.Expression = sqlglot.maybe_parse(
sql, dialect=dialect, error_level=sqlglot.ErrorLevel.RAISE
)
return statement

Expand Down Expand Up @@ -766,6 +766,7 @@ def _translate_sqlglot_type(
def _translate_internal_column_lineage(
table_name_urn_mapping: Dict[_TableName, str],
raw_column_lineage: _ColumnLineageInfo,
dialect: str,
) -> ColumnLineageInfo:
downstream_urn = None
if raw_column_lineage.downstream.table:
Expand All @@ -779,7 +780,9 @@ def _translate_internal_column_lineage(
)
if raw_column_lineage.downstream.column_type
else None,
native_column_type=raw_column_lineage.downstream.column_type.sql()
native_column_type=raw_column_lineage.downstream.column_type.sql(
dialect=dialect
)
if raw_column_lineage.downstream.column_type
and raw_column_lineage.downstream.column_type.this
!= sqlglot.exp.DataType.Type.UNKNOWN
Expand All @@ -805,7 +808,7 @@ def _get_dialect(platform: str) -> str:


def _sqlglot_lineage_inner(
sql: str,
sql: sqlglot.exp.ExpOrStr,
schema_resolver: SchemaResolver,
default_db: Optional[str] = None,
default_schema: Optional[str] = None,
Expand Down Expand Up @@ -918,7 +921,7 @@ def _sqlglot_lineage_inner(
if column_lineage:
column_lineage_urns = [
_translate_internal_column_lineage(
table_name_urn_mapping, internal_col_lineage
table_name_urn_mapping, internal_col_lineage, dialect=dialect
)
for internal_col_lineage in column_lineage
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -36,7 +36,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -54,7 +54,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -72,7 +72,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -32,7 +32,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -32,7 +32,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -32,7 +32,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -34,7 +34,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -52,7 +52,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand All @@ -39,7 +39,7 @@
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"native_column_type": "TEXT"
"native_column_type": "STRING"
},
"upstreams": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"native_column_type": "BIGINT"
"native_column_type": "NUMBER"
},
"upstreams": []
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"native_column_type": "DECIMAL"
"native_column_type": "NUMERIC"
},
"upstreams": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"native_column_type": "DECIMAL"
"native_column_type": "NUMERIC"
},
"upstreams": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)",
"column": "PatientId",
"column_type": null,
"native_column_type": "INTEGER()"
},
"upstreams": [
Expand All @@ -25,6 +26,7 @@
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:teradata,myteradata.demo_user.test_lineage2,PROD)",
"column": "BMI",
"column_type": null,
"native_column_type": "FLOAT()"
},
"upstreams": [
Expand Down

0 comments on commit 7f7d226

Please sign in to comment.