diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
index ebb5c7d62c7d3..b99f712034fe0 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
@@ -1292,7 +1292,8 @@ private void configureCorpUserResolvers(final RuntimeWiring.Builder builder) {
*/
private void configureCorpGroupResolvers(final RuntimeWiring.Builder builder) {
builder.type("CorpGroup", typeWiring -> typeWiring
- .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)));
+ .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))
+ .dataFetcher("exists", new EntityExistsResolver(entityService)));
builder.type("CorpGroupInfo", typeWiring -> typeWiring
.dataFetcher("admins",
new LoadableTypeBatchResolver<>(corpUserType,
diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql
index 0b15d7b875a9c..b37a8f34fa056 100644
--- a/datahub-graphql-core/src/main/resources/entity.graphql
+++ b/datahub-graphql-core/src/main/resources/entity.graphql
@@ -3788,6 +3788,11 @@ type CorpGroup implements Entity {
Additional read only info about the group
"""
info: CorpGroupInfo @deprecated
+
+ """
+ Whether or not this entity exists on DataHub
+ """
+ exists: Boolean
}
"""
diff --git a/datahub-web-react/src/app/entity/group/GroupProfile.tsx b/datahub-web-react/src/app/entity/group/GroupProfile.tsx
index d5e284af931df..53d2062277dec 100644
--- a/datahub-web-react/src/app/entity/group/GroupProfile.tsx
+++ b/datahub-web-react/src/app/entity/group/GroupProfile.tsx
@@ -11,6 +11,7 @@ import { RoutedTabs } from '../../shared/RoutedTabs';
import GroupInfoSidebar from './GroupInfoSideBar';
import { GroupAssets } from './GroupAssets';
import { ErrorSection } from '../../shared/error/ErrorSection';
+import NonExistentEntityPage from '../shared/entity/NonExistentEntityPage';
const messageStyle = { marginTop: '10%' };
@@ -110,6 +111,9 @@ export default function GroupProfile() {
urn,
};
+ if (data?.corpGroup?.exists === false) {
+ return <NonExistentEntityPage />;
+ }
return (
<>
{error && <ErrorSection />}
diff --git a/datahub-web-react/src/graphql/group.graphql b/datahub-web-react/src/graphql/group.graphql
index 9aa6e2b005f16..1007721e51a4e 100644
--- a/datahub-web-react/src/graphql/group.graphql
+++ b/datahub-web-react/src/graphql/group.graphql
@@ -3,6 +3,7 @@ query getGroup($urn: String!, $membersCount: Int!) {
urn
type
name
+ exists
origin {
type
externalType
diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md
index 5d0ad5eaf8f7e..9cd4ad5c6f02d 100644
--- a/docs/how/updating-datahub.md
+++ b/docs/how/updating-datahub.md
@@ -7,6 +7,8 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
### Breaking Changes
- #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now.
+- #8942 - Removed `urn:li:corpuser:datahub` owner for the `Measure`, `Dimension` and `Temporal` tags emitted
+ by Looker and LookML source connectors.
- #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details.
- #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, and so you'll need to switch your requirements to include `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`.
- #8943 The Unity Catalog ingestion source has a new option `include_metastore`, which will cause all urns to be changed when disabled.
diff --git a/metadata-ingestion/src/datahub/ingestion/api/common.py b/metadata-ingestion/src/datahub/ingestion/api/common.py
index 778bd119615e2..a6761a3c77d5e 100644
--- a/metadata-ingestion/src/datahub/ingestion/api/common.py
+++ b/metadata-ingestion/src/datahub/ingestion/api/common.py
@@ -2,6 +2,7 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Dict, Generic, Iterable, Optional, Tuple, TypeVar
+from datahub.configuration.common import ConfigurationError
from datahub.emitter.mce_builder import set_dataset_urn_to_lower
from datahub.ingestion.api.committable import Committable
from datahub.ingestion.graph.client import DataHubGraph
@@ -75,3 +76,11 @@ def register_checkpointer(self, committable: Committable) -> None:
def get_committables(self) -> Iterable[Tuple[str, Committable]]:
yield from self.checkpointers.items()
+
+ def require_graph(self, operation: Optional[str] = None) -> DataHubGraph:
+ if not self.graph:
+ raise ConfigurationError(
+ f"{operation or 'This operation'} requires a graph, but none was provided. "
+ "To provide one, either use the datahub-rest sink or set the top-level datahub_api config in the recipe."
+ )
+ return self.graph
diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
index 7cb487a86d931..611f0c5c52cc6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
@@ -129,11 +129,9 @@ def __init__(self, config: CSVEnricherConfig, ctx: PipelineContext):
# Map from entity urn to a list of SubResourceRow.
self.editable_schema_metadata_map: Dict[str, List[SubResourceRow]] = {}
self.should_overwrite: bool = self.config.write_semantics == "OVERRIDE"
- if not self.should_overwrite and not self.ctx.graph:
- raise ConfigurationError(
- "With PATCH semantics, the csv-enricher source requires a datahub_api to connect to. "
- "Consider using the datahub-rest sink or provide a datahub_api: configuration on your ingestion recipe."
- )
+
+ if not self.should_overwrite:
+ self.ctx.require_graph(operation="The csv-enricher's PATCH semantics flag")
def get_resource_glossary_terms_work_unit(
self,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
index af9769bc9d94c..da1ea8ecb4678 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py
@@ -20,9 +20,8 @@
DBTCommonConfig,
DBTNode,
DBTSourceBase,
- DBTTest,
- DBTTestResult,
)
+from datahub.ingestion.source.dbt.dbt_tests import DBTTest, DBTTestResult
logger = logging.getLogger(__name__)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
index 0f5c08eb6ac54..48d2118a9b091 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@@ -1,11 +1,10 @@
-import json
import logging
import re
from abc import abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from enum import auto
-from typing import Any, Callable, ClassVar, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple
import pydantic
from pydantic import root_validator, validator
@@ -34,6 +33,12 @@
from datahub.ingestion.api.source import MetadataWorkUnitProcessor
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
+from datahub.ingestion.source.dbt.dbt_tests import (
+ DBTTest,
+ DBTTestResult,
+ make_assertion_from_test,
+ make_assertion_result_from_test,
+)
from datahub.ingestion.source.sql.sql_types import (
ATHENA_SQL_TYPES_MAP,
BIGQUERY_TYPES_MAP,
@@ -81,20 +86,7 @@
TimeTypeClass,
)
from datahub.metadata.schema_classes import (
- AssertionInfoClass,
- AssertionResultClass,
- AssertionResultTypeClass,
- AssertionRunEventClass,
- AssertionRunStatusClass,
- AssertionStdAggregationClass,
- AssertionStdOperatorClass,
- AssertionStdParameterClass,
- AssertionStdParametersClass,
- AssertionStdParameterTypeClass,
- AssertionTypeClass,
DataPlatformInstanceClass,
- DatasetAssertionInfoClass,
- DatasetAssertionScopeClass,
DatasetPropertiesClass,
GlobalTagsClass,
GlossaryTermsClass,
@@ -551,134 +543,6 @@ def get_column_type(
return SchemaFieldDataType(type=TypeClass())
-@dataclass
-class AssertionParams:
- scope: Union[DatasetAssertionScopeClass, str]
- operator: Union[AssertionStdOperatorClass, str]
- aggregation: Union[AssertionStdAggregationClass, str]
- parameters: Optional[Callable[[Dict[str, str]], AssertionStdParametersClass]] = None
- logic_fn: Optional[Callable[[Dict[str, str]], Optional[str]]] = None
-
-
-def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]:
- """
- Try to produce a useful string for the name of a relationship constraint.
- Return None if we fail to
- """
- destination_ref = kw_args.get("to")
- source_ref = kw_args.get("model")
- column_name = kw_args.get("column_name")
- dest_field_name = kw_args.get("field")
- if not destination_ref or not source_ref or not column_name or not dest_field_name:
- # base assertions are violated, bail early
- return None
- m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref)
- if m:
- destination_table = m.group(1)
- else:
- destination_table = destination_ref
- m = re.search(r"ref\(\'(.*)\'\)", source_ref)
- if m:
- source_table = m.group(1)
- else:
- source_table = source_ref
- return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}"
-
-
-@dataclass
-class DBTTest:
- qualified_test_name: str
- column_name: Optional[str]
- kw_args: dict
-
- TEST_NAME_TO_ASSERTION_MAP: ClassVar[Dict[str, AssertionParams]] = {
- "not_null": AssertionParams(
- scope=DatasetAssertionScopeClass.DATASET_COLUMN,
- operator=AssertionStdOperatorClass.NOT_NULL,
- aggregation=AssertionStdAggregationClass.IDENTITY,
- ),
- "unique": AssertionParams(
- scope=DatasetAssertionScopeClass.DATASET_COLUMN,
- operator=AssertionStdOperatorClass.EQUAL_TO,
- aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION,
- parameters=lambda _: AssertionStdParametersClass(
- value=AssertionStdParameterClass(
- value="1.0",
- type=AssertionStdParameterTypeClass.NUMBER,
- )
- ),
- ),
- "accepted_values": AssertionParams(
- scope=DatasetAssertionScopeClass.DATASET_COLUMN,
- operator=AssertionStdOperatorClass.IN,
- aggregation=AssertionStdAggregationClass.IDENTITY,
- parameters=lambda kw_args: AssertionStdParametersClass(
- value=AssertionStdParameterClass(
- value=json.dumps(kw_args.get("values")),
- type=AssertionStdParameterTypeClass.SET,
- ),
- ),
- ),
- "relationships": AssertionParams(
- scope=DatasetAssertionScopeClass.DATASET_COLUMN,
- operator=AssertionStdOperatorClass._NATIVE_,
- aggregation=AssertionStdAggregationClass.IDENTITY,
- parameters=lambda kw_args: AssertionStdParametersClass(
- value=AssertionStdParameterClass(
- value=json.dumps(kw_args.get("values")),
- type=AssertionStdParameterTypeClass.SET,
- ),
- ),
- logic_fn=_get_name_for_relationship_test,
- ),
- "dbt_expectations.expect_column_values_to_not_be_null": AssertionParams(
- scope=DatasetAssertionScopeClass.DATASET_COLUMN,
- operator=AssertionStdOperatorClass.NOT_NULL,
- aggregation=AssertionStdAggregationClass.IDENTITY,
- ),
- "dbt_expectations.expect_column_values_to_be_between": AssertionParams(
- scope=DatasetAssertionScopeClass.DATASET_COLUMN,
- operator=AssertionStdOperatorClass.BETWEEN,
- aggregation=AssertionStdAggregationClass.IDENTITY,
- parameters=lambda x: AssertionStdParametersClass(
- minValue=AssertionStdParameterClass(
- value=str(x.get("min_value", "unknown")),
- type=AssertionStdParameterTypeClass.NUMBER,
- ),
- maxValue=AssertionStdParameterClass(
- value=str(x.get("max_value", "unknown")),
- type=AssertionStdParameterTypeClass.NUMBER,
- ),
- ),
- ),
- "dbt_expectations.expect_column_values_to_be_in_set": AssertionParams(
- scope=DatasetAssertionScopeClass.DATASET_COLUMN,
- operator=AssertionStdOperatorClass.IN,
- aggregation=AssertionStdAggregationClass.IDENTITY,
- parameters=lambda kw_args: AssertionStdParametersClass(
- value=AssertionStdParameterClass(
- value=json.dumps(kw_args.get("value_set")),
- type=AssertionStdParameterTypeClass.SET,
- ),
- ),
- ),
- }
-
-
-@dataclass
-class DBTTestResult:
- invocation_id: str
-
- status: str
- execution_time: datetime
-
- native_results: Dict[str, str]
-
-
-def string_map(input_map: Dict[str, Any]) -> Dict[str, str]:
- return {k: str(v) for k, v in input_map.items()}
-
-
@platform_name("dbt")
@config_class(DBTCommonConfig)
@support_status(SupportStatus.CERTIFIED)
@@ -750,7 +614,7 @@ def create_test_entity_mcps(
for upstream_urn in sorted(upstream_urns):
if self.config.entities_enabled.can_emit_node_type("test"):
- yield self._make_assertion_from_test(
+ yield make_assertion_from_test(
custom_props,
node,
assertion_urn,
@@ -759,133 +623,17 @@ def create_test_entity_mcps(
if node.test_result:
if self.config.entities_enabled.can_emit_test_results:
- yield self._make_assertion_result_from_test(
- node, assertion_urn, upstream_urn
+ yield make_assertion_result_from_test(
+ node,
+ assertion_urn,
+ upstream_urn,
+ test_warnings_are_errors=self.config.test_warnings_are_errors,
)
else:
logger.debug(
f"Skipping test result {node.name} emission since it is turned off."
)
- def _make_assertion_from_test(
- self,
- extra_custom_props: Dict[str, str],
- node: DBTNode,
- assertion_urn: str,
- upstream_urn: str,
- ) -> MetadataWorkUnit:
- assert node.test_info
- qualified_test_name = node.test_info.qualified_test_name
- column_name = node.test_info.column_name
- kw_args = node.test_info.kw_args
-
- if qualified_test_name in DBTTest.TEST_NAME_TO_ASSERTION_MAP:
- assertion_params = DBTTest.TEST_NAME_TO_ASSERTION_MAP[qualified_test_name]
- assertion_info = AssertionInfoClass(
- type=AssertionTypeClass.DATASET,
- customProperties=extra_custom_props,
- datasetAssertion=DatasetAssertionInfoClass(
- dataset=upstream_urn,
- scope=assertion_params.scope,
- operator=assertion_params.operator,
- fields=[
- mce_builder.make_schema_field_urn(upstream_urn, column_name)
- ]
- if (
- assertion_params.scope
- == DatasetAssertionScopeClass.DATASET_COLUMN
- and column_name
- )
- else [],
- nativeType=node.name,
- aggregation=assertion_params.aggregation,
- parameters=assertion_params.parameters(kw_args)
- if assertion_params.parameters
- else None,
- logic=assertion_params.logic_fn(kw_args)
- if assertion_params.logic_fn
- else None,
- nativeParameters=string_map(kw_args),
- ),
- )
- elif column_name:
- # no match with known test types, column-level test
- assertion_info = AssertionInfoClass(
- type=AssertionTypeClass.DATASET,
- customProperties=extra_custom_props,
- datasetAssertion=DatasetAssertionInfoClass(
- dataset=upstream_urn,
- scope=DatasetAssertionScopeClass.DATASET_COLUMN,
- operator=AssertionStdOperatorClass._NATIVE_,
- fields=[
- mce_builder.make_schema_field_urn(upstream_urn, column_name)
- ],
- nativeType=node.name,
- logic=node.compiled_code or node.raw_code,
- aggregation=AssertionStdAggregationClass._NATIVE_,
- nativeParameters=string_map(kw_args),
- ),
- )
- else:
- # no match with known test types, default to row-level test
- assertion_info = AssertionInfoClass(
- type=AssertionTypeClass.DATASET,
- customProperties=extra_custom_props,
- datasetAssertion=DatasetAssertionInfoClass(
- dataset=upstream_urn,
- scope=DatasetAssertionScopeClass.DATASET_ROWS,
- operator=AssertionStdOperatorClass._NATIVE_,
- logic=node.compiled_code or node.raw_code,
- nativeType=node.name,
- aggregation=AssertionStdAggregationClass._NATIVE_,
- nativeParameters=string_map(kw_args),
- ),
- )
-
- wu = MetadataChangeProposalWrapper(
- entityUrn=assertion_urn,
- aspect=assertion_info,
- ).as_workunit()
-
- return wu
-
- def _make_assertion_result_from_test(
- self,
- node: DBTNode,
- assertion_urn: str,
- upstream_urn: str,
- ) -> MetadataWorkUnit:
- assert node.test_result
- test_result = node.test_result
-
- assertionResult = AssertionRunEventClass(
- timestampMillis=int(test_result.execution_time.timestamp() * 1000.0),
- assertionUrn=assertion_urn,
- asserteeUrn=upstream_urn,
- runId=test_result.invocation_id,
- result=AssertionResultClass(
- type=AssertionResultTypeClass.SUCCESS
- if test_result.status == "pass"
- or (
- not self.config.test_warnings_are_errors
- and test_result.status == "warn"
- )
- else AssertionResultTypeClass.FAILURE,
- nativeResults=test_result.native_results,
- ),
- status=AssertionRunStatusClass.COMPLETE,
- )
-
- event = MetadataChangeProposalWrapper(
- entityUrn=assertion_urn,
- aspect=assertionResult,
- )
- wu = MetadataWorkUnit(
- id=f"{assertion_urn}-assertionRunEvent-{upstream_urn}",
- mcp=event,
- )
- return wu
-
@abstractmethod
def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]:
# return dbt nodes + global custom properties
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py
index c08295ed1dc59..dc3a84847beb2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py
@@ -26,9 +26,8 @@
DBTNode,
DBTSourceBase,
DBTSourceReport,
- DBTTest,
- DBTTestResult,
)
+from datahub.ingestion.source.dbt.dbt_tests import DBTTest, DBTTestResult
logger = logging.getLogger(__name__)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py
new file mode 100644
index 0000000000000..721769d214d9e
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py
@@ -0,0 +1,261 @@
+import json
+import re
+from dataclasses import dataclass
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
+
+from datahub.emitter import mce_builder
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.metadata.schema_classes import (
+ AssertionInfoClass,
+ AssertionResultClass,
+ AssertionResultTypeClass,
+ AssertionRunEventClass,
+ AssertionRunStatusClass,
+ AssertionStdAggregationClass,
+ AssertionStdOperatorClass,
+ AssertionStdParameterClass,
+ AssertionStdParametersClass,
+ AssertionStdParameterTypeClass,
+ AssertionTypeClass,
+ DatasetAssertionInfoClass,
+ DatasetAssertionScopeClass,
+)
+
+if TYPE_CHECKING:
+ from datahub.ingestion.source.dbt.dbt_common import DBTNode
+
+
+@dataclass
+class DBTTest:
+ qualified_test_name: str
+ column_name: Optional[str]
+ kw_args: dict
+
+
+@dataclass
+class DBTTestResult:
+ invocation_id: str
+
+ status: str
+ execution_time: datetime
+
+ native_results: Dict[str, str]
+
+
+def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]:
+ """
+ Try to produce a human-readable name for a relationship (referential integrity) test.
+ Returns None if any of the `to`, `model`, `column_name`, or `field` kwargs is missing or empty.
+ """
+ destination_ref = kw_args.get("to")
+ source_ref = kw_args.get("model")
+ column_name = kw_args.get("column_name")
+ dest_field_name = kw_args.get("field")
+ if not destination_ref or not source_ref or not column_name or not dest_field_name:
+ # a required kwarg is missing or empty, so no meaningful name can be built; bail early
+ return None
+ m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref)
+ if m:
+ destination_table = m.group(1)
+ else:
+ destination_table = destination_ref
+ m = re.search(r"ref\(\'(.*)\'\)", source_ref)
+ if m:
+ source_table = m.group(1)
+ else:
+ source_table = source_ref
+ return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}"
+
+
+@dataclass
+class AssertionParams:
+ scope: Union[DatasetAssertionScopeClass, str]
+ operator: Union[AssertionStdOperatorClass, str]
+ aggregation: Union[AssertionStdAggregationClass, str]
+ parameters: Optional[Callable[[Dict[str, str]], AssertionStdParametersClass]] = None
+ logic_fn: Optional[Callable[[Dict[str, str]], Optional[str]]] = None
+
+
+_DBT_TEST_NAME_TO_ASSERTION_MAP: Dict[str, AssertionParams] = {
+ "not_null": AssertionParams(
+ scope=DatasetAssertionScopeClass.DATASET_COLUMN,
+ operator=AssertionStdOperatorClass.NOT_NULL,
+ aggregation=AssertionStdAggregationClass.IDENTITY,
+ ),
+ "unique": AssertionParams(
+ scope=DatasetAssertionScopeClass.DATASET_COLUMN,
+ operator=AssertionStdOperatorClass.EQUAL_TO,
+ aggregation=AssertionStdAggregationClass.UNIQUE_PROPOTION,
+ parameters=lambda _: AssertionStdParametersClass(
+ value=AssertionStdParameterClass(
+ value="1.0",
+ type=AssertionStdParameterTypeClass.NUMBER,
+ )
+ ),
+ ),
+ "accepted_values": AssertionParams(
+ scope=DatasetAssertionScopeClass.DATASET_COLUMN,
+ operator=AssertionStdOperatorClass.IN,
+ aggregation=AssertionStdAggregationClass.IDENTITY,
+ parameters=lambda kw_args: AssertionStdParametersClass(
+ value=AssertionStdParameterClass(
+ value=json.dumps(kw_args.get("values")),
+ type=AssertionStdParameterTypeClass.SET,
+ ),
+ ),
+ ),
+ "relationships": AssertionParams(
+ scope=DatasetAssertionScopeClass.DATASET_COLUMN,
+ operator=AssertionStdOperatorClass._NATIVE_,
+ aggregation=AssertionStdAggregationClass.IDENTITY,
+ parameters=lambda kw_args: AssertionStdParametersClass(
+ value=AssertionStdParameterClass(
+ value=json.dumps(kw_args.get("values")),
+ type=AssertionStdParameterTypeClass.SET,
+ ),
+ ),
+ logic_fn=_get_name_for_relationship_test,
+ ),
+ "dbt_expectations.expect_column_values_to_not_be_null": AssertionParams(
+ scope=DatasetAssertionScopeClass.DATASET_COLUMN,
+ operator=AssertionStdOperatorClass.NOT_NULL,
+ aggregation=AssertionStdAggregationClass.IDENTITY,
+ ),
+ "dbt_expectations.expect_column_values_to_be_between": AssertionParams(
+ scope=DatasetAssertionScopeClass.DATASET_COLUMN,
+ operator=AssertionStdOperatorClass.BETWEEN,
+ aggregation=AssertionStdAggregationClass.IDENTITY,
+ parameters=lambda x: AssertionStdParametersClass(
+ minValue=AssertionStdParameterClass(
+ value=str(x.get("min_value", "unknown")),
+ type=AssertionStdParameterTypeClass.NUMBER,
+ ),
+ maxValue=AssertionStdParameterClass(
+ value=str(x.get("max_value", "unknown")),
+ type=AssertionStdParameterTypeClass.NUMBER,
+ ),
+ ),
+ ),
+ "dbt_expectations.expect_column_values_to_be_in_set": AssertionParams(
+ scope=DatasetAssertionScopeClass.DATASET_COLUMN,
+ operator=AssertionStdOperatorClass.IN,
+ aggregation=AssertionStdAggregationClass.IDENTITY,
+ parameters=lambda kw_args: AssertionStdParametersClass(
+ value=AssertionStdParameterClass(
+ value=json.dumps(kw_args.get("value_set")),
+ type=AssertionStdParameterTypeClass.SET,
+ ),
+ ),
+ ),
+}
+
+
+def _string_map(input_map: Dict[str, Any]) -> Dict[str, str]:
+ return {k: str(v) for k, v in input_map.items()}
+
+
+def make_assertion_from_test(
+ extra_custom_props: Dict[str, str],
+ node: "DBTNode",
+ assertion_urn: str,
+ upstream_urn: str,
+) -> MetadataWorkUnit:
+ assert node.test_info
+ qualified_test_name = node.test_info.qualified_test_name
+ column_name = node.test_info.column_name
+ kw_args = node.test_info.kw_args
+
+ if qualified_test_name in _DBT_TEST_NAME_TO_ASSERTION_MAP:
+ assertion_params = _DBT_TEST_NAME_TO_ASSERTION_MAP[qualified_test_name]
+ assertion_info = AssertionInfoClass(
+ type=AssertionTypeClass.DATASET,
+ customProperties=extra_custom_props,
+ datasetAssertion=DatasetAssertionInfoClass(
+ dataset=upstream_urn,
+ scope=assertion_params.scope,
+ operator=assertion_params.operator,
+ fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)]
+ if (
+ assertion_params.scope == DatasetAssertionScopeClass.DATASET_COLUMN
+ and column_name
+ )
+ else [],
+ nativeType=node.name,
+ aggregation=assertion_params.aggregation,
+ parameters=assertion_params.parameters(kw_args)
+ if assertion_params.parameters
+ else None,
+ logic=assertion_params.logic_fn(kw_args)
+ if assertion_params.logic_fn
+ else None,
+ nativeParameters=_string_map(kw_args),
+ ),
+ )
+ elif column_name:
+ # no match with known test types, column-level test
+ assertion_info = AssertionInfoClass(
+ type=AssertionTypeClass.DATASET,
+ customProperties=extra_custom_props,
+ datasetAssertion=DatasetAssertionInfoClass(
+ dataset=upstream_urn,
+ scope=DatasetAssertionScopeClass.DATASET_COLUMN,
+ operator=AssertionStdOperatorClass._NATIVE_,
+ fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)],
+ nativeType=node.name,
+ logic=node.compiled_code or node.raw_code,
+ aggregation=AssertionStdAggregationClass._NATIVE_,
+ nativeParameters=_string_map(kw_args),
+ ),
+ )
+ else:
+ # no match with known test types, default to row-level test
+ assertion_info = AssertionInfoClass(
+ type=AssertionTypeClass.DATASET,
+ customProperties=extra_custom_props,
+ datasetAssertion=DatasetAssertionInfoClass(
+ dataset=upstream_urn,
+ scope=DatasetAssertionScopeClass.DATASET_ROWS,
+ operator=AssertionStdOperatorClass._NATIVE_,
+ logic=node.compiled_code or node.raw_code,
+ nativeType=node.name,
+ aggregation=AssertionStdAggregationClass._NATIVE_,
+ nativeParameters=_string_map(kw_args),
+ ),
+ )
+
+ return MetadataChangeProposalWrapper(
+ entityUrn=assertion_urn,
+ aspect=assertion_info,
+ ).as_workunit()
+
+
+def make_assertion_result_from_test(
+ node: "DBTNode",
+ assertion_urn: str,
+ upstream_urn: str,
+ test_warnings_are_errors: bool,
+) -> MetadataWorkUnit:
+ assert node.test_result
+ test_result = node.test_result
+
+ assertionResult = AssertionRunEventClass(
+ timestampMillis=int(test_result.execution_time.timestamp() * 1000.0),
+ assertionUrn=assertion_urn,
+ asserteeUrn=upstream_urn,
+ runId=test_result.invocation_id,
+ result=AssertionResultClass(
+ type=AssertionResultTypeClass.SUCCESS
+ if test_result.status == "pass"
+ or (not test_warnings_are_errors and test_result.status == "warn")
+ else AssertionResultTypeClass.FAILURE,
+ nativeResults=test_result.native_results,
+ ),
+ status=AssertionRunStatusClass.COMPLETE,
+ )
+
+ return MetadataChangeProposalWrapper(
+ entityUrn=assertion_urn,
+ aspect=assertionResult,
+ ).as_workunit()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
index 89b1e45695c57..30c38720dd96c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
@@ -81,9 +81,6 @@
EnumTypeClass,
FineGrainedLineageClass,
GlobalTagsClass,
- OwnerClass,
- OwnershipClass,
- OwnershipTypeClass,
SchemaMetadataClass,
StatusClass,
SubTypesClass,
@@ -453,17 +450,9 @@ def _get_schema(
@staticmethod
def _get_tag_mce_for_urn(tag_urn: str) -> MetadataChangeEvent:
assert tag_urn in LookerUtil.tag_definitions
- ownership = OwnershipClass(
- owners=[
- OwnerClass(
- owner="urn:li:corpuser:datahub",
- type=OwnershipTypeClass.DATAOWNER,
- )
- ]
- )
return MetadataChangeEvent(
proposedSnapshot=TagSnapshotClass(
- urn=tag_urn, aspects=[ownership, LookerUtil.tag_definitions[tag_urn]]
+ urn=tag_urn, aspects=[LookerUtil.tag_definitions[tag_urn]]
)
)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py
index 24275dcdff34d..8e18d85d6f3ca 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py
@@ -86,7 +86,7 @@ def get_batch_kwargs(
# Fixed-size sampling can be slower than equivalent fraction-based sampling
# as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations
sample_pc = 100 * self.config.profiling.sample_size / table.rows_count
- custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.3f})'
+ custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.8f})'
return {
**super().get_batch_kwargs(table, schema_name, db_name),
# Lowercase/Mixedcase table names in Snowflake do not work by default.
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
index ba8655b83446d..a6a9d8e2c8597 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
@@ -217,14 +217,15 @@ def _get_view_lineage_elements(
key = (lineage.dependent_view, lineage.dependent_schema)
# Append the source table to the list.
lineage_elements[key].append(
- mce_builder.make_dataset_urn(
- self.platform,
- self.get_identifier(
+ mce_builder.make_dataset_urn_with_platform_instance(
+ platform=self.platform,
+ name=self.get_identifier(
schema=lineage.source_schema,
entity=lineage.source_table,
inspector=inspector,
),
- self.config.env,
+ platform_instance=self.config.platform_instance,
+ env=self.config.env,
)
)
@@ -244,12 +245,13 @@ def _get_view_lineage_workunits(
dependent_view, dependent_schema = key
# Construct a lineage object.
- urn = mce_builder.make_dataset_urn(
- self.platform,
- self.get_identifier(
+ urn = mce_builder.make_dataset_urn_with_platform_instance(
+ platform=self.platform,
+ name=self.get_identifier(
schema=dependent_schema, entity=dependent_view, inspector=inspector
),
- self.config.env,
+ platform_instance=self.config.platform_instance,
+ env=self.config.env,
)
# use the mce_builder to ensure that the change proposal inherits
diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
index 81c43884fdf7d..349eb40a5e865 100644
--- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
+++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py
@@ -5,12 +5,13 @@
import logging
import pathlib
from collections import defaultdict
-from typing import Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
import pydantic.dataclasses
import sqlglot
import sqlglot.errors
import sqlglot.lineage
+import sqlglot.optimizer.annotate_types
import sqlglot.optimizer.qualify
import sqlglot.optimizer.qualify_columns
from pydantic import BaseModel
@@ -23,7 +24,17 @@
from datahub.ingestion.api.closeable import Closeable
from datahub.ingestion.graph.client import DataHubGraph
from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
-from datahub.metadata.schema_classes import OperationTypeClass, SchemaMetadataClass
+from datahub.metadata.schema_classes import (
+ ArrayTypeClass,
+ BooleanTypeClass,
+ DateTypeClass,
+ NumberTypeClass,
+ OperationTypeClass,
+ SchemaFieldDataTypeClass,
+ SchemaMetadataClass,
+ StringTypeClass,
+ TimeTypeClass,
+)
from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict
from datahub.utilities.urns.dataset_urn import DatasetUrn
@@ -90,8 +101,18 @@ def get_query_type_of_sql(expression: sqlglot.exp.Expression) -> QueryType:
return QueryType.UNKNOWN
+class _ParserBaseModel(
+ BaseModel,
+ arbitrary_types_allowed=True,
+ json_encoders={
+ SchemaFieldDataTypeClass: lambda v: v.to_obj(),
+ },
+):
+ pass
+
+
@functools.total_ordering
-class _FrozenModel(BaseModel, frozen=True):
+class _FrozenModel(_ParserBaseModel, frozen=True):
def __lt__(self, other: "_FrozenModel") -> bool:
for field in self.__fields__:
self_v = getattr(self, field)
@@ -146,29 +167,42 @@ class _ColumnRef(_FrozenModel):
column: str
-class ColumnRef(BaseModel):
+class ColumnRef(_ParserBaseModel):
table: Urn
column: str
-class _DownstreamColumnRef(BaseModel):
+class _DownstreamColumnRef(_ParserBaseModel):
table: Optional[_TableName]
column: str
+ column_type: Optional[sqlglot.exp.DataType]
-class DownstreamColumnRef(BaseModel):
+class DownstreamColumnRef(_ParserBaseModel):
table: Optional[Urn]
column: str
+ column_type: Optional[SchemaFieldDataTypeClass]
+ native_column_type: Optional[str]
+
+ @pydantic.validator("column_type", pre=True)
+ def _load_column_type(
+ cls, v: Optional[Union[dict, SchemaFieldDataTypeClass]]
+ ) -> Optional[SchemaFieldDataTypeClass]:
+ if v is None:
+ return None
+ if isinstance(v, SchemaFieldDataTypeClass):
+ return v
+ return SchemaFieldDataTypeClass.from_obj(v)
-class _ColumnLineageInfo(BaseModel):
+class _ColumnLineageInfo(_ParserBaseModel):
downstream: _DownstreamColumnRef
upstreams: List[_ColumnRef]
logic: Optional[str]
-class ColumnLineageInfo(BaseModel):
+class ColumnLineageInfo(_ParserBaseModel):
downstream: DownstreamColumnRef
upstreams: List[ColumnRef]
@@ -176,7 +210,7 @@ class ColumnLineageInfo(BaseModel):
logic: Optional[str] = pydantic.Field(default=None, exclude=True)
-class SqlParsingDebugInfo(BaseModel, arbitrary_types_allowed=True):
+class SqlParsingDebugInfo(_ParserBaseModel):
confidence: float = 0.0
tables_discovered: int = 0
@@ -190,7 +224,7 @@ def error(self) -> Optional[Exception]:
return self.table_error or self.column_error
-class SqlParsingResult(BaseModel):
+class SqlParsingResult(_ParserBaseModel):
query_type: QueryType = QueryType.UNKNOWN
in_tables: List[Urn]
@@ -541,6 +575,15 @@ def _schema_aware_fuzzy_column_resolve(
) from e
logger.debug("Qualified sql %s", statement.sql(pretty=True, dialect=dialect))
+ # Try to figure out the types of the output columns.
+ try:
+ statement = sqlglot.optimizer.annotate_types.annotate_types(
+ statement, schema=sqlglot_db_schema
+ )
+ except sqlglot.errors.OptimizeError as e:
+ # This is not a fatal error, so we can continue.
+ logger.debug("sqlglot failed to annotate types: %s", e)
+
column_lineage = []
try:
@@ -553,7 +596,6 @@ def _schema_aware_fuzzy_column_resolve(
logger.debug("output columns: %s", [col[0] for col in output_columns])
output_col: str
for output_col, original_col_expression in output_columns:
- # print(f"output column: {output_col}")
if output_col == "*":
# If schema information is available, the * will be expanded to the actual columns.
# Otherwise, we can't process it.
@@ -613,12 +655,19 @@ def _schema_aware_fuzzy_column_resolve(
output_col = _schema_aware_fuzzy_column_resolve(output_table, output_col)
+ # Guess the output column type.
+ output_col_type = None
+ if original_col_expression.type:
+ output_col_type = original_col_expression.type
+
if not direct_col_upstreams:
logger.debug(f' "{output_col}" has no upstreams')
column_lineage.append(
_ColumnLineageInfo(
downstream=_DownstreamColumnRef(
- table=output_table, column=output_col
+ table=output_table,
+ column=output_col,
+ column_type=output_col_type,
),
upstreams=sorted(direct_col_upstreams),
# logic=column_logic.sql(pretty=True, dialect=dialect),
@@ -673,6 +722,42 @@ def _try_extract_select(
return statement
+def _translate_sqlglot_type(
+ sqlglot_type: sqlglot.exp.DataType.Type,
+) -> Optional[SchemaFieldDataTypeClass]:
+ TypeClass: Any
+ if sqlglot_type in sqlglot.exp.DataType.TEXT_TYPES:
+ TypeClass = StringTypeClass
+ elif sqlglot_type in sqlglot.exp.DataType.NUMERIC_TYPES or sqlglot_type in {
+ sqlglot.exp.DataType.Type.DECIMAL,
+ }:
+ TypeClass = NumberTypeClass
+ elif sqlglot_type in {
+ sqlglot.exp.DataType.Type.BOOLEAN,
+ sqlglot.exp.DataType.Type.BIT,
+ }:
+ TypeClass = BooleanTypeClass
+ elif sqlglot_type in {
+ sqlglot.exp.DataType.Type.DATE,
+ }:
+ TypeClass = DateTypeClass
+ elif sqlglot_type in sqlglot.exp.DataType.TEMPORAL_TYPES:
+ TypeClass = TimeTypeClass
+ elif sqlglot_type in {
+ sqlglot.exp.DataType.Type.ARRAY,
+ }:
+ TypeClass = ArrayTypeClass
+ elif sqlglot_type in {
+ sqlglot.exp.DataType.Type.UNKNOWN,
+ }:
+ return None
+ else:
+ logger.debug("Unknown sqlglot type: %s", sqlglot_type)
+ return None
+
+ return SchemaFieldDataTypeClass(type=TypeClass())
+
+
def _translate_internal_column_lineage(
table_name_urn_mapping: Dict[_TableName, str],
raw_column_lineage: _ColumnLineageInfo,
@@ -684,6 +769,16 @@ def _translate_internal_column_lineage(
downstream=DownstreamColumnRef(
table=downstream_urn,
column=raw_column_lineage.downstream.column,
+ column_type=_translate_sqlglot_type(
+ raw_column_lineage.downstream.column_type.this
+ )
+ if raw_column_lineage.downstream.column_type
+ else None,
+ native_column_type=raw_column_lineage.downstream.column_type.sql()
+ if raw_column_lineage.downstream.column_type
+ and raw_column_lineage.downstream.column_type.this
+ != sqlglot.exp.DataType.Type.UNKNOWN
+ else None,
),
upstreams=[
ColumnRef(
diff --git a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json
index dee85b40bb7a8..1da42b94e320c 100644
--- a/metadata-ingestion/tests/integration/looker/golden_looker_mces.json
+++ b/metadata-ingestion/tests/integration/looker/golden_looker_mces.json
@@ -533,20 +533,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -566,20 +552,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -599,20 +571,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json
index 72db36e63daf7..685a606a57c33 100644
--- a/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json
+++ b/metadata-ingestion/tests/integration/looker/golden_test_allow_ingest.json
@@ -327,20 +327,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -360,20 +346,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -393,20 +365,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json
index e5508bdb06b9e..069788cb088ac 100644
--- a/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json
+++ b/metadata-ingestion/tests/integration/looker/golden_test_external_project_view_mces.json
@@ -327,20 +327,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -360,20 +346,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -393,20 +365,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json
index b0f66e7b245c9..f1c932ebd5a70 100644
--- a/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json
+++ b/metadata-ingestion/tests/integration/looker/golden_test_file_path_ingest.json
@@ -335,20 +335,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -369,20 +355,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -403,20 +375,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json
index 91e13debfa028..9521c9af4bbdc 100644
--- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json
+++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json
@@ -550,20 +550,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -583,20 +569,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -616,20 +588,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json
index e93079119e4f4..dbacd52fe83de 100644
--- a/metadata-ingestion/tests/integration/looker/golden_test_ingest.json
+++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest.json
@@ -327,20 +327,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -360,20 +346,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -393,20 +365,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json
index a9c8efa7cdb98..aaa874d9ff348 100644
--- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json
+++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_joins.json
@@ -351,20 +351,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -384,20 +370,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -417,20 +389,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json
index edd15624a14cd..be8db0722aea3 100644
--- a/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json
+++ b/metadata-ingestion/tests/integration/looker/golden_test_ingest_unaliased_joins.json
@@ -343,20 +343,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -376,20 +362,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -409,20 +381,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json
index aebc89b609a08..05b74f163ad45 100644
--- a/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json
+++ b/metadata-ingestion/tests/integration/looker/looker_mces_golden_deleted_stateful.json
@@ -327,20 +327,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -360,20 +346,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -393,20 +365,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json
index 34bded3cf691e..0778aa0050b00 100644
--- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json
+++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json
@@ -279,20 +279,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -312,20 +298,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -345,20 +317,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json
index 238f4c2580cdf..5a0bd4e12fd3a 100644
--- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json
+++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json
@@ -2121,20 +2121,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -2154,20 +2140,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -2187,20 +2159,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json
index 45d5d839e9d21..1b0ee3216383c 100644
--- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json
+++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json
@@ -2121,20 +2121,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -2154,20 +2140,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -2187,20 +2159,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json
index 187cedaefb6b2..b960ba581e6b5 100644
--- a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json
+++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json
@@ -2004,20 +2004,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -2037,20 +2023,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -2070,20 +2042,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json
index c2c879e38f37b..e29292a44c949 100644
--- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json
+++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json
@@ -2121,20 +2121,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -2154,20 +2140,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -2187,20 +2159,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json
index c1ac54b0fb588..04ecaecbd4afb 100644
--- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json
+++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json
@@ -584,20 +584,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -617,20 +603,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -650,20 +622,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json
index f602ca37b3160..080931ae637bc 100644
--- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json
+++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json
@@ -2121,20 +2121,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -2154,20 +2140,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -2187,20 +2159,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json
index 104bd365669e3..5826c4316b539 100644
--- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json
+++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json
@@ -2134,20 +2134,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -2167,20 +2153,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -2200,20 +2172,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json
index 37a6c94c6952e..53d1ec0229de1 100644
--- a/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json
+++ b/metadata-ingestion/tests/integration/lookml/lookml_reachable_views.json
@@ -681,20 +681,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Dimension",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Dimension",
@@ -714,20 +700,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Temporal",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Temporal",
@@ -747,20 +719,6 @@
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
"urn": "urn:li:tag:Measure",
"aspects": [
- {
- "com.linkedin.pegasus2avro.common.Ownership": {
- "owners": [
- {
- "owner": "urn:li:corpuser:datahub",
- "type": "DATAOWNER"
- }
- ],
- "lastModified": {
- "time": 0,
- "actor": "urn:li:corpuser:unknown"
- }
- }
- },
{
"com.linkedin.pegasus2avro.tag.TagProperties": {
"name": "Measure",
diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
index e3cc6c8101650..b6cb578217a2c 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py
@@ -17,7 +17,6 @@
)
from datahub.ingestion.source.powerbi.m_query import parser, resolver, tree_function
from datahub.ingestion.source.powerbi.m_query.resolver import DataPlatformTable, Lineage
-from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef
pytestmark = pytest.mark.integration_batch_2
@@ -742,75 +741,25 @@ def test_sqlglot_parser():
== "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.operations_analytics.transformed_prod.v_sme_unit_targets,PROD)"
)
- assert lineage[0].column_lineage == [
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="client_director"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="tier"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column='upper("manager")'),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="team_type"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="date_target"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="monthid"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="target_team"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="seller_email"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="agent_key"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="sme_quota"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="revenue_quota"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="service_quota"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="bl_target"),
- upstreams=[],
- logic=None,
- ),
- ColumnLineageInfo(
- downstream=DownstreamColumnRef(table=None, column="software_quota"),
- upstreams=[],
- logic=None,
- ),
+ # TODO: None of these columns have upstreams?
+ # That doesn't seem right - we probably need to add fake schemas for the two tables above.
+ cols = [
+ "client_director",
+ "tier",
+ 'upper("manager")',
+ "team_type",
+ "date_target",
+ "monthid",
+ "target_team",
+ "seller_email",
+ "agent_key",
+ "sme_quota",
+ "revenue_quota",
+ "service_quota",
+ "bl_target",
+ "software_quota",
]
+ for i, column in enumerate(cols):
+ assert lineage[0].column_lineage[i].downstream.table is None
+ assert lineage[0].column_lineage[i].downstream.column == column
+ assert lineage[0].column_lineage[i].upstreams == []
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json
index e50d944ce72e3..f0175b4dc8892 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_create_view_with_cte.json
@@ -12,7 +12,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)",
- "column": "col5"
+ "column": "col5",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -24,7 +30,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)",
- "column": "col1"
+ "column": "col1",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -36,7 +48,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)",
- "column": "col2"
+ "column": "col2",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -48,7 +66,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-proj-2.dataset.my_view,PROD)",
- "column": "col3"
+ "column": "col3",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json
index 78591286feb50..b7df5444987f2 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_from_sharded_table_wildcard.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "col1"
+ "column": "col1",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -20,7 +26,13 @@
{
"downstream": {
"table": null,
- "column": "col2"
+ "column": "col2",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json
index 0e93d31fbb6a6..67e306bebf545 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_nested_subqueries.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "col1"
+ "column": "col1",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -20,7 +26,13 @@
{
"downstream": {
"table": null,
- "column": "col2"
+ "column": "col2",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json
index 78591286feb50..b7df5444987f2 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_sharded_table_normalization.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "col1"
+ "column": "col1",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -20,7 +26,13 @@
{
"downstream": {
"table": null,
- "column": "col2"
+ "column": "col2",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json
index 17a801a63e3ff..b393b2445d6c4 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_star_with_replace.json
@@ -10,7 +10,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)",
- "column": "col1"
+ "column": "col1",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -22,7 +28,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)",
- "column": "col2"
+ "column": "col2",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -34,7 +46,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my-project.my-dataset.test_table,PROD)",
- "column": "something"
+ "column": "something",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json
index fd8a586ac74ac..53fb94300e804 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_bigquery_view_from_union.json
@@ -11,7 +11,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_view,PROD)",
- "column": "col1"
+ "column": "col1",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -27,7 +33,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_view,PROD)",
- "column": "col2"
+ "column": "col2",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json
index 1ca56840531e4..ff452467aa5bd 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_create_view_as_select.json
@@ -10,7 +10,9 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)",
- "column": "Department"
+ "column": "Department",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
@@ -22,14 +24,22 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)",
- "column": "Employees"
+ "column": "Employees",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "BIGINT"
},
"upstreams": []
},
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:oracle,vsal,PROD)",
- "column": "Salary"
+ "column": "Salary",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json
index e241bdd08e243..eecb2265eaec5 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_expand_select_star_basic.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "total_agg"
+ "column": "total_agg",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DOUBLE"
},
"upstreams": [
{
@@ -20,7 +26,13 @@
{
"downstream": {
"table": null,
- "column": "orderkey"
+ "column": "orderkey",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL"
},
"upstreams": [
{
@@ -32,7 +44,13 @@
{
"downstream": {
"table": null,
- "column": "custkey"
+ "column": "custkey",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL"
},
"upstreams": [
{
@@ -44,7 +62,13 @@
{
"downstream": {
"table": null,
- "column": "orderstatus"
+ "column": "orderstatus",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -56,7 +80,13 @@
{
"downstream": {
"table": null,
- "column": "totalprice"
+ "column": "totalprice",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "FLOAT"
},
"upstreams": [
{
@@ -68,7 +98,13 @@
{
"downstream": {
"table": null,
- "column": "orderdate"
+ "column": "orderdate",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.DateType": {}
+ }
+ },
+ "native_column_type": "DATE"
},
"upstreams": [
{
@@ -80,7 +116,13 @@
{
"downstream": {
"table": null,
- "column": "orderpriority"
+ "column": "orderpriority",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -92,7 +134,13 @@
{
"downstream": {
"table": null,
- "column": "clerk"
+ "column": "clerk",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
@@ -104,7 +152,13 @@
{
"downstream": {
"table": null,
- "column": "shippriority"
+ "column": "shippriority",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL"
},
"upstreams": [
{
@@ -116,7 +170,13 @@
{
"downstream": {
"table": null,
- "column": "comment"
+ "column": "comment",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "TEXT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json
index d7264fd2db6b2..326db47e7ab33 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_insert_as_select.json
@@ -18,21 +18,27 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)",
- "column": "i_item_desc"
+ "column": "i_item_desc",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": []
},
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)",
- "column": "w_warehouse_name"
+ "column": "w_warehouse_name",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": []
},
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)",
- "column": "d_week_seq"
+ "column": "d_week_seq",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
@@ -44,7 +50,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)",
- "column": "no_promo"
+ "column": "no_promo",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "BIGINT"
},
"upstreams": [
{
@@ -56,7 +68,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)",
- "column": "promo"
+ "column": "promo",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "BIGINT"
},
"upstreams": [
{
@@ -68,7 +86,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:hive,query72,PROD)",
- "column": "total_cnt"
+ "column": "total_cnt",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "BIGINT"
},
"upstreams": []
}
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json
index 10f5ee20b0c1f..b5fd5eebeb1b1 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_ambiguous_column_no_schema.json
@@ -9,21 +9,27 @@
{
"downstream": {
"table": null,
- "column": "a"
+ "column": "a",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": []
},
{
"downstream": {
"table": null,
- "column": "b"
+ "column": "b",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": []
},
{
"downstream": {
"table": null,
- "column": "c"
+ "column": "c",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": []
}
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json
index 9f6eeae46c294..a67c944822138 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_count.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "COUNT(`fact_complaint_snapshot`.`etl_data_dt_id`)"
+ "column": "COUNT(`fact_complaint_snapshot`.`etl_data_dt_id`)",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "BIGINT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json
index 109de96180422..5ad847e252497 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_struct_subfields.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "post_id"
+ "column": "post_id",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL"
},
"upstreams": [
{
@@ -20,7 +26,9 @@
{
"downstream": {
"table": null,
- "column": "id"
+ "column": "id",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
@@ -32,7 +40,9 @@
{
"downstream": {
"table": null,
- "column": "min_metric"
+ "column": "min_metric",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json
index 2340b2e95b0d0..902aa010c8afc 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_from_union.json
@@ -9,14 +9,26 @@
{
"downstream": {
"table": null,
- "column": "label"
+ "column": "label",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "VARCHAR"
},
"upstreams": []
},
{
"downstream": {
"table": null,
- "column": "total_agg"
+ "column": "total_agg",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DOUBLE"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json
index 326c07d332c26..6ea88f45847ce 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_max.json
@@ -8,7 +8,9 @@
{
"downstream": {
"table": null,
- "column": "max_col"
+ "column": "max_col",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json
index 3e02314d6e8c3..67e9fd2d21a0e 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_ctes.json
@@ -9,7 +9,9 @@
{
"downstream": {
"table": null,
- "column": "COL1"
+ "column": "COL1",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
@@ -21,7 +23,9 @@
{
"downstream": {
"table": null,
- "column": "COL3"
+ "column": "COL3",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json
index c12ad23b2f03b..6ee3d2e61c39b 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_select_with_full_col_name.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "post_id"
+ "column": "post_id",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL"
},
"upstreams": [
{
@@ -20,7 +26,9 @@
{
"downstream": {
"table": null,
- "column": "id"
+ "column": "id",
+ "column_type": null,
+ "native_column_type": null
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json
index 64cd80e9a2d69..a876824127ec1 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_case_statement.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "total_price_category"
+ "column": "total_price_category",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "VARCHAR"
},
"upstreams": [
{
@@ -20,7 +26,13 @@
{
"downstream": {
"table": null,
- "column": "total_price_success"
+ "column": "total_price_success",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "FLOAT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json
new file mode 100644
index 0000000000000..7545e2b3269dc
--- /dev/null
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_cast.json
@@ -0,0 +1,63 @@
+{
+ "query_type": "SELECT",
+ "in_tables": [
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)"
+ ],
+ "out_tables": [],
+ "column_lineage": [
+ {
+ "downstream": {
+ "table": null,
+ "column": "orderkey",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL(20, 0)"
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)",
+ "column": "o_orderkey"
+ }
+ ]
+ },
+ {
+ "downstream": {
+ "table": null,
+ "column": "total_cast_int",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "INT"
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)",
+ "column": "o_totalprice"
+ }
+ ]
+ },
+ {
+ "downstream": {
+ "table": null,
+ "column": "total_cast_float",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL(16, 4)"
+ },
+ "upstreams": [
+ {
+ "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)",
+ "column": "o_totalprice"
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json
index 7b22a46757e39..84e6b053000f1 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_column_normalization.json
@@ -8,7 +8,13 @@
{
"downstream": {
"table": null,
- "column": "total_agg"
+ "column": "total_agg",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DOUBLE"
},
"upstreams": [
{
@@ -20,7 +26,13 @@
{
"downstream": {
"table": null,
- "column": "total_avg"
+ "column": "total_avg",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DOUBLE"
},
"upstreams": [
{
@@ -32,7 +44,13 @@
{
"downstream": {
"table": null,
- "column": "total_min"
+ "column": "total_min",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "FLOAT"
},
"upstreams": [
{
@@ -44,7 +62,13 @@
{
"downstream": {
"table": null,
- "column": "total_max"
+ "column": "total_max",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "FLOAT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json
index c912d99a3a8a3..39c94cf83c561 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_ctas_column_normalization.json
@@ -10,7 +10,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)",
- "column": "Total_Agg"
+ "column": "Total_Agg",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DOUBLE"
},
"upstreams": [
{
@@ -22,7 +28,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)",
- "column": "total_avg"
+ "column": "total_avg",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DOUBLE"
},
"upstreams": [
{
@@ -34,7 +46,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)",
- "column": "TOTAL_MIN"
+ "column": "TOTAL_MIN",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "FLOAT"
},
"upstreams": [
{
@@ -46,7 +64,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders_normalized,PROD)",
- "column": "total_max"
+ "column": "total_max",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "FLOAT"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json
index 2af308ec60623..dbf5b1b9a4453 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json
+++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_default_normalization.json
@@ -11,7 +11,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)",
- "column": "user_fk"
+ "column": "user_fk",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL(38, 0)"
},
"upstreams": [
{
@@ -23,7 +29,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)",
- "column": "email"
+ "column": "email",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.StringType": {}
+ }
+ },
+ "native_column_type": "VARCHAR(16777216)"
},
"upstreams": [
{
@@ -35,7 +47,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)",
- "column": "last_purchase_date"
+ "column": "last_purchase_date",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.DateType": {}
+ }
+ },
+ "native_column_type": "DATE"
},
"upstreams": [
{
@@ -47,7 +65,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)",
- "column": "lifetime_purchase_amount"
+ "column": "lifetime_purchase_amount",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL"
},
"upstreams": [
{
@@ -59,7 +83,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)",
- "column": "lifetime_purchase_count"
+ "column": "lifetime_purchase_count",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "BIGINT"
},
"upstreams": [
{
@@ -71,7 +101,13 @@
{
"downstream": {
"table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,long_tail_companions.analytics.active_customer_ltv,PROD)",
- "column": "average_purchase_amount"
+ "column": "average_purchase_amount",
+ "column_type": {
+ "type": {
+ "com.linkedin.pegasus2avro.schema.NumberType": {}
+ }
+ },
+ "native_column_type": "DECIMAL"
},
"upstreams": [
{
diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
index 2a965a9bb1e61..bb6e5f1581754 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py
@@ -608,4 +608,25 @@ def test_snowflake_default_normalization():
)
+def test_snowflake_column_cast():
+ assert_sql_result(
+ """
+SELECT
+ o.o_orderkey::NUMBER(20,0) as orderkey,
+ CAST(o.o_totalprice AS INT) as total_cast_int,
+ CAST(o.o_totalprice AS NUMBER(16,4)) as total_cast_float
+FROM snowflake_sample_data.tpch_sf1.orders o
+LIMIT 10
+""",
+ dialect="snowflake",
+ schemas={
+ "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpch_sf1.orders,PROD)": {
+ "orderkey": "NUMBER(38,0)",
+ "totalprice": "NUMBER(12,2)",
+ },
+ },
+ expected_file=RESOURCE_DIR / "test_snowflake_column_cast.json",
+ )
+
+
# TODO: Add a test for setting platform_instance or env