Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingest/dbt): support use_compiled_code and test_warnings_are_errors #8956

Merged
Merged 2 commits on Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pydantic.fields import Field

from datahub.configuration.common import ConfigModel, ConfigurationError
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.metadata.schema_classes import FabricTypeClass

DEFAULT_ENV = FabricTypeClass.PROD
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
import warnings
from typing import Optional, Type
from typing import Any, Optional, Type

import pydantic

from datahub.configuration.common import ConfigurationWarning
from datahub.utilities.global_warning_util import add_global_warning

_unset = object()

def pydantic_field_deprecated(field: str, message: Optional[str] = None) -> classmethod:

def pydantic_field_deprecated(
field: str,
warn_if_value_is_not: Any = _unset,
message: Optional[str] = None,
) -> classmethod:
if message:
output = message
else:
output = f"{field} is deprecated and will be removed in a future release. Please remove it from your config."

def _validate_deprecated(cls: Type, values: dict) -> dict:
if field in values:
if field in values and (
warn_if_value_is_not is _unset or values[field] != warn_if_value_is_not
):
add_global_warning(output)
warnings.warn(output, ConfigurationWarning, stacklevel=2)
return values
Expand Down
41 changes: 31 additions & 10 deletions metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
ConfigurationError,
LineageConfig,
)
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
Expand Down Expand Up @@ -214,7 +214,9 @@ class DBTCommonConfig(
default=False,
description="Use model identifier instead of model name if defined (if not, default to model name).",
)
_deprecate_use_identifiers = pydantic_field_deprecated("use_identifiers")
_deprecate_use_identifiers = pydantic_field_deprecated(
"use_identifiers", warn_if_value_is_not=False
)

entities_enabled: DBTEntitiesEnabled = Field(
DBTEntitiesEnabled(),
Expand Down Expand Up @@ -278,6 +280,14 @@ class DBTCommonConfig(
description="When enabled, converts column URNs to lowercase to ensure cross-platform compatibility. "
"If `target_platform` is Snowflake, the default is True.",
)
use_compiled_code: bool = Field(
default=False,
description="When enabled, uses the compiled dbt code instead of the raw dbt node definition.",
)
test_warnings_are_errors: bool = Field(
default=False,
description="When enabled, dbt test warnings will be treated as failures.",
)

@validator("target_platform")
def validate_target_platform_value(cls, target_platform: str) -> str:
Expand Down Expand Up @@ -807,7 +817,7 @@ def _make_assertion_from_test(
mce_builder.make_schema_field_urn(upstream_urn, column_name)
],
nativeType=node.name,
logic=node.compiled_code if node.compiled_code else node.raw_code,
logic=node.compiled_code or node.raw_code,
aggregation=AssertionStdAggregationClass._NATIVE_,
nativeParameters=string_map(kw_args),
),
Expand All @@ -821,7 +831,7 @@ def _make_assertion_from_test(
dataset=upstream_urn,
scope=DatasetAssertionScopeClass.DATASET_ROWS,
operator=AssertionStdOperatorClass._NATIVE_,
logic=node.compiled_code if node.compiled_code else node.raw_code,
logic=node.compiled_code or node.raw_code,
nativeType=node.name,
aggregation=AssertionStdAggregationClass._NATIVE_,
nativeParameters=string_map(kw_args),
Expand Down Expand Up @@ -852,6 +862,10 @@ def _make_assertion_result_from_test(
result=AssertionResultClass(
type=AssertionResultTypeClass.SUCCESS
if test_result.status == "pass"
or (
not self.config.test_warnings_are_errors
and test_result.status == "warn"
)
else AssertionResultTypeClass.FAILURE,
nativeResults=test_result.native_results,
),
Expand Down Expand Up @@ -1003,8 +1017,8 @@ def create_platform_mces(
aspects.append(upstream_lineage_class)

# add view properties aspect
if node.raw_code and node.language == "sql":
view_prop_aspect = self._create_view_properties_aspect(node)
view_prop_aspect = self._create_view_properties_aspect(node)
if view_prop_aspect:
aspects.append(view_prop_aspect)

# emit subtype mcp
Expand Down Expand Up @@ -1129,14 +1143,21 @@ def _create_dataset_properties_aspect(
def get_external_url(self, node: DBTNode) -> Optional[str]:
pass

def _create_view_properties_aspect(self, node: DBTNode) -> ViewPropertiesClass:
def _create_view_properties_aspect(
self, node: DBTNode
) -> Optional[ViewPropertiesClass]:
view_logic = (
node.compiled_code if self.config.use_compiled_code else node.raw_code
)

if node.language != "sql" or not view_logic:
return None

materialized = node.materialization in {"table", "incremental", "snapshot"}
# this function is only called when raw sql is present. assert is added to satisfy lint checks
assert node.raw_code is not None
view_properties = ViewPropertiesClass(
materialized=materialized,
viewLanguage="SQL",
viewLogic=node.raw_code,
viewLogic=view_logic,
)
return view_properties

Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/ingestion/source/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pydantic.fields import Field

from datahub.configuration.common import ConfigEnum, ConfigModel, ConfigurationError
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

import datahub.emitter.mce_builder as builder
from datahub.configuration.common import AllowDenyPattern, ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.common.subtypes import BIAssetSubTypes
from datahub.ingestion.source.state.stale_entity_removal_handler import (
StaleEntityRemovalSourceReport,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

from datahub.configuration import ConfigModel
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.data_lake_common.path_spec import PathSpec
from datahub.ingestion.source.sql.postgres import BasePostgresConfig
from datahub.ingestion.source.state.stateful_ingestion_base import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from pydantic.fields import Field

from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig
from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
from sqlalchemy.types import BOOLEAN, DATE, DATETIME, INTEGER

import datahub.emitter.mce_builder as builder
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetLineageProviderConfigBase
from datahub.configuration.time_window_config import BaseTimeWindowConfig
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter import mce_builder
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.decorators import (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from pydantic import Field

from datahub.configuration.common import AllowDenyPattern, ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig
from datahub.ingestion.source.state.stale_entity_removal_handler import (
StatefulStaleMetadataRemovalConfig,
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/ingestion/source/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@
ConfigModel,
ConfigurationError,
)
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.source_common import (
DatasetLineageProviderConfigBase,
DatasetSourceConfigMixin,
)
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.mcp_builder import (
ContainerKey,
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion/tests/unit/test_pydantic_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pydantic import ValidationError

from datahub.configuration.common import ConfigModel
from datahub.configuration.pydantic_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
from datahub.configuration.validate_field_removal import pydantic_removed_field
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.utilities.global_warning_util import get_global_warnings
Expand Down
Loading