Skip to content

Commit

Permalink
feat(ingest/dbt): speed up test result only ingestion
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 committed Feb 6, 2024
1 parent 11f7804 commit 3b77780
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
17 changes: 13 additions & 4 deletions metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,17 +184,18 @@ def process_only_directive(cls, values):

return values

def can_emit_node_type(self, node_type: str) -> bool:
def _node_type_allow_map(self):
# Node type comes from dbt's node types.

node_type_allow_map = {
return {
"model": self.models,
"source": self.sources,
"seed": self.seeds,
"snapshot": self.snapshots,
"test": self.test_definitions,
}
allowed = node_type_allow_map.get(node_type)

def can_emit_node_type(self, node_type: str) -> bool:
allowed = self._node_type_allow_map().get(node_type)
if allowed is None:
return False

Expand All @@ -204,6 +205,11 @@ def can_emit_node_type(self, node_type: str) -> bool:
def can_emit_test_results(self) -> bool:
return self.test_results == EmitDirective.YES

def is_only_test_results(self) -> bool:
return self.test_results == EmitDirective.YES and all(
v == EmitDirective.NO for v in self._node_type_allow_map().values()
)


class DBTCommonConfig(
StatefulIngestionConfigBase, DatasetSourceConfigMixin, LineageConfig
Expand Down Expand Up @@ -877,6 +883,9 @@ def _infer_schemas_and_update_cll(self, all_nodes_map: Dict[str, DBTNode]) -> No
5. If we haven't already added the node's schema to the schema resolver, do that.
"""

if self.config.entities_enabled.is_only_test_results():
# If we're not emitting any other entities, so there's no need to infer schemas.
return
if not self.config.infer_dbt_schemas:
if self.config.include_column_lineage:
raise ConfigurationError(
Expand Down
4 changes: 4 additions & 0 deletions metadata-ingestion/tests/unit/test_dbt_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ def test_dbt_entity_emission_configuration_helpers():
assert not config.entities_enabled.can_emit_node_type("source")
assert not config.entities_enabled.can_emit_node_type("test")
assert not config.entities_enabled.can_emit_test_results
assert not config.entities_enabled.is_only_test_results()

config_dict = {
"manifest_path": "dummy_path",
Expand All @@ -267,6 +268,7 @@ def test_dbt_entity_emission_configuration_helpers():
assert config.entities_enabled.can_emit_node_type("source")
assert config.entities_enabled.can_emit_node_type("test")
assert config.entities_enabled.can_emit_test_results
assert not config.entities_enabled.is_only_test_results()

config_dict = {
"manifest_path": "dummy_path",
Expand All @@ -281,6 +283,7 @@ def test_dbt_entity_emission_configuration_helpers():
assert not config.entities_enabled.can_emit_node_type("source")
assert not config.entities_enabled.can_emit_node_type("test")
assert config.entities_enabled.can_emit_test_results
assert config.entities_enabled.is_only_test_results()

config_dict = {
"manifest_path": "dummy_path",
Expand All @@ -298,6 +301,7 @@ def test_dbt_entity_emission_configuration_helpers():
assert not config.entities_enabled.can_emit_node_type("source")
assert config.entities_enabled.can_emit_node_type("test")
assert config.entities_enabled.can_emit_test_results
assert not config.entities_enabled.is_only_test_results()


def test_dbt_cloud_config_access_url():
Expand Down

0 comments on commit 3b77780

Please sign in to comment.