dbt-labs · mikealfare · May 15, 2024 · May 15, 2024 · May 16, 2024 · May 16, 2024
@@ -146,21 +146,40 @@ def list_relations_without_caching(
         relations = []
         quote_policy = {"database": True, "schema": True, "identifier": True}
 
-        columns = ["database_name", "schema_name", "name", "kind"]
-        for _database, _schema, _identifier, _type in results.select(columns):
-            try:
-                _type = self.Relation.get_relation_type(_type.lower())
-            except ValueError:
-                _type = self.Relation.External
-            relations.append(
-                self.Relation.create(
-                    database=_database,
-                    schema=_schema,
-                    identifier=_identifier,
-                    quote_policy=quote_policy,
-                    type=_type,
+        if "is_dynamic" in results.column_names:
+            columns = ["database_name", "schema_name", "name", "kind", "is_dynamic"]
+            for _database, _schema, _identifier, _type, is_dynamic in results.select(columns):
+                try:
+                    _type = self.Relation.get_relation_type(_type.lower())
+                    if _type == self.Relation.Table and is_dynamic == "Y":
+                        _type = self.Relation.DynamicTable
+                except ValueError:
+                    _type = self.Relation.External
+                relations.append(
+                    self.Relation.create(
+                        database=_database,
+                        schema=_schema,
+                        identifier=_identifier,
+                        quote_policy=quote_policy,
+                        type=_type,
+                    )
+                )
+        else:
+            columns = ["database_name", "schema_name", "name", "kind"]
+            for _database, _schema, _identifier, _type in results.select(columns):
+                try:
+                    _type = self.Relation.get_relation_type(_type.lower())
+                except ValueError:
+                    _type = self.Relation.External
+                relations.append(
+                    self.Relation.create(
+                        database=_database,
+                        schema=_schema,
+                        identifier=_identifier,
+                        quote_policy=quote_policy,
+                        type=_type,
+                    )
                 )
-            )
 
         return relations
 

@@ -73,7 +73,7 @@
   {% for _ in range(0, max_iter) %}
 
       {%- set paginated_sql -%}
-         show terse objects in {{ schema_relation.database }}.{{ schema_relation.schema }} limit {{ max_results_per_iter }} from '{{ watermark.table_name }}'
+        {{ snowflake__get_show_objects_sql(schema_relation, max_results_per_iter) }} from '{{ watermark.table_name }}'
       {%- endset -%}
 
       {%- set paginated_result = run_query(paginated_sql) %}
@@ -119,12 +119,16 @@
 
 {% endmacro %}
 
+{% macro snowflake__get_show_objects_sql(schema, results_per_iteration) %}{# emits the base `show objects` statement with a row limit; the paginated caller appends a `from '<name>'` clause #}
+    show objects in {{ schema.database }}.{{ schema.schema }} limit {{ results_per_iteration }}
+{% endmacro %}
+
 {% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %}
 
   {%- set max_total_results = max_results_per_iter * max_iter -%}
 
   {%- set sql -%}
-    show terse objects in {{ schema_relation.database }}.{{ schema_relation.schema }} limit {{ max_results_per_iter }}
+    {{ snowflake__get_show_objects_sql(schema_relation, max_results_per_iter) }}
   {%- endset -%}
 
   {%- set result = run_query(sql) -%}

@@ -33,3 +33,5 @@ SNOWFLAKE_TEST_WAREHOUSE=my_warehouse_name
 DBT_TEST_USER_1=dbt_test_role_1
 DBT_TEST_USER_2=dbt_test_role_2
 DBT_TEST_USER_3=dbt_test_role_3
+
+DBT_PERFORMANCE_TESTING=0
@@ -0,0 +1,17 @@
+import os
+
+import pytest
+
+
+def _get_setting(environment_variable: str) -> bool:
+    """
+    Report whether a boolean-style environment variable is switched on.
+
+    Args:
+        environment_variable: name of the variable to look up in `os.environ`
+
+    Returns:
+        True when the variable is set to "1" or to "true" in any letter case
+        (surrounding whitespace is ignored); False when it is unset or holds
+        any other value.
+    """
+    # environment values are always strings, so only string spellings need to be matched
+    raw_value = os.environ.get(environment_variable, "")
+    return raw_value.strip().lower() in {"1", "true"}
+
+
+performance_test = pytest.mark.skipif(  # skip marker applied to tests via `@performance_test`
+    not _get_setting("DBT_PERFORMANCE_TESTING"),  # computed once, when this module is imported
+    reason=(
+        "Performance test skipped, to turn on performance testing, "
+        "please set the environment variable `DBT_PERFORMANCE_TESTING`"
+    ),
+)
@@ -0,0 +1,51 @@
+Performance tests were run using both `show objects` and `show terse objects` at three scales.
+With the `2024_03` bundle turned off, both methods are able to correctly identify a dynamic table.
+However, when `2024_03` is turned on, only `show objects` is able to correctly identify
+a dynamic table. This is done by inspecting the new column `is_dynamic` since both a table
+and a dynamic table show up with a `kind` of table.
+In order to properly compare the two methods, an additional scenario was added that does not
+create dynamic tables, and instead splits those objects evenly between views and tables.
+
+Let's take the small scale as an example. The small scale creates 30 objects.
+One scenario creates 10 views, 10 tables, and 10 dynamic tables, resulting in 30 objects.
+This is successful for `show objects` whether `2024_03` is turned on or off.
+It is also successful for `show terse objects` when `2024_03` is turned off.
+There is another scenario that creates 15 views and 15 tables, but no dynamic tables.
+This scenario still creates 30 objects, and both methods return the correct types
+regardless of the setting for `2024_03`.
+These scenarios can be combined to compare `show terse objects` with `2024_03` off
+to `show objects` with `2024_03` turned on.
+This comparison represents the change that will happen when `2024_03` becomes a mandatory bundle.
+
+### 30 Objects
+
+| 2024_03 | method             | mean time | mean time - no DTs |
+|:-------:|--------------------|----------:|-------------------:|
+|   NO    | show terse objects |    1.02 s |                 -- |
+|   YES   | show objects       |    0.91 s |             0.92 s |
+|   YES   | show terse objects |        -- |             0.94 s |
+
+- 11% shorter run time of `list_relations_without_caching` when turning on `2024_03`
+- similar performance of `show objects` and `show terse objects` in `2024_03`
+
+### 300 Objects
+
+| 2024_03 | method             | mean time | mean time - no DTs |
+|:-------:|--------------------|----------:|-------------------:|
+|   NO    | show terse objects |    0.96 s |                 -- |
+|   YES   | show objects       |    1.19 s |             1.37 s |
+|   YES   | show terse objects |        -- |             0.92 s |
+
+- 24% longer run time of `list_relations_without_caching` when turning on `2024_03`
+- 49% longer run time of `show objects` than `show terse objects` in `2024_03`
+
+### 3000 Objects
+
+| 2024_03 | method             | mean time | mean time - no DTs |
+|:-------:|--------------------|----------:|-------------------:|
+|   NO    | show terse objects |    2.00 s |                 -- |
+|   YES   | show objects       |    3.05 s |             3.22 s |
+|   YES   | show terse objects |        -- |             2.33 s |
+
+- 53% longer run time of `list_relations_without_caching` when turning on `2024_03`
+- 38% longer run time of `show objects` than `show terse objects` in `2024_03`
@@ -0,0 +1,114 @@
+from dataclasses import dataclass
+from datetime import datetime, timedelta
+import os
+from statistics import mean
+from typing import List, Tuple
+
+import pytest
+
+from dbt.adapters.factory import get_adapter_by_type
+from dbt.adapters.snowflake import SnowflakeRelation
+
+from dbt.tests.util import run_dbt, get_connection
+from tests.performance.conftest import performance_test
+
+
+SEED = """
+id,value
+0,red
+1,yellow
+2,blue
+""".strip()  # CSV content served as `my_seed.csv` by the `seeds` fixture
+
+
+VIEW = """
+select * from {{ ref('my_seed') }}
+"""  # model source with no config block; written out as `my_view_<i>.sql`
+
+
+TABLE = """
+{{ config(materialized='table') }}
+select * from {{ ref('my_seed') }}
+"""  # table-materialized model source; written out as `my_table_<i>.sql`
+
+
+# Model source for a dynamic table that uses the warehouse named by the
+# SNOWFLAKE_TEST_WAREHOUSE environment variable. The "" default keeps this
+# module importable when the variable is unset: `os.getenv` would otherwise
+# return None and the concatenation below would raise TypeError at import
+# time, before the performance tests could even be skipped.
+DYNAMIC_TABLE = (
+    """
+{{ config(
+    materialized='dynamic_table',
+    target_lag='1 day',
+    snowflake_warehouse='"""
+    + os.getenv("SNOWFLAKE_TEST_WAREHOUSE", "")
+    + """',
+) }}
+select * from {{ ref('my_seed') }}
+"""
+)
+
+
+@dataclass
+class Scenario:  # how many models of each kind a performance test case generates
+    views: int  # number of `my_view_<i>` models
+    tables: int  # number of `my_table_<i>` models
+    dynamic_tables: int  # number of `my_dynamic_table_<i>` models
+
+
+class BaseConfig:
+    """
+    Shared fixtures and timing test for `list_relations_without_caching`.
+
+    Subclasses set `scenario` and `expected_duration`; `iterations` may be overridden.
+    """
+
+    # number of views, tables, and dynamic tables generated for the test class
+    scenario: Scenario
+    # baseline mean duration in seconds; the test allows up to 10% over this
+    expected_duration: float
+    # number of timed calls whose durations are averaged
+    iterations: int = 10
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        """Yield the single seed file that every generated model selects from."""
+        yield {"my_seed.csv": SEED}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        """Yield one model file per view, table, and dynamic table in the scenario."""
+        models = {}
+        models.update({f"my_view_{i}.sql": VIEW for i in range(self.scenario.views)})
+        models.update({f"my_table_{i}.sql": TABLE for i in range(self.scenario.tables)})
+        models.update(
+            {
+                f"my_dynamic_table_{i}.sql": DYNAMIC_TABLE
+                for i in range(self.scenario.dynamic_tables)
+            }
+        )
+        yield models
+
+    @pytest.fixture(scope="class", autouse=True)
+    def setup(self, project):
+        """Run `dbt seed` and `dbt run` once per test class, before any timing."""
+        run_dbt(["seed"])
+        run_dbt(["run"])
+
+    def list_relations(self, project) -> Tuple[List[SnowflakeRelation], timedelta]:
+        """
+        List the relations in the test schema and time the call.
+
+        Args:
+            project: the test project; supplies the database and schema names
+
+        Returns:
+            The relations reported by `list_relations_without_caching` and the elapsed time.
+        """
+        # imported locally so this block does not rely on a module-level import of `time`
+        from time import perf_counter
+
+        my_adapter = get_adapter_by_type("snowflake")
+        schema = my_adapter.Relation.create(
+            database=project.database, schema=project.test_schema, identifier=""
+        )
+
+        # `perf_counter` is monotonic, so the measurement cannot be skewed by system clock
+        # adjustments; `datetime.utcnow()` is also deprecated as of Python 3.12.
+        # The timed span still covers entering and leaving the connection context,
+        # so what is being measured is unchanged.
+        start = perf_counter()
+        with get_connection(my_adapter):
+            relations = my_adapter.list_relations_without_caching(schema)
+        duration = timedelta(seconds=perf_counter() - start)
+        return relations, duration
+
+    @performance_test
+    def test_list_relations(self, project):
+        """Check the relation counts by type on every iteration, then the mean duration."""
+        durations = []
+        for _ in range(self.iterations):
+            relations, duration = self.list_relations(project)
+            durations.append(duration.total_seconds())
+            assert (
+                sum(1 for relation in relations if relation.is_view)
+                == self.scenario.views
+            )
+            assert (
+                sum(1 for relation in relations if relation.is_table)
+                == self.scenario.tables + 1  # add the seed
+            )
+            assert (
+                sum(1 for relation in relations if relation.is_dynamic_table)
+                == self.scenario.dynamic_tables
+            )
+        assert mean(durations) < self.expected_duration * 1.10  # allow for 10% error
@@ -0,0 +1,48 @@
+from datetime import timedelta
+
+import pytest
+
+from tests.performance.list_relations_tests.list_relations import BaseConfig, Scenario
+
+
+SHOW_OBJECTS_MACRO = """
+{% macro snowflake__get_show_objects_sql(schema, results_per_iteration) %}
+    show objects in {{ schema.database }}.{{ schema.schema }} limit {{ results_per_iteration }}
+{% endmacro %}
+"""  # macro source using `show objects`; handed to the project by `ShowObjects.macros`
+
+
+class ShowObjects(BaseConfig):  # BaseConfig cases run with the `show objects` macro supplied
+    @pytest.fixture(scope="class")
+    def macros(self):
+        yield {"snowflake__get_show_objects_sql.sql": SHOW_OBJECTS_MACRO}  # file name -> macro source
+
+
+class TestShowObjects10View10Table10Dynamic(ShowObjects):  # 10 views, 10 tables, 10 dynamic tables
+    scenario = Scenario(10, 10, 10)
+    expected_duration = timedelta(seconds=0, microseconds=920_000).total_seconds()  # 0.92 seconds
+
+
+class TestShowObjects15View15Table0Dynamic(ShowObjects):  # 15 views, 15 tables, no dynamic tables
+    scenario = Scenario(15, 15, 0)
+    expected_duration = timedelta(seconds=0, microseconds=920_000).total_seconds()  # 0.92 seconds
+
+
+class TestShowObjects100View100Table100Dynamic(ShowObjects):  # 100 views, 100 tables, 100 dynamic tables
+    scenario = Scenario(100, 100, 100)
+    expected_duration = timedelta(seconds=1, microseconds=370_000).total_seconds()  # 1.37 seconds
+
+
+class TestShowObjects150View150Table0Dynamic(ShowObjects):  # 150 views, 150 tables, no dynamic tables
+    scenario = Scenario(150, 150, 0)
+    expected_duration = timedelta(seconds=1, microseconds=370_000).total_seconds()  # 1.37 seconds
+
+
+class TestShowObjects1000View1000Table1000Dynamic(ShowObjects):  # 1000 views, 1000 tables, 1000 dynamic tables
+    scenario = Scenario(1000, 1000, 1000)
+    expected_duration = timedelta(seconds=3, microseconds=400_000).total_seconds()  # 3.40 seconds
+
+
+class TestShowObjects1500View1500Table0Dynamic(ShowObjects):  # 1500 views, 1500 tables, no dynamic tables
+    scenario = Scenario(1500, 1500, 0)
+    expected_duration = timedelta(seconds=3, microseconds=400_000).total_seconds()  # 3.40 seconds
@@ -0,0 +1,48 @@
+from datetime import timedelta
+
+import pytest
+
+from tests.performance.list_relations_tests.list_relations import BaseConfig, Scenario
+
+
+SHOW_TERSE_OBJECTS_MACRO = """
+{% macro snowflake__get_show_objects_sql(schema, results_per_iteration) %}
+    show terse objects in {{ schema.database }}.{{ schema.schema }} limit {{ results_per_iteration }}
+{% endmacro %}
+"""  # macro source using `show terse objects`; handed to the project by `ShowTerseObjects.macros`
+
+
+class ShowTerseObjects(BaseConfig):  # BaseConfig cases run with the `show terse objects` macro supplied
+    @pytest.fixture(scope="class")
+    def macros(self):
+        yield {"snowflake__get_show_objects_sql.sql": SHOW_TERSE_OBJECTS_MACRO}  # file name -> macro source
+
+
+class TestShowTerseObjects10View10Table10Dynamic(ShowTerseObjects):  # 10 views, 10 tables, 10 dynamic tables
+    scenario = Scenario(10, 10, 10)
+    expected_duration = timedelta(seconds=1, microseconds=20_000).total_seconds()  # 1.02 seconds
+
+
+class TestShowTerseObjects15View15Table0Dynamic(ShowTerseObjects):  # 15 views, 15 tables, no dynamic tables
+    scenario = Scenario(15, 15, 0)
+    expected_duration = timedelta(seconds=1, microseconds=20_000).total_seconds()  # 1.02 seconds
+
+
+class TestShowTerseObjects100View100Table100Dynamic(ShowTerseObjects):  # 100 views, 100 tables, 100 dynamic tables
+    scenario = Scenario(100, 100, 100)
+    expected_duration = timedelta(seconds=0, microseconds=960_000).total_seconds()  # 0.96 seconds
+
+
+class TestShowTerseObjects150View150Table0Dynamic(ShowTerseObjects):  # 150 views, 150 tables, no dynamic tables
+    scenario = Scenario(150, 150, 0)
+    expected_duration = timedelta(seconds=0, microseconds=960_000).total_seconds()  # 0.96 seconds
+
+
+class TestShowTerseObjects1000View1000Table1000Dynamic(ShowTerseObjects):  # 1000 views, 1000 tables, 1000 dynamic tables
+    scenario = Scenario(1000, 1000, 1000)
+    expected_duration = timedelta(seconds=2, microseconds=330_000).total_seconds()  # 2.33 seconds
+
+
+class TestShowTerseObjects1500View1500Table0Dynamic(ShowTerseObjects):  # 1500 views, 1500 tables, no dynamic tables
+    scenario = Scenario(1500, 1500, 0)
+    expected_duration = timedelta(seconds=2, microseconds=330_000).total_seconds()  # 2.33 seconds