Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug] Fix scenario where dbt attempts to add existing columns to relations when using the SDK for column metadata #919

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240927-171725.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix scenario where dbt attempts to add existing columns to relations when using the SDK for column metadata
time: 2024-09-27T17:17:25.584838-04:00
custom:
Author: mikealfare
Issue: "914"
9 changes: 9 additions & 0 deletions dbt/adapters/redshift/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,15 @@ def _parse_column_results(record: Tuple[Any, ...]) -> Dict[str, Any]:
char_dtypes = [1, 12]
num_dtypes = [2, 3, 4, 5, 6, 7, 8, -5, 2003]

# the results from `get_columns` vary slightly from the pg_catalog tables for dtype names
dtype_alias = {
"bool": "boolean",
"int4": "integer",
"timestamp": "timestamp without time zone",
"varchar": "character varying",
}
dtype_name = dtype_alias.get(dtype_name, dtype_name)

if dtype_code in char_dtypes:
return {"column": column_name, "dtype": dtype_name, "char_size": column_size}
elif dtype_code in num_dtypes:
Expand Down
2 changes: 1 addition & 1 deletion dbt/adapters/redshift/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _behavior_flags(self) -> List[BehaviorFlag]:
return [
{
"name": "restrict_direct_pg_catalog_access",
"default": False,
"default": True,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This allows us to run all tests as if the flag were enabled without adjusting the tests. This should be reverted prior to merging.

"description": (
"The dbt-redshift adapter is migrating from using pg_ tables "
"to using Redshift Metadata API and information_schema tables "
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,7 @@ def models(self):
def setup(self, project):
run_dbt(["run"])

@pytest.fixture(scope="class")
def expected_columns(self):
return []

def test_columns_in_relation(self, project, expected_columns):
def test_columns_in_relation(self, project):
my_relation = RedshiftRelation.create(
database=project.database,
schema=project.test_schema,
Expand All @@ -28,6 +24,10 @@ def test_columns_in_relation(self, project, expected_columns):
)
with project.adapter.connection_named("_test"):
actual_columns = project.adapter.get_columns_in_relation(my_relation)
expected_columns = [
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we are aliasing dtypes back to the original names, we don't need to specify the expected column dtypes based on whether the flag is enabled.

Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
Column(column="my_char", dtype="character varying", char_size=1),
]
assert actual_columns == expected_columns


Expand All @@ -36,24 +36,8 @@ class TestColumnsInRelationBehaviorFlagOff(ColumnsInRelation):
def project_config_update(self):
return {"flags": {}}

@pytest.fixture(scope="class")
def expected_columns(self):
# the SDK query returns "varchar" whereas our custom query returns "character varying"
return [
Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
Column(column="my_char", dtype="character varying", char_size=1),
]


class TestColumnsInRelationBehaviorFlagOn(ColumnsInRelation):
@pytest.fixture(scope="class")
def project_config_update(self):
return {"flags": {"restrict_direct_pg_catalog_access": True}}

@pytest.fixture(scope="class")
def expected_columns(self):
# the SDK query returns "varchar" whereas our custom query returns "character varying"
return [
Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
Column(column="my_char", dtype="varchar", char_size=1),
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from dbt.tests.util import run_dbt
import pytest

from tests.functional.utils import update_model


# Seed data for the incremental scenario. Rows that share an ``id`` but carry
# a later ``occurred_at`` represent follow-up records picked up by the
# date-windowed incremental runs below.
# NOTE(review): the single quotes around the dates (e.g. '2024-01-01') are
# part of the CSV field value; the models cast via ``::timestamptz`` —
# confirm the seed loads these as intended.
SEED = """
id,col7,col6,occurred_at
1,Cheetara,thunder,'2024-01-01'
2,Tygra,thunder,'2024-01-01'
2,Tygra,THUNDER,'2024-02-01'
3,Lion-O,thunder,'2024-01-01'
3,Lion-O,THUNDER,'2024-02-01'
3,Lion-O,THUNDERCATS,'2024-03-01'
""".strip()


# Initial build of the incremental model: January window only, and
# deliberately WITHOUT ``col7`` so a later run has a new column to append.
MODEL_INITIAL = """
{{ config(
materialized='incremental',
dist='col6',
on_schema_change='append_new_columns',
) }}
select
id::bigint as id,
col6::varchar(128) as col6,
occurred_at::timestamptz as occurred_at
from {{ ref('my_seed') }}
where occurred_at::timestamptz >= '2024-01-01'::timestamptz
and occurred_at::timestamptz < '2024-02-01'::timestamptz
"""


# Updated model: February window, now selecting ``col7`` as well, which
# ``on_schema_change='append_new_columns'`` must add to the existing relation.
MODEL_UPDATE = """
{{ config(
materialized='incremental',
dist='col6',
on_schema_change='append_new_columns',
) }}
select
id::bigint as id,
col6::varchar(128) as col6,
occurred_at::timestamptz as occurred_at,
col7::varchar(56) as col7
from {{ ref('my_seed') }}
where occurred_at::timestamptz >= '2024-02-01'::timestamptz
and occurred_at::timestamptz < '2024-03-01'::timestamptz
"""


class TestIncrementalOnSchemaChange:
    """
    Regression test: an incremental run with ``on_schema_change='append_new_columns'``
    must not attempt to re-add columns that already exist on the relation.

    This addresses: https://github.com/dbt-labs/dbt-redshift/issues/914
    """

    @pytest.fixture(scope="class")
    def project_config_update(self):
        # Pin the behavior flag so this class exercises a single, known
        # column-metadata code path regardless of the adapter's default.
        # NOTE(review): confirm False (the direct pg_catalog path) is the
        # intended value here versus the SDK-backed path.
        return {"flags": {"restrict_direct_pg_catalog_access": False}}

    @pytest.fixture(scope="class")
    def seeds(self):
        return {"my_seed.csv": SEED}

    @pytest.fixture(scope="class")
    def models(self):
        # Start from the narrower model; the test swaps in MODEL_UPDATE later.
        return {"my_model.sql": MODEL_INITIAL}

    def test_columns_in_relation(self, project):
        # Build the initial model, then widen it with ``col7`` and run again;
        # the second run must append only the genuinely new column.
        run_dbt(["seed"])
        run_dbt(["run"])
        update_model(project, "my_model", MODEL_UPDATE)
        run_dbt(["run"])
        # a successful run is a pass
8 changes: 8 additions & 0 deletions tests/functional/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from dbt.tests.util import get_model_file, relation_from_name, set_model_file


def update_model(project, name: str, model: str) -> str:
    """Overwrite the model file for *name* in *project* with *model*.

    Returns the file's previous contents so the caller can restore them.
    """
    target = relation_from_name(project.adapter, name)
    previous_contents = get_model_file(project, target)
    set_model_file(project, target, model)
    return previous_contents
Loading