diff --git a/.changes/unreleased/Fixes-20240927-171725.yaml b/.changes/unreleased/Fixes-20240927-171725.yaml
new file mode 100644
index 000000000..d417b8801
--- /dev/null
+++ b/.changes/unreleased/Fixes-20240927-171725.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fix scenario where dbt attempts to add existing columns to relations when using the SDK for column metadata
+time: 2024-09-27T17:17:25.584838-04:00
+custom:
+  Author: mikealfare
+  Issue: "914"
diff --git a/dbt/adapters/redshift/connections.py b/dbt/adapters/redshift/connections.py
index 8e7ae36d2..e445bdce6 100644
--- a/dbt/adapters/redshift/connections.py
+++ b/dbt/adapters/redshift/connections.py
@@ -503,6 +503,15 @@ def _parse_column_results(record: Tuple[Any, ...]) -> Dict[str, Any]:
         char_dtypes = [1, 12]
         num_dtypes = [2, 3, 4, 5, 6, 7, 8, -5, 2003]
 
+        # the results from `get_columns` vary slightly from the pg_catalog tables for dtype names
+        dtype_alias = {
+            "bool": "boolean",
+            "int4": "integer",
+            "timestamp": "timestamp without time zone",
+            "varchar": "character varying",
+        }
+        dtype_name = dtype_alias.get(dtype_name, dtype_name)
+
         if dtype_code in char_dtypes:
             return {"column": column_name, "dtype": dtype_name, "char_size": column_size}
         elif dtype_code in num_dtypes:
diff --git a/dbt/adapters/redshift/impl.py b/dbt/adapters/redshift/impl.py
index e0cefb989..7e24f6a89 100644
--- a/dbt/adapters/redshift/impl.py
+++ b/dbt/adapters/redshift/impl.py
@@ -73,7 +73,7 @@ def _behavior_flags(self) -> List[BehaviorFlag]:
         return [
             {
                 "name": "restrict_direct_pg_catalog_access",
-                "default": False,
+                "default": True,
                 "description": (
                     "The dbt-redshift adapter is migrating from using pg_ tables "
                     "to using Redshift Metadata API and information_schema tables "
diff --git a/tests/functional/columns_in_relation_tests/__init__.py b/tests/functional/columns_in_relation_tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/functional/test_columns_in_relation.py b/tests/functional/columns_in_relation_tests/test_columns_in_relation.py
similarity index 66%
rename from tests/functional/test_columns_in_relation.py
rename to tests/functional/columns_in_relation_tests/test_columns_in_relation.py
index 60aeaa2aa..9f9f90a94 100644
--- a/tests/functional/test_columns_in_relation.py
+++ b/tests/functional/columns_in_relation_tests/test_columns_in_relation.py
@@ -15,11 +15,7 @@ def models(self):
     def setup(self, project):
         run_dbt(["run"])
 
-    @pytest.fixture(scope="class")
-    def expected_columns(self):
-        return []
-
-    def test_columns_in_relation(self, project, expected_columns):
+    def test_columns_in_relation(self, project):
         my_relation = RedshiftRelation.create(
             database=project.database,
             schema=project.test_schema,
@@ -28,6 +24,10 @@ def test_columns_in_relation(self, project, expected_columns):
         )
         with project.adapter.connection_named("_test"):
             actual_columns = project.adapter.get_columns_in_relation(my_relation)
+        expected_columns = [
+            Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
+            Column(column="my_char", dtype="character varying", char_size=1),
+        ]
         assert actual_columns == expected_columns
 
 
@@ -36,24 +36,8 @@ class TestColumnsInRelationBehaviorFlagOff(ColumnsInRelation):
     def project_config_update(self):
-        return {"flags": {}}
+        return {"flags": {"restrict_direct_pg_catalog_access": False}}
 
-    @pytest.fixture(scope="class")
-    def expected_columns(self):
-        # the SDK query returns "varchar" whereas our custom query returns "character varying"
-        return [
-            Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
-            Column(column="my_char", dtype="character varying", char_size=1),
-        ]
-
 
 class TestColumnsInRelationBehaviorFlagOn(ColumnsInRelation):
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {"flags": {"restrict_direct_pg_catalog_access": True}}
-
-    @pytest.fixture(scope="class")
-    def expected_columns(self):
-        # the SDK query returns "varchar" whereas our custom query returns "character varying"
-        return [
-            Column(column="my_num", dtype="numeric", numeric_precision=3, numeric_scale=2),
-            Column(column="my_char", dtype="varchar", char_size=1),
-        ]
diff --git a/tests/functional/columns_in_relation_tests/test_incremental_on_schema_change.py b/tests/functional/columns_in_relation_tests/test_incremental_on_schema_change.py
new file mode 100644
index 000000000..a335114bb
--- /dev/null
+++ b/tests/functional/columns_in_relation_tests/test_incremental_on_schema_change.py
@@ -0,0 +1,83 @@
+from dbt.tests.util import run_dbt
+import pytest
+
+from tests.functional.utils import update_model
+
+
+SEED = """
+id,col7,col6,occurred_at
+1,Cheetara,thunder,'2024-01-01'
+2,Tygra,thunder,'2024-01-01'
+2,Tygra,THUNDER,'2024-02-01'
+3,Lion-O,thunder,'2024-01-01'
+3,Lion-O,THUNDER,'2024-02-01'
+3,Lion-O,THUNDERCATS,'2024-03-01'
+""".strip()
+
+
+MODEL_INITIAL = """
+{{ config(
+    materialized='incremental',
+    dist='col6',
+    on_schema_change='append_new_columns',
+) }}
+select
+    id::bigint as id,
+    col6::varchar(128) as col6,
+    occurred_at::timestamptz as occurred_at
+from {{ ref('my_seed') }}
+where occurred_at::timestamptz >= '2024-01-01'::timestamptz
+and occurred_at::timestamptz < '2024-02-01'::timestamptz
+"""
+
+
+MODEL_UPDATE = """
+{{ config(
+    materialized='incremental',
+    dist='col6',
+    on_schema_change='append_new_columns',
+) }}
+select
+    id::bigint as id,
+    col6::varchar(128) as col6,
+    occurred_at::timestamptz as occurred_at,
+    col7::varchar(56) as col7
+from {{ ref('my_seed') }}
+where occurred_at::timestamptz >= '2024-02-01'::timestamptz
+and occurred_at::timestamptz < '2024-03-01'::timestamptz
+"""
+
+
+class TestIncrementalOnSchemaChange:
+    """
+    This addresses: https://github.com/dbt-labs/dbt-redshift/issues/914
+
+    We test it with the `restrict_direct_pg_catalog_access` flag both off and on since the bug
+    only emerges when the flag is on (the former is a control).
+    """
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {"flags": {"restrict_direct_pg_catalog_access": False}}
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"my_seed.csv": SEED}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {"my_model.sql": MODEL_INITIAL}
+
+    def test_columns_in_relation(self, project):
+        run_dbt(["seed"])
+        run_dbt(["run"])
+        update_model(project, "my_model", MODEL_UPDATE)
+        run_dbt(["run"])
+        # a successful run is a pass
+
+
+class TestIncrementalOnSchemaChangeFlagOn(TestIncrementalOnSchemaChange):
+    # inherits the seed, model, and test from the flag-off control; only the flag value differs
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {"flags": {"restrict_direct_pg_catalog_access": True}}
diff --git a/tests/functional/utils.py b/tests/functional/utils.py
new file mode 100644
index 000000000..c2fdde9f1
--- /dev/null
+++ b/tests/functional/utils.py
@@ -0,0 +1,9 @@
+from dbt.tests.util import get_model_file, relation_from_name, set_model_file
+
+
+def update_model(project, name: str, model: str) -> str:
+    """Set the model file for `name` to `model`; return what `get_model_file` read beforehand."""
+    relation = relation_from_name(project.adapter, name)
+    original_model = get_model_file(project, relation)
+    set_model_file(project, relation, model)
+    return original_model