-
Notifications
You must be signed in to change notification settings - Fork 94
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: Invalid merge when unique_key == merge_update_columns (#406)
Co-authored-by: Serhii Dimchenko <[email protected]>
- Loading branch information
1 parent
9dec391
commit fc00e80
Showing
3 changed files
with
195 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
115 changes: 115 additions & 0 deletions
115
tests/functional/adapter/test_incremental_iceberg_merge_no_updates.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
from collections import namedtuple | ||
|
||
import pytest | ||
|
||
from dbt.tests.util import check_relations_equal, run_dbt | ||
|
||
models__merge_no_updates_sql = """ | ||
{{ config( | ||
materialized = 'incremental', | ||
unique_key = 'id', | ||
incremental_strategy = 'merge', | ||
merge_update_columns = ['id'], | ||
table_type = 'iceberg', | ||
) }} | ||
|
||
{% if not is_incremental() %} | ||
|
||
-- data for first invocation of model | ||
|
||
select 1 as id, 'hello' as msg, 'blue' as color | ||
union all | ||
select 2 as id, 'goodbye' as msg, 'red' as color | ||
|
||
{% else %} | ||
|
||
-- data for subsequent incremental update | ||
|
||
select 1 as id, 'hey' as msg, 'blue' as color | ||
union all | ||
select 2 as id, 'yo' as msg, 'green' as color | ||
union all | ||
select 3 as id, 'anyway' as msg, 'purple' as color | ||
|
||
{% endif %} | ||
""" | ||
|
||
seeds__expected_merge_no_updates_csv = """id,msg,color | ||
1,hello,blue | ||
2,goodbye,red | ||
3,anyway,purple | ||
""" | ||
|
||
ResultHolder = namedtuple( | ||
"ResultHolder", | ||
[ | ||
"seed_count", | ||
"model_count", | ||
"seed_rows", | ||
"inc_test_model_count", | ||
"relation", | ||
], | ||
) | ||
|
||
|
||
class TestIncrementalIcebergMergeNoUpdates: | ||
@pytest.fixture(scope="class") | ||
def models(self): | ||
return {"merge_no_updates.sql": models__merge_no_updates_sql} | ||
|
||
@pytest.fixture(scope="class") | ||
def seeds(self): | ||
return {"expected_merge_no_updates.csv": seeds__expected_merge_no_updates_csv} | ||
|
||
def update_incremental_model(self, incremental_model): | ||
"""update incremental model after the seed table has been updated""" | ||
model_result_set = run_dbt(["run", "--select", incremental_model]) | ||
return len(model_result_set) | ||
|
||
def get_test_fields(self, project, seed, incremental_model, update_sql_file): | ||
seed_count = len(run_dbt(["seed", "--select", seed, "--full-refresh"])) | ||
|
||
model_count = len(run_dbt(["run", "--select", incremental_model, "--full-refresh"])) | ||
|
||
relation = incremental_model | ||
# update seed in anticipation of incremental model update | ||
row_count_query = f"select * from {project.test_schema}.{seed}" | ||
|
||
seed_rows = len(project.run_sql(row_count_query, fetch="all")) | ||
|
||
# propagate seed state to incremental model according to unique keys | ||
inc_test_model_count = self.update_incremental_model(incremental_model=incremental_model) | ||
|
||
return ResultHolder(seed_count, model_count, seed_rows, inc_test_model_count, relation) | ||
|
||
def check_scenario_correctness(self, expected_fields, test_case_fields, project): | ||
"""Invoke assertions to verify correct build functionality""" | ||
# 1. test seed(s) should build afresh | ||
assert expected_fields.seed_count == test_case_fields.seed_count | ||
# 2. test model(s) should build afresh | ||
assert expected_fields.model_count == test_case_fields.model_count | ||
# 3. seeds should have intended row counts post update | ||
assert expected_fields.seed_rows == test_case_fields.seed_rows | ||
# 4. incremental test model(s) should be updated | ||
assert expected_fields.inc_test_model_count == test_case_fields.inc_test_model_count | ||
# 5. result table should match intended result set (itself a relation) | ||
check_relations_equal(project.adapter, [expected_fields.relation, test_case_fields.relation]) | ||
|
||
def test__merge_no_updates(self, project): | ||
"""seed should match model after incremental run""" | ||
|
||
expected_fields = ResultHolder( | ||
seed_count=1, | ||
model_count=1, | ||
inc_test_model_count=1, | ||
seed_rows=3, | ||
relation="expected_merge_no_updates", | ||
) | ||
|
||
test_case_fields = self.get_test_fields( | ||
project, | ||
seed="expected_merge_no_updates", | ||
incremental_model="merge_no_updates", | ||
update_sql_file=None, | ||
) | ||
self.check_scenario_correctness(expected_fields, test_case_fields, project) |