Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable setting datetime value for dbt_valid_to when the record is current #10780

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240925-120855.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Unreleased changelog entry: Author and Issue are nested under `custom`.
kind: Features
body: Enable specification of dbt_valid_to for current records
time: 2024-09-25T12:08:55.926848-04:00
custom:
  Author: gshank
  Issue: "10187"
1 change: 1 addition & 0 deletions core/dbt/artifacts/resources/v1/snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class SnapshotConfig(NodeConfig):
snapshot_meta_column_names: SnapshotMetaColumnNames = field(
default_factory=SnapshotMetaColumnNames
)
dbt_valid_to_current: Optional[str] = None

@property
def snapshot_table_column_names(self):
Expand Down
6 changes: 3 additions & 3 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
git+https://github.com/dbt-labs/dbt-adapters.git@main
git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter
git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_dbt_valid_to_current
git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_dbt_valid_to_current#subdirectory=dbt-tests-adapter
git+https://github.com/dbt-labs/dbt-common.git@main
git+https://github.com/dbt-labs/dbt-postgres.git@main
git+https://github.com/dbt-labs/dbt-postgres.git@snapshot_dbt_valid_to_current
# black must match what's in .pre-commit-config.yaml to be sure local env matches CI
black==24.3.0
bumpversion
Expand Down
24 changes: 23 additions & 1 deletion schemas/dbt/manifest/v12.json
Original file line number Diff line number Diff line change
Expand Up @@ -6685,6 +6685,17 @@
}
},
"additionalProperties": false
},
"dbt_valid_to_current": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": true
Expand Down Expand Up @@ -16511,6 +16522,17 @@
}
},
"additionalProperties": false
},
"dbt_valid_to_current": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": true
Expand Down Expand Up @@ -22476,4 +22498,4 @@
"unit_tests"
],
"$id": "https://schemas.getdbt.com/dbt/manifest/v12.json"
}
}
1 change: 1 addition & 0 deletions tests/functional/artifacts/expected_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def get_rendered_snapshot_config(**updates):
"dbt_updated_at": None,
"dbt_scd_id": None,
},
"dbt_valid_to_current": None,
"tags": [],
"persist_docs": {},
"full_refresh": None,
Expand Down
1 change: 1 addition & 0 deletions tests/functional/list/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def expect_snapshot_output(self, happy_path_project): # noqa: F811
"persist_docs": {},
"target_database": happy_path_project.database,
"target_schema": happy_path_project.test_schema,
"dbt_valid_to_current": None,
"snapshot_meta_column_names": {
"dbt_scd_id": None,
"dbt_updated_at": None,
Expand Down
82 changes: 82 additions & 0 deletions tests/functional/snapshots/data/seed_dbt_valid_to.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
-- Seed table filled by the insert statement further down in this script.
-- NOTE(review): presumably the source relation read by the snapshot under test - confirm against the snapshot SQL.
-- updated_at is the only timestamp column and is reused below to derive the expected snapshot meta columns.
create table {database}.{schema}.seed (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),
updated_at TIMESTAMP WITHOUT TIME ZONE
);

-- Table holding the expected snapshot contents.
-- It repeats the seed columns and adds four snapshot meta columns that carry a test_ prefix
-- (test_valid_from, test_valid_to, test_scd_id, test_updated_at) instead of the dbt_ default names.
create table {database}.{schema}.snapshot_expected (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),

-- snapshotting fields
updated_at TIMESTAMP WITHOUT TIME ZONE,
test_valid_from TIMESTAMP WITHOUT TIME ZONE,
test_valid_to TIMESTAMP WITHOUT TIME ZONE,
test_scd_id TEXT,
test_updated_at TIMESTAMP WITHOUT TIME ZONE
);


-- seed inserts
-- Loads 20 rows with ids 1 through 20.
-- use the same email for two users to verify that duplicated check_cols values
-- are handled appropriately
-- Row 20 has a null email, so the null case is covered as well.
-- NOTE(review): ids 3 through 19 all show the literal [email protected] in this copy - this looks like
-- an obfuscation placeholder rather than the intended distinct addresses - confirm against the original file.
insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values
(1, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'),
(2, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'),
(3, 'Rachel', 'Moreno', '[email protected]', 'Female', '31.222.249.23', '2016-04-05 02:05:30'),
(4, 'Ralph', 'Turner', '[email protected]', 'Male', '157.83.76.114', '2016-08-08 00:06:51'),
(5, 'Laura', 'Gonzales', '[email protected]', 'Female', '30.54.105.168', '2016-09-01 08:25:38'),
(6, 'Katherine', 'Lopez', '[email protected]', 'Female', '169.138.46.89', '2016-08-30 18:52:11'),
(7, 'Jeremy', 'Hamilton', '[email protected]', 'Male', '231.189.13.133', '2016-07-17 02:09:46'),
(8, 'Heather', 'Rose', '[email protected]', 'Female', '87.165.201.65', '2015-12-29 22:03:56'),
(9, 'Gregory', 'Kelly', '[email protected]', 'Male', '154.209.99.7', '2016-03-24 21:18:16'),
(10, 'Rachel', 'Lopez', '[email protected]', 'Female', '237.165.82.71', '2016-08-20 15:44:49'),
(11, 'Donna', 'Welch', '[email protected]', 'Female', '103.33.110.138', '2016-02-27 01:41:48'),
(12, 'Russell', 'Lawrence', '[email protected]', 'Male', '189.115.73.4', '2016-06-11 03:07:09'),
(13, 'Michelle', 'Montgomery', '[email protected]', 'Female', '243.220.95.82', '2016-06-18 16:27:19'),
(14, 'Walter', 'Castillo', '[email protected]', 'Male', '71.159.238.196', '2016-10-06 01:55:44'),
(15, 'Robin', 'Mills', '[email protected]', 'Female', '172.190.5.50', '2016-10-31 11:41:21'),
(16, 'Raymond', 'Holmes', '[email protected]', 'Male', '148.153.166.95', '2016-10-03 08:16:38'),
(17, 'Gary', 'Bishop', '[email protected]', 'Male', '161.108.182.13', '2016-08-29 19:35:20'),
(18, 'Anna', 'Riley', '[email protected]', 'Female', '253.31.108.22', '2015-12-11 04:34:27'),
(19, 'Sarah', 'Knight', '[email protected]', 'Female', '222.220.3.177', '2016-09-26 00:49:06'),
(20, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19');


-- populate snapshot table
-- Copies every seed row into snapshot_expected and derives the four meta columns:
--   test_valid_from and test_updated_at are both taken from updated_at
--   test_valid_to is the fixed date 2099-12-31 for every row, not null
--   test_scd_id is an md5 hash built from id, first_name and updated_at
insert into {database}.{schema}.snapshot_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
test_valid_from,
test_valid_to,
test_updated_at,
test_scd_id
)

select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by snapshotting
updated_at as test_valid_from,
date('2099-12-31') as test_valid_to,
updated_at as test_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
from {database}.{schema}.seed;
97 changes: 97 additions & 0 deletions tests/functional/snapshots/test_snapshot_column_names.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import os

import pytest
Expand All @@ -7,6 +8,7 @@
get_manifest,
run_dbt,
run_dbt_and_capture,
run_sql_with_adapter,
update_config_file,
)

Expand Down Expand Up @@ -232,3 +234,98 @@ def test_snapshot_invalid_column_names(self, project):
assert len(results) == 1
assert "Compilation Error in snapshot snapshot_actual" in log_output
assert "Snapshot target is missing configured columns" in log_output


# Snapshot properties (YAML) for the dbt_valid_to_current test.
# Configures `snapshot_actual` with the timestamp strategy, renames the four
# snapshot meta columns to test_* names, and sets `dbt_valid_to_current` to a
# SQL expression so current records get 2099-12-31 instead of NULL.
# The nesting below is significant: `config` belongs to the list item and the
# column renames belong to `snapshot_meta_column_names`.
snapshots_valid_to_current_yml = """
snapshots:
  - name: snapshot_actual
    config:
      strategy: timestamp
      updated_at: updated_at
      dbt_valid_to_current: "date('2099-12-31')"
      snapshot_meta_column_names:
        dbt_valid_to: test_valid_to
        dbt_valid_from: test_valid_from
        dbt_scd_id: test_scd_id
        dbt_updated_at: test_updated_at
"""

# SQL that appends, to snapshot_expected, a second version of the seed rows
# with ids 10 through 20. Each appended row is marked as current by using
# date('2099-12-31') for test_valid_to, and takes test_valid_from and
# test_updated_at from the seed row's updated_at at the time this runs.
# NOTE(review): the test runs this right after `invalidate_sql` (defined
# elsewhere in this module), which presumably changes updated_at for the same
# id range first - confirm there.
update_with_current_sql = """
-- insert v2 of the 10 - 20 records

insert into {database}.{schema}.snapshot_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
test_valid_from,
test_valid_to,
test_updated_at,
test_scd_id
)

select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by snapshotting
updated_at as test_valid_from,
date('2099-12-31') as test_valid_to,
updated_at as test_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
from {database}.{schema}.seed
where id >= 10 and id <= 20;
"""


class TestSnapshotDbtValidToCurrent:
    """Functional test for the ``dbt_valid_to_current`` snapshot config.

    The snapshot is configured (via ``snapshots_valid_to_current_yml``) so
    that current records carry ``date('2099-12-31')`` in the valid-to column
    rather than NULL. The test snapshots the seed twice and checks that
    current rows hold that date while a superseded row holds a real timestamp.
    """

    @pytest.fixture(scope="class")
    def snapshots(self):
        """Provide the snapshot SQL file used by the project."""
        return {"snapshot.sql": snapshot_actual_sql}

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the snapshot properties YAML and a model that refs the snapshot."""
        return {
            "snapshots.yml": snapshots_valid_to_current_yml,
            "ref_snapshot.sql": ref_snapshot_sql,
        }

    def test_valid_to_current(self, project):
        """Snapshot twice and verify valid-to values for current and superseded rows."""
        current_valid_to = datetime.datetime(2099, 12, 31, 0, 0)
        # Without ORDER BY the database may return rows in any order, which
        # would make positional checks flaky. Ordering by id and then by
        # test_valid_from puts an older version of a row before its
        # replacement.
        select_snapshot_sql = (
            "select id, test_scd_id, test_valid_to "
            "from {database}.{schema}.snapshot_actual "
            "order by id, test_valid_from"
        )

        path = os.path.join(project.test_data_dir, "seed_dbt_valid_to.sql")
        project.run_sql_file(path)
        results = run_dbt(["snapshot"])
        assert len(results) == 1

        original_snapshot = run_sql_with_adapter(
            project.adapter,
            select_snapshot_sql,
            "all",
        )
        # The seed holds ids 1 through 20, so with the ordering above
        # positions 0 and 9 are ids 1 and 10; both are current after the
        # first snapshot.
        assert original_snapshot[0][2] == current_valid_to
        assert original_snapshot[9][2] == current_valid_to

        # NOTE(review): invalidate_sql is defined elsewhere in this module;
        # presumably it moves updated_at forward for ids 10 through 20 so the
        # second snapshot sees changes - confirm there.
        project.run_sql(invalidate_sql)
        project.run_sql(update_with_current_sql)

        results = run_dbt(["snapshot"])
        assert len(results) == 1

        updated_snapshot = run_sql_with_adapter(
            project.adapter,
            select_snapshot_sql,
            "all",
        )
        # id 1 sorts first; it is expected to still be current.
        assert updated_snapshot[0][2] == current_valid_to

        # Look rows up by id instead of by position so the check does not
        # depend on how many versions other ids have.
        id_10_versions = [row for row in updated_snapshot if row[0] == 10]
        assert len(id_10_versions) == 2
        # Original row that was updated now has a non-current (2099/12/31) date
        assert id_10_versions[0][2] == datetime.datetime(2016, 8, 20, 16, 44, 49)
        # Updated row has a current date
        assert id_10_versions[1][2] == current_valid_to

        check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"])
Loading