-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updates existing clean-stage models to use the macro.
- Loading branch information
1 parent
00f2804
commit be6e66f
Showing
36 changed files
with
356 additions
and
1,209 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,19 @@ | ||
{{ config(materialized='view') }} | ||
{% set dataset_name = "chicago_bike_paths" %} | ||
{% set ck_cols = ["st_name", "f_street", "t_street", "br_ow_dir"] %} | ||
{% set record_id = "bike_route_segment_id" %} | ||
{% set base_cols = [ | ||
"bike_route_segment_id", "st_name", "f_street", "t_street", "street", "displayrou", | ||
"oneway_dir", "contraflow", "br_ow_dir", "br_oneway", "mi_ctrline", "geometry", | ||
"source_data_updated", "ingestion_check_time" | ||
] %} | ||
{% set updated_at_col = "source_data_updated" %} | ||
|
||
-- selects all records from the standardized view of this data | ||
WITH std_data AS ( | ||
SELECT * | ||
FROM {{ ref('chicago_bike_paths_standardized') }} | ||
), | ||
{% set query = generate_clean_stage_incremental_dedupe_query( | ||
dataset_name=dataset_name, | ||
record_id=record_id, | ||
ck_cols=ck_cols, | ||
base_cols=base_cols, | ||
updated_at_col=updated_at_col | ||
) %} | ||
|
||
-- keeps the most recently updated version of each record | ||
std_records_numbered_latest_first AS ( | ||
SELECT *, | ||
row_number() over(partition by {{record_id}} ORDER BY source_data_updated DESC) as rn | ||
FROM std_data | ||
), | ||
most_current_records AS ( | ||
SELECT * | ||
FROM std_records_numbered_latest_first | ||
WHERE rn = 1 | ||
), | ||
|
||
-- selects the source_data_updated (i.e., the date of publication) value from each record's | ||
-- first ingestion into the local data warehouse | ||
std_records_numbered_earliest_first AS ( | ||
SELECT *, | ||
row_number() over(partition by {{record_id}} ORDER BY source_data_updated ASC) as rn | ||
FROM std_data | ||
), | ||
records_first_ingested_pub_date AS ( | ||
SELECT {{record_id}}, source_data_updated AS first_ingested_pub_date | ||
FROM std_records_numbered_earliest_first | ||
WHERE rn = 1 | ||
) | ||
|
||
SELECT | ||
{% for bc in base_cols %}mcr.{{ bc }},{% endfor %} | ||
fi.first_ingested_pub_date | ||
FROM most_current_records AS mcr | ||
LEFT JOIN records_first_ingested_pub_date AS fi | ||
ON mcr.{{ record_id }} = fi.{{ record_id }} | ||
ORDER BY {% for ck in ck_cols %}mcr.{{ ck }} DESC, {% endfor %} mcr.source_data_updated DESC | ||
{{ query }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,18 @@ | ||
{{ config(materialized='view') }} | ||
{% set dataset_name = "chicago_city_boundary" %} | ||
{% set ck_cols = ["objectid"] %} | ||
{% set record_id = "objectid" %} | ||
{% set base_cols = [ | ||
"objectid", "name", "shape_area", "shape_len", "geometry", "source_data_updated", | ||
"ingestion_check_time" | ||
] %} | ||
{% set updated_at_col = "source_data_updated" %} | ||
|
||
-- selects all records from the standardized view of this data | ||
WITH std_data AS ( | ||
SELECT * | ||
FROM {{ ref('chicago_city_boundary_standardized') }} | ||
), | ||
{% set query = generate_clean_stage_incremental_dedupe_query( | ||
dataset_name=dataset_name, | ||
record_id=record_id, | ||
ck_cols=ck_cols, | ||
base_cols=base_cols, | ||
updated_at_col=updated_at_col | ||
) %} | ||
|
||
-- keeps the most recently updated version of each record | ||
std_records_numbered_latest_first AS ( | ||
SELECT *, | ||
row_number() over(partition by {{record_id}} ORDER BY source_data_updated DESC) as rn | ||
FROM std_data | ||
), | ||
most_current_records AS ( | ||
SELECT * | ||
FROM std_records_numbered_latest_first | ||
WHERE rn = 1 | ||
), | ||
|
||
-- selects the source_data_updated (i.e., the date of publication) value from each record's | ||
-- first ingestion into the local data warehouse | ||
std_records_numbered_earliest_first AS ( | ||
SELECT *, | ||
row_number() over(partition by {{record_id}} ORDER BY source_data_updated ASC) as rn | ||
FROM std_data | ||
), | ||
records_first_ingested_pub_date AS ( | ||
SELECT {{record_id}}, source_data_updated AS first_ingested_pub_date | ||
FROM std_records_numbered_earliest_first | ||
WHERE rn = 1 | ||
) | ||
|
||
SELECT | ||
{% for bc in base_cols %}mcr.{{ bc }},{% endfor %} | ||
fi.first_ingested_pub_date | ||
FROM most_current_records AS mcr | ||
LEFT JOIN records_first_ingested_pub_date AS fi | ||
ON mcr.{{ record_id }} = fi.{{ record_id }} | ||
ORDER BY {% for ck in ck_cols %}mcr.{{ ck }} DESC, {% endfor %} mcr.source_data_updated DESC | ||
{{ query }} |
Oops, something went wrong.