Skip to content

Commit

Permalink
Fixed a whole lot of incremental build stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
JonCrawford committed Jul 20, 2024
1 parent dffd58c commit 684d625
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 21 deletions.
19 changes: 12 additions & 7 deletions custom_tests/workflow_enables_count_proportion.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,26 @@
{% set lookback_window_days = 30 %}
{% set min_enable_percent = 0.05 %}

WITH filtered_data AS (
WITH
recent_workflows AS (
SELECT *
FROM {{ ref('workflows') }}
WHERE
created_at_pt >= CURRENT_DATE - INTERVAL '{{ lookback_window_days }} days'
),

filtered_data AS (
SELECT
*
FROM
{{ ref('workflows') }}
WHERE
created_at_pt >= CURRENT_DATE - INTERVAL '30 days'
AND enable_count >= 1
FROM recent_workflows
WHERE enable_count >= 1
),

total_count AS (
SELECT
COUNT(*) AS total
FROM
{{ ref('workflows') }}
recent_workflows
),

filtered_count AS (
Expand Down
15 changes: 12 additions & 3 deletions custom_tests/workflow_saves_count_proportion.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,30 @@
{% set lookback_window_days = 30 %}
{% set min_save_percent = 0.05 %}

WITH filtered_data AS (
WITH
recent_workflows AS (
SELECT
*
FROM
{{ ref('workflows') }}
WHERE
created_at_pt >= CURRENT_DATE - INTERVAL '{{ lookback_window_days }} days'
AND save_count >= 1

),

filtered_data AS (
SELECT
*
FROM
recent_workflows
WHERE save_count >= 1
),

total_count AS (
SELECT
COUNT(*) AS total
FROM
{{ ref('workflows') }}
recent_workflows
),

filtered_count AS (
Expand Down
14 changes: 13 additions & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dbt-cloud:
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
#test-paths: ["custom_tests"]
test-paths: ["custom_tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]
Expand Down Expand Up @@ -166,8 +166,12 @@ models:
+cluster_by: ["workflow_run_id"]
int_test_step_runs:
+materialized: incremental
+on_schema_change: "sync_all_columns"
+unique_key: test_step_run_id
int_test_runs:
+materialized: incremental
+on_schema_change: "sync_all_columns"
+unique_key: test_run_id
int_workflow_runs:
+on_schema_change: "sync_all_columns"
+unique_key: workflow_run_id
Expand Down Expand Up @@ -201,15 +205,23 @@ models:
+schema: support
stg_segment_first_visits:
+materialized: incremental
+on_schema_change: "sync_all_columns"
+unique_key: shop_subdomain

google_analytics:
staging:
+schema: support
+materialized: view
stg_ga4_events:
+materialized: incremental
+unique_key: event_id
+on_schema_change: "sync_all_columns"
intermediate:
+schema: support
int_ga4_events:
+materialized: incremental
+unique_key: event_id
+on_schema_change: "sync_all_columns"

customer_io:
staging:
Expand Down
5 changes: 5 additions & 0 deletions models/google_analytics/intermediate/int_ga4_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ staged_ga4_events AS (
user_pseudo_id::STRING AS user_pseudo_id

FROM {{ ref("stg_ga4_events") }}

{% if is_incremental() %}
-- this filter will only be applied on an incremental run
WHERE event_timestamp_pt > '{{ get_max_updated_at('event_timestamp_pt') }}'
{% endif %}
),

reformatted AS (
Expand Down
9 changes: 5 additions & 4 deletions models/google_analytics/staging/stg_ga4_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ with
{# TODO: Use the above only for events dated after the first ga_session_id appears. #}
{# TODO: Then create another condition that covers all the UA (pre-GA4) events before that date. #}

{% if is_incremental() %}
-- this filter will only be applied on an incremental run
AND {{ pacific_timestamp("TO_TIMESTAMP(event_timestamp)") }} > '{{ get_max_updated_at('event_timestamp_pt') }}'
{% endif %}

{# We still get duplicates sometimes. #}
QUALIFY ROW_NUMBER() OVER (PARTITION BY user_pseudo_id, event_name, event_timestamp ORDER BY source) = 1
),
Expand Down Expand Up @@ -194,7 +199,3 @@ final AS (
)

SELECT * FROM final
{% if is_incremental() %}
-- this filter will only be applied on an incremental run
WHERE event_timestamp_pt > '{{ get_max_updated_at() }}'
{% endif %}
5 changes: 4 additions & 1 deletion models/mesa/intermediate/_int_test_step_runs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ models:
- name: test_step_run_id
data_tests:
- not_null
- unique
- unique:
warn_if: "> 50"
error_if: "> 500"

- name: workflow_step_id
meta:
metabase.semantic_type: type/fk
Expand Down
1 change: 1 addition & 0 deletions models/mesa/intermediate/int_test_runs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ SELECT
run_status = 'success' AND child_failure_count = 0 AS is_successful
FROM {{ source_table }}
WHERE is_test_run = TRUE
AND test_run_id = '66944e2118cf6789c509a2cb'

{% if is_incremental() %}
-- this filter will only be applied on an incremental run
Expand Down
2 changes: 1 addition & 1 deletion models/mesa/marts/workflow_runs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ LEFT JOIN shops USING (shop_subdomain)

{% if is_incremental() %}
-- this filter will only be applied on an incremental run
AND updated_at > '{{ get_max_updated_at() }}'
WHERE updated_at > '{{ get_max_updated_at() }}'
{% endif %}
8 changes: 5 additions & 3 deletions models/mesa/staging/_stg_step_runs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ models:
metabase.fk_target_table: ref('stg_workflows')
metabase.fk_target_field: workflow_id
data_tests:
- not_null
- not_null:
warn_if: ">100"
error_if: ">1000"
description: The foreign key to the originating workflow
- name: step_run_at_utc
data_tests:
Expand All @@ -39,8 +41,8 @@ models:
metabase.fk_target_field: workflow_step_id
data_tests:
- not_null:
warn_if: ">1"
error_if: ">50"
warn_if: ">50"
error_if: ">500"
config:
where: step_run_at_utc > '2021-08-20'
- relationships_proportion:
Expand Down
2 changes: 1 addition & 1 deletion models/mesa_segment/staging/stg_segment_first_visits.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ with
session_start_tstamp_pt <= shops.first_installed_at_pt + interval '1 hour'
{% if is_incremental() %}
-- this filter will only be applied on an incremental run
AND session_start_tstamp_pt > '{{ get_max_updated_at() }}'
AND session_start_tstamp_pt > '{{ get_max_updated_at('first_touch_at_pt') }}'
{% endif %}

qualify
Expand Down

0 comments on commit 684d625

Please sign in to comment.