Skip to content

Commit

Permalink
Rescue additional attribution data from BigQuery and refactor to use it.
Browse files Browse the repository at this point in the history
  • Loading branch information
JonCrawford committed Jan 2, 2024
1 parent 0f4e547 commit 5ac23a4
Show file tree
Hide file tree
Showing 45 changed files with 2,269 additions and 1,728 deletions.
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,10 @@ __pycache__
None.yml
*.log
.cursorignore
.vscode/

#PyCharm
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,9 @@ models:
google_analytics:
staging:
+schema: support
+materialized: view
intermediate:
+schema: support
+materialized: ephemeral

customer_io:
staging:
Expand Down
20 changes: 20 additions & 0 deletions macros/get_prefixed_columns.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{% macro get_prefixed_columns(relation_ref, prefix, exclude=[]) %}
    {#-
        Renders a comma-separated select list covering the columns of
        `relation_ref`, each aliased with `prefix`.

        Args:
            relation_ref: relation whose columns are listed (passed to
                dbt_utils.get_filtered_columns_in_relation).
            prefix: string prepended to every output alias.
            exclude: extra column names to leave out; `shop_subdomain` and
                `user_pseudo_id` are always left out.

        Output per column:
            - `event_timestamp_pt` / `created_at` (case-insensitive) are
              passed through unchanged and aliased `<prefix>_at_pt`.
            - every other column becomes NULL when its varchar form is the
              empty string, and is aliased `<prefix>_<lowercased column name>`.
        No trailing comma is emitted after the last column.
    -#}
    {%- set always_skipped = ['shop_subdomain', 'user_pseudo_id'] -%}
    {%- set timestamp_columns = ['event_timestamp_pt', 'created_at'] -%}
    {%- set selected_columns = dbt_utils.get_filtered_columns_in_relation(
        from=relation_ref,
        except=always_skipped + exclude
    ) -%}

    {%- for col in selected_columns %}
        {% if col | lower in timestamp_columns -%}
            {{ col }} AS {{ prefix }}_at_pt
        {%- else -%}
            iff({{ col }}::varchar = '', null, {{ col }}) AS {{ prefix }}_{{ col | lower }}
        {%- endif -%}
        {{- "," if not loop.last else "" }}
    {%- endfor %}

{% endmacro %}
9 changes: 9 additions & 0 deletions macros/should_exclude_column_by_prefix.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{% macro should_exclude_column_by_prefix(column_name, excluded_prefixes, result) %}
    {#
        Out-parameter helper: appends `true` to the caller-supplied `result`
        list when `column_name` starts with any entry of `excluded_prefixes`.
        The loop stops at the first matching prefix, so at most one value is
        appended per call; nothing is appended when no prefix matches.
    #}
    {% for candidate_prefix in excluded_prefixes %}
        {% if column_name.startswith(candidate_prefix) %}
            {% do result.append(true) %}
            {% break %}
        {% endif %}
    {% endfor %}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
version: 2

models:
  - name: int_app_store_attribution
    columns:
      # Each shop may appear at most once and must always be identified.
      - name: shop_subdomain
        description: The foreign key for the Shop.
        tests: [unique, not_null]
    tests:
      # Fails when the model holds fewer than 200 rows.
      - dbt_expectations.expect_table_row_count_to_be_between:
          min_value: 200
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
version: 2
models:
- name: stg_ga_first_visits
- name: int_ga_first_visits
tests:
- dbt_expectations.expect_table_row_count_to_be_between:
min_value: 100
Expand All @@ -9,36 +9,36 @@ models:
- name: user_pseudo_id
decription: the anonymous google analytics id
description: the anonymous google analytics user id
- name: first_touch_at_pt
- name: ga_first_touch_at_pt
description: The timestamp of the first touch.
- name: first_touch_url
- name: ga_first_touch_url
description: The URL of the first touch.
- name: first_touch_host
- name: ga_first_touch_host
description: The host of the first touch.
- name: first_touch_path
- name: ga_first_touch_path
description: The path of the first touch.
- name: first_touch_content
- name: ga_first_touch_content
description: The content that the shop was acquired through.
- name: first_touch_campaign
- name: ga_first_touch_traffic_source_name
description: The campaign that the shop was acquired through.
- name: first_touch_medium
- name: ga_first_touch_traffic_source_medium
description: The medium that the shop was acquired through.
- name: first_touch_source
- name: ga_first_touch_traffic_source_source
description: The source that the shop was acquired through.
- name: first_touch_referrer_host
- name: ga_first_touch_referrer_host
description: The referrer host that referred the shop's first visit.
- name: first_touch_app_store_surface_detail
- name: ga_first_touch_app_store_surface_detail
description: ""
- name: first_touch_app_store_surface_type
- name: ga_first_touch_app_store_surface_type
description: The type of surface for the first touch in the app store.
- name: first_touch_app_store_surface_intra_position
- name: ga_first_touch_app_store_surface_intra_position
description: The intra position of the first touch on the app store surface.
- name: first_touch_app_store_surface_inter_position
- name: ga_first_touch_app_store_surface_inter_position
description: The inter position of the first touch on the app store surface.
- name: first_touch_app_store_locale
- name: ga_first_touch_app_store_locale
description: The locale of the first touch in the app store.
- name: shop_subdomain
description: The foreign key of the Shop.
description: The foreign key for the Shop.
tests:
- not_null
- unique
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
version: 2
models:
- name: stg_ga_install_events
- name: int_ga_install_events
description: ""
columns:
- name: USER_PSEUDO_ID
description: The anonymouse Google Analytics user ID
tests:
- not_null
- name: SHOP_SUBDOMAIN
description: The foreign key of the Shop.
- name: EVENT_TIMESTAMP
Expand Down
123 changes: 123 additions & 0 deletions models/google_analytics/intermediate/int_app_store_attribution.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
-- App-store attribution, one row per shop in stg_shops.
-- For every shop this model attaches: the first app-store install event
-- (falling back to the first "Add App button" event), the first ad click,
-- the first organic click, ad-click flags, and a derived click type.
with

shops as (
    select
        shop_subdomain,
        first_installed_at_pt
    from {{ ref('stg_shops') }}
),

-- Earliest 'shopify_app_install' event per shop.
-- get_prefixed_columns aliases every remaining column as app_store_install_<column>
-- and the event timestamp as app_store_install_at_pt.
app_store_install_events as (
    select
        shop_subdomain,
        {{ get_prefixed_columns(ref("int_ga_app_store_page_events"), 'app_store_install', exclude=['event_name']) }}
    from {{ ref("int_ga_app_store_page_events") }}
    where event_name = 'shopify_app_install'
    qualify
        row_number() over (
            partition by shop_subdomain order by event_timestamp_pt asc
        ) = 1
),

-- Earliest 'Add App button' event per shop, aliased app_store_add_app_<column>.
app_store_add_app_button_events as (
    select
        shop_subdomain,
        {{ get_prefixed_columns(ref("int_ga_app_store_page_events"), 'app_store_add_app', exclude=['event_name']) }}
    from {{ ref("int_ga_app_store_page_events") }}
    where event_name = 'Add App button'
    qualify
        row_number() over (
            partition by shop_subdomain order by event_timestamp_pt asc
        ) = 1
),

-- Install attribution per shop: install-event values win, "Add App button"
-- values fill the gaps.
-- The join is a FULL OUTER JOIN so that a shop with only one of the two events
-- still gets a row; with an inner join such shops were dropped and the
-- timestamp fallback below could never fire.
-- The key is coalesced explicitly instead of relying on USING, so the result
-- does not depend on how the engine projects a USING column in an outer join.
combined_app_store_install_events as (
    {#- Column names come from the same relation the prefixed columns were built
        from. The timestamp columns are left out here because the macro exposes
        them as <prefix>_at_pt, which is coalesced separately below. -#}
    {%- set column_names = dbt_utils.get_filtered_columns_in_relation(
        from=ref('int_ga_app_store_page_events'),
        except=['event_timestamp_pt', 'created_at', 'shop_subdomain', 'user_pseudo_id', 'event_name']
    ) %}
    select
        coalesce(installs.shop_subdomain, add_apps.shop_subdomain) as shop_subdomain,
        {%- for column_name in column_names %}
        coalesce(
            installs.app_store_install_{{ column_name | lower }},
            add_apps.app_store_add_app_{{ column_name | lower }}
        ) as app_store_install_{{ column_name | lower }},
        {%- endfor %}
        coalesce(
            installs.app_store_install_at_pt,
            add_apps.app_store_add_app_at_pt
        ) as app_store_install_at_pt
    from app_store_install_events as installs
    full outer join app_store_add_app_button_events as add_apps
        on installs.shop_subdomain = add_apps.shop_subdomain
),

-- Earliest ad-click event per shop: either the page URL contains 'search_ad'
-- or the event is 'shopify_ad_click'. Columns are aliased app_store_ad_click_<column>.
app_store_ad_clicks as (
    select
        shop_subdomain,
        {{ get_prefixed_columns(ref("int_ga_app_store_page_events"), 'app_store_ad_click', exclude=['event_name']) }}
    from {{ ref("int_ga_app_store_page_events") }}
    where page_location ilike '%search_ad%' or event_name = 'shopify_ad_click'
    qualify
        row_number() over (
            partition by shop_subdomain order by event_timestamp_pt asc
        ) = 1
),

-- Per-shop ad-click flags. Starts from shops so every shop gets a row;
-- shops without an ad click get FALSE for both flags.
app_store_ad_click_counts as (
    select
        shop_subdomain,
        -- the ad click counts as "before install" up to 60 minutes after the
        -- recorded first install time
        count_if(
            app_store_ad_click_at_pt
            <= first_installed_at_pt + interval '60min'
        ) > 0 as app_store_did_click_ad_before_install,
        count(app_store_ad_clicks.app_store_ad_click_page_location) > 0
            as app_store_did_click_ad
    from shops
    left join app_store_ad_clicks using (shop_subdomain)
    group by shop_subdomain
),

-- Earliest organic app-store click per shop: a session_start whose URL carries
-- a surface_type parameter and is not a search ad.
app_store_organic_clicks as (
    select
        shop_subdomain,
        {{ get_prefixed_columns(ref("int_ga_app_store_page_events"), 'app_store_organic_click', exclude=['event_name']) }}
    from {{ ref("int_ga_app_store_page_events") }}
    where
        page_location ilike '%surface_type=%'
        and page_location not ilike '%search_ad%'
        and event_name = 'session_start'
    qualify
        row_number() over (
            partition by shop_subdomain order by event_timestamp_pt asc
        ) = 1
),

final as (
    select
        * exclude (first_installed_at_pt),
        -- IS NOT NULL already yields TRUE/FALSE, never NULL
        (app_store_ad_click_app_store_surface_type is not null)
            as app_store_has_ad_click,
        (app_store_organic_click_app_store_surface_type is not null)
            as has_app_store_organic_click,

        -- references the two aliases defined just above in this select list
        case
            when app_store_has_ad_click and has_app_store_organic_click
                then 'app_store_ad_click_and_organic_click'
            when app_store_has_ad_click
                then 'app_store_ad_click'
            when has_app_store_organic_click
                then 'app_store_organic_click'
            else '(direct or predates tracking)'
        end as app_store_click_type
    from shops
    left join combined_app_store_install_events using (shop_subdomain)
    left join app_store_ad_clicks using (shop_subdomain)
    left join app_store_organic_clicks using (shop_subdomain)
    left join app_store_ad_click_counts using (shop_subdomain)
)

select *
from final
27 changes: 27 additions & 0 deletions models/google_analytics/intermediate/int_ga4_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- GA4 events attributed to a known shop.
-- Each staged event is matched to the anonymous-to-known user mapping and the
-- event's own shop_subdomain / shopify_id are replaced by the mapped shop_subdomain.
with

user_matching as (
    select *
    from {{ ref("stg_anonymous_to_known_user_matching") }}
),

staged_ga4_events as (
    select *
    from {{ ref("stg_ga4_events") }}
),

final as (
    select
        user_matching.shop_subdomain,
        staged_ga4_events.* exclude (shopify_id, shop_subdomain)
    from staged_ga4_events
    -- Explicit inner join: events with no match in user_matching are dropped.
    -- The match predicates live in ON rather than in a WHERE after an
    -- ON-less LEFT JOIN, which produced the same rows but read as if
    -- unmatched events were kept.
    -- NOTE(review): an event matching several user_matching rows through
    -- different keys is emitted once per match - confirm this fan-out is intended.
    inner join user_matching
        on staged_ga4_events.user_pseudo_id = user_matching.user_pseudo_id
        or staged_ga4_events.shopify_id::string = user_matching.shopify_id::string
        or staged_ga4_events.shop_subdomain = user_matching.shop_subdomain
)

select *
from final
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Subset of int_ga4_events that relates to the Shopify app store.
-- An event is kept when any one of these holds (all case-insensitive):
--   * its event name starts with 'shopify'
--   * its page URL contains 'apps.shopify.com'
--   * its page URL matches '%surface_%'
-- NOTE(review): '_' is a single-character wildcard in LIKE/ILIKE, so the last
-- pattern matches 'surface' followed by any character, not only a literal
-- underscore - confirm that is acceptable.
with

app_store_related_events as (
    select *
    from {{ ref("int_ga4_events") }}
    where
        event_name ilike 'shopify%'
        or page_location ilike '%apps.shopify.com%'
        or page_location ilike '%surface_%'
)

select *
from app_store_related_events
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
version: 2
models:
- name: stg_ga_app_store_page_events
- name: int_ga_app_store_page_events
columns:
- name: user_pseudo_id
description: the anonymous google analytics user id
Expand Down
38 changes: 32 additions & 6 deletions models/google_analytics/intermediate/int_ga_attribution.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,49 @@ with
0
) as ga4_sessions_til_install
from shops
left join {{ ref("stg_ga_session_starts") }} using (shop_subdomain)
left join {{ ref("int_ga_session_starts") }} using (shop_subdomain)
group by 1
),

first_touches_ga4 as (select * from {{ ref("stg_ga_first_visits") }}),
first_touches_ga4 as (select * from {{ ref("int_ga_first_visits") }}),

last_touches_ga4 as (
select * from {{ ref("stg_last_touch_ga_sessions_before_install") }}
select * from {{ ref("int_last_touch_ga_sessions_before_install") }}
),

final as (
combined as (

select * exclude (user_pseudo_id)
from first_touches_ga4
select *
FROM first_touches_ga4
left join last_touches_ga4 using (shop_subdomain)
left join session_counts using (shop_subdomain)
),

-- Re-emits every column of `combined`, replacing a generated set of attribution
-- text columns with a display form: underscores are swapped for spaces and the
-- result is passed through initcap(). Each column keeps its original name.
-- The Jinja loops below build that column list for both the first_touch and
-- last_touch prefixes:
--   <prefix>_traffic_source_: source, medium, name   (term and content are skipped)
--   <prefix>_manual_:         source, medium, campaign_name, term, content
--   <prefix>_param_:          source, medium, campaign, term, content
-- The generated expressions are collected in a list and joined with commas, so
-- no trailing comma is emitted inside REPLACE (...).
final AS (
SELECT *
REPLACE (
{%- set reformatted_fields = [] -%}
{%- for prefix in ['first_touch', 'last_touch'] -%}
{%- for midfix in ['traffic_source', 'manual', 'param'] -%}
{%- for endfix in ['source', 'medium', 'campaign_name', 'term', 'content' ] -%}
{%- if (endfix=='campaign_name') -%}
{% if midfix=='traffic_source' %}
{% set endfix = 'name' %}
{%- elif midfix=='param' %}
{% set endfix = 'campaign' %}
{%- endif %}
{%- elif (midfix=='traffic_source' and (endfix=='term' or endfix=='content')) -%}
{%- continue -%}
{%- endif -%}
{%- set column_name = [prefix, midfix, endfix] | join('_') -%}
{%- do reformatted_fields.append("initcap(replace(" ~ column_name ~ ", '_', ' ')) as " ~ column_name) -%}
{% endfor %}
{% endfor %}
{% endfor %}
{{ reformatted_fields | join(',\n ') }}
)
FROM combined
)

select * EXCLUDE (parsed_url)
from final
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ models:
description: The path of the first touch.
- name: first_touch_content
description: The content that the shop was acquired through.
- name: first_touch_campaign
- name: first_touch_traffic_source_name
description: The campaign that the shop was acquired through.
- name: first_touch_medium
- name: first_touch_traffic_source_medium
description: The medium that the shop was acquired through.
- name: first_touch_source
- name: first_touch_traffic_source_source
description: The source that the shop was acquired through.
- name: first_touch_referrer_host
description: The referrer host that referred the shop's first visit.
Expand Down
Loading

0 comments on commit 5ac23a4

Please sign in to comment.