Skip to content

Commit

Permalink
Position internal traffic and Shopify Forum traffic correctly and fil…
Browse files Browse the repository at this point in the history
…ter out duplicate events AND more accurately specify that page referrers are just for that pageview.
  • Loading branch information
JonCrawford committed Apr 5, 2024
1 parent e2df201 commit 00c1dce
Show file tree
Hide file tree
Showing 14 changed files with 103 additions and 113 deletions.
8 changes: 0 additions & 8 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,3 @@ models:
+schema: support
intermediate:
+schema: support

tests:
shoppad:
mesa:
staging:
enabled: true #"{{ target.name not in ['prod', 'default'] }}"
intermediate:
enabled: true #"{{ target.name not in ['prod', 'default'] }}"
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ models:
description: The medium that the shop was acquired through.
- name: ga_first_touch_traffic_source_source
description: The source that the shop was acquired through.
- name: ga_first_touch_referrer_host
- name: ga_first_touch_page_referrer_host
description: The referrer host that referred the shop's first visit.
- name: ga_first_touch_app_store_surface_detail
description: The detail of the app store surface during the first touch event.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ models:
description: ""
- name: REFERRER
description: ""
- name: REFERRER_HOST
- name: page_referrer_host
description: ""
- name: REFERRER_SOURCE
- name: page_referrer_SOURCE
description: ""
- name: REFERRER_MEDIUM
- name: page_referrer_MEDIUM
description: ""
- name: SHOP_ID
description: ""
14 changes: 7 additions & 7 deletions models/google_analytics/intermediate/int_ga4_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,17 @@ reformatted AS (
WHEN lower(traffic_source_medium) = '(none)'
THEN
CASE
WHEN lower(referrer_host) = 'apps.shopify.com'
WHEN lower(page_referrer_host) = 'apps.shopify.com'
THEN 'App Store - Direct/Other'
ELSE COALESCE(
param_medium,
IFF(
referrer_full IS NOT NULL,
nullif(PARSE_URL('https://' || referrer_full):parameters:utm_medium, ''),
page_referrer_full IS NOT NULL,
nullif(PARSE_URL('https://' || page_referrer_full):parameters:utm_medium, ''),
NULL
),
IFF(
referrer_full ILIKE '%apps.shopify.com%', 'App Store - Direct/Other', 'direct'
page_referrer_full ILIKE '%apps.shopify.com%', 'App Store - Direct/Other', 'direct'
) {# Do a bunch of stuff to override direct because of the way the
App Store works. #}
)
Expand All @@ -62,12 +62,12 @@ reformatted AS (
THEN COALESCE(
param_source,
IFF(
referrer_full IS NOT NULL,
nullif(PARSE_URL('https://' || referrer_full):parameters:utm_source, ''),
page_referrer_full IS NOT NULL,
nullif(PARSE_URL('https://' || page_referrer_full):parameters:utm_source, ''),
NULL
),
IFF(
referrer_full ILIKE '%apps.shopify.com%', 'shopify', 'direct'
page_referrer_full ILIKE '%apps.shopify.com%', 'shopify', 'direct'
) {# Do a bunch of stuff to override direct because of the way
the App Store works. #}
)
Expand Down
32 changes: 14 additions & 18 deletions models/google_analytics/staging/stg_ga4_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ with
)
{# TODO: Use the above only for everything after the first ga_session_id is present. #}
{# TODO: Then create another condition that looks to all the UA (pre-GA4 events) before that date. #}

{# We still get duplicates sometimes. #}
QUALIFY ROW_NUMBER() OVER (PARTITION BY user_pseudo_id, event_name, event_timestamp ORDER BY source) = 1
),

filtered_raw_ga4_events AS (
Expand Down Expand Up @@ -95,16 +98,10 @@ with
{# Filter out the referral medium and source if it's our surface area. #}
CASE
WHEN param_source = 'shopify_forums' THEN 'referral'
WHEN medium ILIKE '%referral%'
AND (page_referrer ILIKE '%shopify.com%' OR page_referrer ILIKE '%getmesa.com%' OR page_referrer ILIKE '%shoppad.com%')
THEN 'internal'
ELSE medium
else medium
END as traffic_source_medium,
CASE
WHEN param_source = 'shopify_forums' THEN 'Shopify Forums'
WHEN name ILIKE '%referral%'
AND (page_referrer ILIKE '%shopify.com%' OR page_referrer ILIKE '%getmesa.com%' OR page_referrer ILIKE '%shoppad.com%')
THEN '(internal)'
ELSE name
END as traffic_source_name,

Expand All @@ -113,7 +110,6 @@ with
ELSE source
END as traffic_source_source,

{# Remove referrer when traffic_source_medium is referral and referrer is our own property. #}
CASE
WHEN param_source = 'shopify_forums' THEN parse_url('https://community.shopify.com/')
WHEN traffic_source_medium ILIKE '%referral%' OR traffic_source_name ILIKE '%referral%'
Expand All @@ -122,11 +118,11 @@ with
ELSE parse_url(page_referrer)
END as parsed_referrer,

parsed_referrer:host::STRING as referrer_host,
'/' || parsed_referrer:path as referrer_path,
'?' || parsed_referrer:query as referrer_query,
referrer_host || referrer_path AS referrer_url,
referrer_host || referrer_path || COALESCE(referrer_query, '') AS referrer_full,
parsed_referrer:host::STRING as page_referrer_host,
'/' || parsed_referrer:path as page_referrer_path,
'?' || parsed_referrer:query as page_referrer_query,
page_referrer_host || page_referrer_path AS page_referrer_url,
page_referrer_host || page_referrer_path || COALESCE(page_referrer_query, '') AS page_referrer_full,

CASE
WHEN param_source = 'shopify_forums' THEN 'referral'
Expand Down Expand Up @@ -178,11 +174,11 @@ with
"traffic_source_name",
"traffic_source_source",
"traffic_source_medium",
"referrer_host",
"referrer_full",
"referrer_url",
"referrer_query",
"referrer_path",
"page_referrer_host",
"page_referrer_full",
"page_referrer_url",
"page_referrer_query",
"page_referrer_path",
"shop_subdomain"
] %}

Expand Down
10 changes: 5 additions & 5 deletions models/mesa/ga4_events.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ models:
- name: page_location
- name: param_content
- name: param_term
- name: referrer_host
- name: referrer_path
- name: referrer_query
- name: referrer_url
- name: referrer_full
- name: page_referrer_host
- name: page_referrer_path
- name: page_referrer_query
- name: page_referrer_url
- name: page_referrer_full
- name: app_store_surface_detail
- name: app_store_surface_type
- name: app_store_surface_intra_position
Expand Down
10 changes: 5 additions & 5 deletions models/mesa/int_ga4_events.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ models:
- name: page_location
- name: param_content
- name: param_term
- name: referrer_host
- name: referrer_path
- name: referrer_query
- name: referrer_url
- name: referrer_full
- name: page_referrer_host
- name: page_referrer_path
- name: page_referrer_query
- name: page_referrer_url
- name: page_referrer_full
- name: app_store_surface_detail
- name: app_store_surface_type
- name: app_store_surface_intra_position
Expand Down
6 changes: 3 additions & 3 deletions models/mesa/int_simplified_shop_attribution.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ models:
description: The type of surface in the app store during acquisition.
- name: acq_unified_app_store_surface_detail
description: The detail of the surface in the app store during acquisition.
- name: acq_unified_referrer_host
- name: acq_unified_page_referrer_host
description: The host of the referrer during acquisition.
- name: acq_is_blog_referral
description:
Expand All @@ -38,7 +38,7 @@ models:
description: The number of days it took to install after acquisition.
- name: acq_is_app_store_search_ad_referral
description: Whether the acquisition is a referral from an app store search ad.
- name: acq_referrer_medium
- name: acq_page_referrer_medium
description: The medium of the referrer during acquisition.
- name: acq_referrer_source
- name: acq_page_referrer_source
description: The source of the referrer during acquisition.
4 changes: 2 additions & 2 deletions models/mesa/intermediate/_int_shop_install_sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ models:
description: The campaign that the shop was acquired through.
- name: ga_first_touch_content
description: The content that the shop was acquired through.
- name: ga_first_touch_referrer
- name: ga_first_touch_page_referrer
description: The referrer URL that referred the Shop's first visit.
- name: ga_first_touch_referrer_host
- name: ga_first_touch_page_referrer_host
description: The referrer host that referred the shop's first visit.
- name: ga_first_touch_first_page_path
description: The first page path that the shop was acquired through.
Expand Down
49 changes: 26 additions & 23 deletions models/mesa/intermediate/int_shop_install_sources.sql
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,16 @@ combined_attribution AS (
app_store_organic_click_app_store_surface_detail
) AS unified_app_store_surface_detail,
REPLACE(COALESCE(
ga_first_touch_referrer_host,
ga_last_touch_referrer_host,
{# Fixes a weird GA4 thing that doesn't attribute referral traffic to Youtube.com #}
IFF(unified_traffic_source ILIKE '%youtube%', 'youtube.com', NULL),
ga_first_touch_page_referrer_host,
ga_last_touch_page_referrer_host,
segment_first_touch_referrer_host,
segment_last_touch_referrer_host,
app_store_install_referrer_host,
app_store_ad_click_referrer_host,
app_store_organic_click_referrer_host
), 'www.', '') AS unified_referrer_host,
app_store_install_page_referrer_host,
app_store_ad_click_page_referrer_host,
app_store_organic_click_page_referrer_host
), 'www.', '') AS unified_page_referrer_host,

{# Chain Columns #}

Expand Down Expand Up @@ -287,15 +289,15 @@ combined_attribution AS (

REPLACE(ARRAY_TO_STRING(
ARRAY_CONSTRUCT(
ga_first_touch_referrer_host,
ga_last_touch_referrer_host,
ga_first_touch_page_referrer_host,
ga_last_touch_page_referrer_host,
segment_first_touch_referrer_host,
segment_last_touch_referrer_host,
app_store_install_referrer_host,
app_store_ad_click_referrer_host,
app_store_organic_click_referrer_host
app_store_install_page_referrer_host,
app_store_ad_click_page_referrer_host,
app_store_organic_click_page_referrer_host
), ''
), 'www.', '') AS unified_referrer_host_chain
), 'www.', '') AS unified_page_referrer_host_chain

FROM shops
LEFT JOIN formatted_install_records USING (shop_subdomain)
Expand All @@ -304,7 +306,7 @@ combined_attribution AS (
LEFT JOIN segment_attribution USING (shop_subdomain)
),

referrer_mapping as (select * FROM {{ ref("referrer_mapping") }}),
page_referrer_mapping as (select * FROM {{ ref("referrer_mapping") }}),

reformatted AS (
SELECT
Expand All @@ -313,9 +315,9 @@ reformatted AS (
{# Referrer Mapping #}
INITCAP(
COALESCE(
referrer_mapping.source,
page_referrer_mapping.source,
IFF(
(unified_traffic_source IS NULL AND unified_referrer_host ILIKE '%shopify%')
(unified_traffic_source IS NULL AND unified_page_referrer_host ILIKE '%shopify%')
OR
unified_traffic_source ILIKE '%shopify%',
'Shopify',
Expand All @@ -325,27 +327,28 @@ reformatted AS (
) AS unified_traffic_source,
INITCAP(
COALESCE(
referrer_mapping.medium,
page_referrer_mapping.medium,
IFF(
(unified_traffic_medium IS NULL AND unified_referrer_host ILIKE '%apps.shopify.com%') OR unified_traffic_medium ILIKE '%apps.shopify.com%',
(unified_traffic_medium IS NULL AND unified_page_referrer_host ILIKE '%apps.shopify.com%')
OR unified_traffic_medium ILIKE '%apps.shopify.com%',
'app store',
unified_traffic_medium
)
)
) AS unified_traffic_medium,

referrer_mapping.medium AS referrer_medium,
referrer_mapping.source AS referrer_source
page_referrer_mapping.medium AS page_referrer_medium,
page_referrer_mapping.source AS page_referrer_source
FROM shops
LEFT JOIN combined_attribution USING (shop_subdomain)
LEFT JOIN
referrer_mapping
page_referrer_mapping
ON
LOWER(REPLACE(combined_attribution.unified_traffic_source, 'www.', ''))
= lower(referrer_mapping.host)
= lower(page_referrer_mapping.host)
OR
lower(REPLACE(combined_attribution.unified_referrer_host, 'www.', ''))
= lower(referrer_mapping.host)
lower(REPLACE(combined_attribution.unified_page_referrer_host, 'www.', ''))
= lower(page_referrer_mapping.host)
),

final AS (
Expand Down
8 changes: 4 additions & 4 deletions models/mesa/marts/_shops.yml
Original file line number Diff line number Diff line change
Expand Up @@ -856,15 +856,15 @@ models:
description: The campaign of the traffic during acquisition.
- name: acq_unified_traffic_source
description: The source of the traffic during acquisition.
- name: acq_unified_referrer_host
- name: acq_unified_page_referrer_host
description: The host of the referrer during acquisition.
- name: acq_unified_traffic_url
description: The URL of the traffic during acquisition.
- name: acq_unified_traffic_path
description: The path of the traffic during acquisition.
- name: acq_referrer_source
- name: acq_page_referrer_source
description: The source of the referrer during acquisition.
- name: acq_referrer_medium
- name: acq_page_referrer_medium
description: The medium of the referrer during acquisition.
- name: acq_unified_traffic_page_host
description: The host of the traffic page during acquisition.
Expand Down Expand Up @@ -894,7 +894,7 @@ models:
description: The URL path chain of important touch points during acquisition.
- name: acq_unified_page_host_chain
description: The host chain of important touch points during acquisition.
- name: acq_unified_referrer_host_chain
- name: acq_unified_page_referrer_host_chain
description: The referrer host chain of important touch points during acquisition.
- name: first_workflow_destination_operation_id
description: The operation ID of the first workflow destination.
Expand Down
Loading

0 comments on commit 00c1dce

Please sign in to comment.