Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Polishing repo #81

Merged
merged 3 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions tinybird/datasources/analytics_events.datasource
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
TOKEN tracker APPEND

DESCRIPTION >
Analytics events landing data source

TOKEN "tracker" APPEND

SCHEMA >
`timestamp` DateTime `json:$.timestamp`,
`session_id` String `json:$.session_id`,
`action` LowCardinality(String) `json:$.action`,
`version` LowCardinality(String) `json:$.version`,
`payload` String `json:$.payload`

ENGINE "MergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(timestamp)"
ENGINE_SORTING_KEY "timestamp"
ENGINE_TTL "timestamp + toIntervalDay(60)"
ENGINE MergeTree
ENGINE_PARTITION_KEY toYYYYMM(timestamp)
ENGINE_SORTING_KEY timestamp
ENGINE_TTL timestamp + toIntervalDay(60)
12 changes: 12 additions & 0 deletions tinybird/datasources/analytics_pages_mv.datasource
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
SCHEMA >
`date` Date,
`device` String,
`browser` String,
`location` String,
`pathname` String,
`visits` AggregateFunction(uniq, String),
`hits` AggregateFunction(count)

ENGINE AggregatingMergeTree
ENGINE_PARTITION_KEY toYYYYMM(date)
ENGINE_SORTING_KEY date, device, browser, location, pathname
13 changes: 13 additions & 0 deletions tinybird/datasources/analytics_sessions_mv.datasource
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
SCHEMA >
`date` Date,
`session_id` String,
`device` SimpleAggregateFunction(any, String),
`browser` SimpleAggregateFunction(any, String),
`location` SimpleAggregateFunction(any, String),
`first_hit` SimpleAggregateFunction(min, DateTime),
`latest_hit` SimpleAggregateFunction(max, DateTime),
`hits` AggregateFunction(count)

ENGINE AggregatingMergeTree
ENGINE_PARTITION_KEY toYYYYMM(date)
ENGINE_SORTING_KEY date, session_id
12 changes: 12 additions & 0 deletions tinybird/datasources/analytics_sources_mv.datasource
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
SCHEMA >
`date` Date,
`device` String,
`browser` String,
`location` String,
`referrer` String,
`visits` AggregateFunction(uniq, String),
`hits` AggregateFunction(count)

ENGINE AggregatingMergeTree
ENGINE_PARTITION_KEY toYYYYMM(date)
ENGINE_SORTING_KEY date, device, browser, location, referrer
82 changes: 44 additions & 38 deletions tinybird/pipes/analytics_hits.pipe
Original file line number Diff line number Diff line change
@@ -1,52 +1,58 @@
TOKEN dashboard READ

DESCRIPTION >
Parsed `page_hit` events, implementing `browser` and `device` detection logic.

TOKEN "dashboard" READ

NODE parsed_hits
DESCRIPTION >
Parse raw page_hit events

SQL >
SELECT
timestamp,
action,
version,
coalesce(session_id,'0') as session_id,
JSONExtractString(payload, 'locale') as locale,
JSONExtractString(payload, 'location') as location,
JSONExtractString(payload, 'referrer') as referrer,
JSONExtractString(payload, 'pathname') as pathname,
JSONExtractString(payload, 'href') as href,
lower(JSONExtractString(payload, 'user-agent')) as user_agent
FROM
analytics_events
timestamp,
action,
version,
coalesce(session_id, '0') as session_id,
JSONExtractString(payload, 'locale') as locale,
JSONExtractString(payload, 'location') as location,
JSONExtractString(payload, 'referrer') as referrer,
JSONExtractString(payload, 'pathname') as pathname,
JSONExtractString(payload, 'href') as href,
lower(JSONExtractString(payload, 'user-agent')) as user_agent
FROM analytics_events
where action = 'page_hit'

NODE endpoint
SQL >
SELECT
timestamp,
action,
version,
session_id,
location,
referrer,
pathname,
href,
case
when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot') then 'bot'
when match(user_agent, 'android') then 'mobile-android'
when match(user_agent, 'ipad|iphone|ipod') then 'mobile-ios'
else 'desktop'
END as device,
case
when match(user_agent, 'firefox') then 'firefox'
when match(user_agent, 'chrome|crios') then 'chrome'
when match(user_agent, 'opera') then 'opera'
when match(user_agent, 'msie|trident') then 'ie'
when match(user_agent, 'iphone|ipad|safari') then 'safari'
else 'Unknown'
END as browser
FROM
parsed_hits
timestamp,
action,
version,
session_id,
location,
referrer,
pathname,
href,
case
when match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot')
then 'bot'
when match(user_agent, 'android')
then 'mobile-android'
when match(user_agent, 'ipad|iphone|ipod')
then 'mobile-ios'
else 'desktop'
END as device,
case
when match(user_agent, 'firefox')
then 'firefox'
when match(user_agent, 'chrome|crios')
then 'chrome'
when match(user_agent, 'opera')
then 'opera'
when match(user_agent, 'msie|trident')
then 'ie'
when match(user_agent, 'iphone|ipad|safari')
then 'safari'
else 'Unknown'
END as browser
FROM parsed_hits
28 changes: 11 additions & 17 deletions tinybird/pipes/analytics_pages.pipe
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,16 @@ DESCRIPTION >
Aggregate by pathname and calculate visits (unique sessions) and hits

SQL >
select
toDate(timestamp) as date,
device,
browser,
location,
pathname,
uniqState(session_id) as visits,
countState() as hits
from
analytics_hits
group by
date,
device,
browser,
location,
pathname
SELECT
toDate(timestamp) AS date,
device,
browser,
location,
pathname,
uniqState(session_id) AS visits,
countState() AS hits
FROM analytics_hits
GROUP BY date, device, browser, location, pathname

TYPE materialized
TYPE MATERIALIZED
DATASOURCE analytics_pages_mv
27 changes: 12 additions & 15 deletions tinybird/pipes/analytics_sessions.pipe
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,17 @@ DESCRIPTION >
Aggregate by session_id and calculate session metrics

SQL >
select
toDate(timestamp) as date,
session_id,
anySimpleState(device) as device,
anySimpleState(browser) as browser,
anySimpleState(location) as location,
minSimpleState(timestamp) as first_hit,
maxSimpleState(timestamp) as latest_hit,
countState() as hits
from
analytics_hits
group by
date,
session_id
SELECT
toDate(timestamp) AS date,
session_id,
anySimpleState(device) AS device,
anySimpleState(browser) AS browser,
anySimpleState(location) AS location,
minSimpleState(timestamp) AS first_hit,
maxSimpleState(timestamp) AS latest_hit,
countState() AS hits
FROM analytics_hits
GROUP BY date, session_id

TYPE materialized
TYPE MATERIALIZED
DATASOURCE analytics_sessions_mv
33 changes: 13 additions & 20 deletions tinybird/pipes/analytics_sources.pipe
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,18 @@ DESCRIPTION >
Aggregate by referrer and calculate visits (unique sessions) and hits

SQL >
with (select domainWithoutWWW(href) from analytics_hits limit 1) as currenct_domain
select
toDate(timestamp) AS date,
device,
browser,
location,
referrer,
uniqState(session_id) AS visits,
countState() AS hits
from
analytics_hits
where
domainWithoutWWW(referrer) <> currenct_domain
group by
date,
device,
browser,
location,
referrer
WITH (SELECT domainWithoutWWW(href) FROM analytics_hits LIMIT 1) AS currenct_domain
SELECT
toDate(timestamp) AS date,
device,
browser,
location,
referrer,
uniqState(session_id) AS visits,
countState() AS hits
FROM analytics_hits
WHERE domainWithoutWWW(referrer) != currenct_domain
GROUP BY date, device, browser, location, referrer

TYPE materialized
TYPE MATERIALIZED
DATASOURCE analytics_sources_mv
Loading
Loading