Skip to content

Commit

Permalink
Workflows, steps, runs, and shops all V1 with testing and light docs
Browse files Browse the repository at this point in the history
  • Loading branch information
JonCrawford committed Oct 19, 2022
0 parents commit b6ad873
Show file tree
Hide file tree
Showing 32 changed files with 1,472 additions and 0 deletions.
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

target/
dbt_packages/
logs/
.venv
target/
dbt_modules/
dbt_packages/
logs/
venv/
env/
test.env
__pycache__
33 changes: 33 additions & 0 deletions .sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
[sqlfluff]
templater = dbt
dialect = snowflake
exclude_rules = L010, L030, L032
verbose = 1

[sqlfluff:templater:dbt]
project_dir="./"
profiles_dir = "~/.dbt/"
profile = "shoppad"
target = "dev"

[sqlfluff:rules]
tab_space_size = 4
max_line_length = 160
indent_unit = space
comma_style = trailing
allow_scalar = True
single_table_references = consistent
unquoted_identifiers_policy = all

[sqlfluff:templater:jinja:context]
dbt_date= {"time_zone": "America/Los_Angeles"}

[sqlfluff:templater:jinja]
apply_dbt_builtins = true

[sqlfluff:templater:jinja:macros]

# Specific Rule rules
[sqlfluff:rules:L066]
min_alias_length = 4
max_alias_length = None
3 changes: 3 additions & 0 deletions .sqlfluffignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# dbt <1.0.0 (kept on its own line: gitignore-style files do not support trailing comments)
target/
dbt_packages/
macros/
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Welcome to your new dbt project!

### Using the starter project

Try running the following commands:
- dbt run
- dbt test


### Resources:
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
- Find [dbt events](https://events.getdbt.com) near you
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
Empty file added analyses/.gitkeep
Empty file.
503 changes: 503 additions & 0 deletions analyses/original_views.sql

Large diffs are not rendered by default.

63 changes: 63 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'shoppad'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'shoppad'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_packages"

vars:
"dbt_date:time_zone": "America/Los_Angeles"

seeds:
+schema: seed_data

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# By default, dbt builds the models in this project as transient tables (see
# `+transient` and `+materialized` below). These settings can be overridden per
# folder below, or in the individual model files using the `{{ config(...) }}` macro.
models:
shoppad:
+transient: true
+materialized: table
# Config indicated by + and applies to all files under models/example/
# example:
# +materialized: view
mesa:
mart:
step_runs:
+cluster_by: ['shop_id', 'shop_subdomain']

workflow_runs:
+cluster_by: ['shop_id', 'shop_subdomain', 'workflow_id']

shops:
+cluster_by: ["date_trunc('day', first_installed_at)"]

staging:
+materialized: view
+schema: staging

stg_step_runs:
+materialized: incremental
# +cluster_by: ['workflow_run_id']
Empty file added macros/.gitkeep
Empty file.
10 changes: 10 additions & 0 deletions macros/get_max_updated_at.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{# Adapted from https://dm03514.medium.com/beware-of-dbt-incremental-updates-against-snowflake-external-tables-beeda513e748 #}
{#
    get_max_updated_at()

    During an incremental run (both `execute` and `is_incremental()` are true),
    queries the relation currently being built (`this`) and returns the first
    value of the first result column, i.e. max(updated_at).

    On any other run no explicit return is issued, so callers receive whatever
    the macro body renders (whitespace only).
    NOTE(review): max() over an empty table presumably yields a null value;
    confirm that callers handle it.
#}
{% macro get_max_updated_at() %}
    {% if execute and is_incremental() %}
        {% set max_updated_at_sql %}
            SELECT max(updated_at) FROM {{ this }};
        {% endset %}
        {% set query_result = run_query(max_updated_at_sql) %}
        {% do return(query_result.columns[0][0]) %}
    {% endif %}
{% endmacro %}
4 changes: 4 additions & 0 deletions macros/groomed_column_list.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{#
    groomed_column_list(relation_object, columns_to_skip=[])

    Renders the column names of `relation_object`, one per line and
    comma-separated, leaving out the Hevo bookkeeping columns listed below and
    any extra names passed in `columns_to_skip`.
#}
{% macro groomed_column_list(relation_object, columns_to_skip=[]) %}
    {% set excluded_columns = columns_to_skip + ['__HEVO__DATABASE_NAME', '__HEVO__INGESTED_AT', '__HEVO__LOADED_AT', '__HEVO__MARKED_DELETED', '__HEVO_ID'] %}
    {{ dbt_utils.get_filtered_columns_in_relation(from=relation_object, except=excluded_columns) | join(",\n") }}
{% endmacro %}
3 changes: 3 additions & 0 deletions macros/pacific_timestamp.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{#-
    pacific_timestamp(timestamp_str)

    Renders a SQL expression that converts `timestamp_str` (a column name or
    SQL expression, treated as UTC) to the America/Los_Angeles time zone by
    delegating to dbt_date.convert_timezone.
-#}
{%- macro pacific_timestamp(timestamp_str) -%}
{{ dbt_date.convert_timezone(timestamp_str, target_tz="America/Los_Angeles", source_tz='UTC') }}
{%- endmacro -%}
65 changes: 65 additions & 0 deletions mesa_dbt.code-workspace
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
    // VS Code workspace for the Mesa dbt project: sqlfluff linting, dbt tooling
    // and a SQLTools connection to the Snowflake warehouse.
    "folders": [
        {
            "path": "."
        }
    ],
    "settings": {
        "git.ignoreLimitWarning": true,
        "editor.tokenColorCustomizations": {},
        "sqlfluff.dialect": "snowflake",
        "sqlfluff.rules": [],
        "sqlfluff.linter.run": "onSave",
        "sqlfluff.experimental.format.executeInTerminal": true,
        "editor.formatOnSave": false,
        // Previously declared at the workspace root, outside "settings".
        "editor.quickSuggestions": {
            "strings": true
        },
        "dbt.queryTemplate": "select * from ({query}) as osmosis_query limit {limit}",
        "sqlfluff.excludeRules": ["L010", "L030", "L032"],
        "dbt-formatter.dialect": "snowflake",
        "[sql]": {
            "editor.defaultFormatter": "dorzey.vscode-sqlfluff"
        },
        "sqltools.connections": [
            {
                "authenticator": "SNOWFLAKE",
                "ocspOptions": {
                    "ocspFailOpen": true
                },
                "snowflakeOptions": {
                    "clientSessionKeepAlive": true,
                    "clientSessionKeepAliveHeartbeatFrequency": 3600,
                    "schema": "DBT_JCRAW",
                    "role": "PIPELINE"
                },
                "previewLimit": 50,
                "driver": "Snowflake",
                "name": "Mesa Data Warehouse",
                "account": "dccnrvg-zaa52567",
                "username": "DBT_CRAWFORD",
                // SECURITY: never commit a real password to version control.
                // Supply it locally (e.g. in untracked user settings). Any
                // password that was committed here before must be treated as
                // compromised and rotated.
                "password": "<SNOWFLAKE_PASSWORD: set locally, never commit>",
                "database": "MONGO",
                "warehouse": "MONGO_WAREHOUSE",
                "group": "Mesa"
            }
        ],
        "dbt.previewPanel.displayLocation": "vertical",
        "files.associations": {
            "*.sql": "jinja-sql",
            "*.yml": "jinja-yaml"
        },
        "sqlfluff.executablePath": "",
        "dbt-formatter.lowerWords": false,
        "sqlfluff.format.enabled": true,
        "sqlfluff.suppressNotifications": false
    },
    "extensions": {
        "recommendations": [
            "dorzey.vscode-sqlfluff",
            "mtxr.sqltools",
            "koszti.snowflake-driver-for-sqltools"
        ]
    }
}
8 changes: 8 additions & 0 deletions models/mesa/docs/stg_shops.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{% docs staging_shops %}

This table contains decorated, deduplicated Shop/Merchant records. The records are imported from the Mongo source import tables generated by HEVO.

Some shop records are filtered out that meet the following criteria.
- deleted: `(__hevo__marked_deleted)`
- certain Shopify plans: `shopify_plan_name NOT IN ('affiliate', 'partner_test', 'plus_partner_sandbox')`
{% enddocs %}
26 changes: 26 additions & 0 deletions models/mesa/mart/mesa_charges.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- Mesa charges attributed to shops, with the charge time expressed in Pacific time.
-- A charge is matched to a shop when the charge's merchant id appears in that
-- shop's `all_shop_ids` (used here as an array via array_contains).
WITH shop_records AS (
    SELECT * FROM {{ ref('stg_shops') }}
),

-- Raw charge rows, with the id columns renamed alongside every source column.
charge_records AS (
    SELECT
        _id AS charge_id,
        merchant_id AS shop_id,
        *
    FROM {{ source('mesa_mongo', 'mesa_charges') }}
)

SELECT
    shop_records.shop_id,
    shop_subdomain,
    subscription_id,
    billed_count,
    billed_amount,
    {{ pacific_timestamp('_CREATED_AT') }} AS charged_at_pt,
    -- Calendar day of the charge (Pacific); reuses the alias defined on the previous line.
    CAST(DATE_TRUNC('day', charged_at_pt) AS date) AS charged_on_pt
FROM charge_records
INNER JOIN shop_records
    ON array_contains(charge_records.shop_id::variant, shop_records.all_shop_ids)
93 changes: 93 additions & 0 deletions models/mesa/mart/rollups/mesa_growth_accounting.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
-- Monthly growth accounting for shops: active (mau), retained, new, resurrected and churned.
-- Translated to run on Snowflake from a PostgreSQL query originally written by
-- Jonathan Hsu (Tribe Capital), linked from this blog post:
-- https://tribecap.co/a-quantitative-approach-to-product-market-fit/

WITH dau AS (
    -- Daily value per shop, taken from mesa_shop_days. The rest of the query only
    -- relies on this CTE exposing three columns:
    --   dt       a date
    --   shop_id  a unique identifier for the shop
    --   inc_amt  the amount of value the shop created on dt
    --            (most commonly the incremental revenue from the shop on dt)
    -- Each (dt, shop_id) pair is unique in this CTE.
    -- For L28-style growth accounting, use inc_amt = 1 instead.
    SELECT
        shop_id,
        TO_DATE(charged_on_pt) AS dt,
        SUM(inc_amount) AS inc_amt
    FROM {{ ref('mesa_shop_days') }}
    GROUP BY
        shop_id,
        TO_DATE(charged_on_pt)
),

-- Monthly value per shop.
mau AS (
    SELECT
        DATE_TRUNC('month', dt) AS month,
        shop_id,
        SUM(inc_amt) AS inc_amt
    FROM dau
    GROUP BY
        DATE_TRUNC('month', dt),
        shop_id
),

-- Cohort date of each shop, derived here from its earliest row in dau.
-- A registration date could be substituted if that is more appropriate.
first_dt AS (
    SELECT
        shop_id,
        MIN(dt) AS first_dt,
        DATE_TRUNC('week', MIN(dt)) AS first_week,
        DATE_TRUNC('month', MIN(dt)) AS first_month
    FROM dau
    GROUP BY shop_id
),

-- Shop-months with a positive amount, tagged with the shop's cohort month.
mau_decorated AS (
    SELECT
        mau.month,
        mau.shop_id,
        mau.inc_amt,
        first_dt.first_month
    FROM mau
    INNER JOIN first_dt
        ON mau.shop_id = first_dt.shop_id
    WHERE mau.inc_amt > 0
),

-- MAU growth accounting. This step does not use inc_amt. The quantities
-- satisfy these identities:
--   MAU(t)           = retained(t) + new(t) + resurrected(t)
--   MAU(t - 1 month) = retained(t) + churned(t)
-- Each shop-month is paired with the same shop's previous month; the full outer
-- join keeps shops present in only one of the two months.
mau_growth_accounting AS (
    SELECT
        COALESCE(this_month.month, DATEADD(month, 1, prior_month.month)) AS month,
        COUNT(DISTINCT this_month.shop_id) AS mau,
        COUNT(DISTINCT IFF(prior_month.shop_id IS NOT NULL, this_month.shop_id, NULL)) AS retained,
        COUNT(DISTINCT IFF(this_month.first_month = this_month.month, this_month.shop_id, NULL)) AS new,
        COUNT(
            DISTINCT IFF(
                this_month.first_month <> this_month.month AND prior_month.shop_id IS NULL,
                this_month.shop_id,
                NULL
            )
        ) AS resurrected,
        -- Churn is reported as a negative count.
        -1 * COUNT(DISTINCT IFF(this_month.shop_id IS NULL, prior_month.shop_id, NULL)) AS churned
    FROM mau_decorated AS this_month
    FULL OUTER JOIN mau_decorated AS prior_month
        ON this_month.shop_id = prior_month.shop_id
        AND this_month.month = DATEADD(month, 1, prior_month.month)
    GROUP BY COALESCE(this_month.month, DATEADD(month, 1, prior_month.month))
    ORDER BY COALESCE(this_month.month, DATEADD(month, 1, prior_month.month))
)

-- Result set for MAU growth accounting.
SELECT * FROM mau_growth_accounting
38 changes: 38 additions & 0 deletions models/mesa/mart/rollups/mesa_shop_days.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- One row per charge from mesa_charges dated before the current date, decorated
-- with the shop's daily plan revenue. `inc_amount` (plan + usage revenue) is the
-- value consumed by mesa_growth_accounting.
WITH charges AS (
    SELECT *
    FROM {{ ref('mesa_charges') }}
    -- Only charges dated before the current date.
    WHERE charged_on_pt < current_date()
),

billing_accounts AS (
    SELECT
        shop_id,
        daily_plan_revenue
    FROM {{ ref('stg_mesa_billing_accounts') }}
    {# Is it important to include this? Won't charges never appear if they are still in Trial?
    (
    billing_plan_trial_ends IS NULL OR
    billing_plan_trial_ends < current_date()
    ) #}

),

-- NOTE(review): this CTE is LEFT JOINed below and none of its columns are
-- selected, so the install_status / plan filter does not remove any charge
-- rows. Confirm whether an INNER JOIN was intended.
shops AS (
    SELECT *
    FROM {{ ref('shops') }}
    WHERE install_status = 'active'
        AND shopify_plan_name NOT IN ('frozen', 'cancelled', 'fraudulent')
)

SELECT
    charges.charged_on_pt,
    shop_id,
    -- Qualified so the reference cannot become ambiguous with the SELECT * from shops.
    charges.shop_subdomain,
    -- Left as NULL when the shop has no billing account row.
    billing_accounts.daily_plan_revenue,
    COALESCE(charges.billed_amount, 0) AS daily_usage_revenue,
    -- Each side is coalesced: a shop with no billing account row has a NULL plan
    -- revenue, which would otherwise make the whole sum NULL and drop the usage
    -- revenue from downstream SUMs.
    (COALESCE(billing_accounts.daily_plan_revenue, 0) + COALESCE(charges.billed_amount, 0)) AS inc_amount
FROM charges
LEFT JOIN shops USING (shop_id)
LEFT JOIN billing_accounts USING (shop_id)
-- Don't create rows for zero amounts.
{# WHERE inc_amount > 0 -- This is handled in the Growth Accounting queries. #}
Loading

0 comments on commit b6ad873

Please sign in to comment.