forked from lavishsheth/code
-
Notifications
You must be signed in to change notification settings - Fork 0
/
LAB Troubleshooting and Solving Data Join Pitfalls
119 lines (96 loc) · 3.23 KB
/
LAB Troubleshooting and Solving Data Join Pitfalls
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
export PROJECT_ID=YOUR_PROJECT_ID
bq mk ecommerce
bq query --use_legacy_sql=false \
'SELECT
productSKU,
COUNT(DISTINCT v2ProductName) AS product_count,
STRING_AGG(DISTINCT v2ProductName LIMIT 5) AS product_name
FROM `data-to-insights.ecommerce.all_sessions_raw`
WHERE v2ProductName IS NOT NULL
GROUP BY productSKU
HAVING product_count > 1
ORDER BY product_count DESC'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID 'WITH inventory_per_sku AS (
SELECT DISTINCT
website.v2ProductName,
website.productSKU,
inventory.stockLevel
FROM `data-to-insights.ecommerce.all_sessions_raw` AS website
JOIN `data-to-insights.ecommerce.products` AS inventory
ON website.productSKU = inventory.SKU
WHERE productSKU = "GGOEGPJC019099"
)
SELECT
productSKU,
SUM(stockLevel) AS total_inventory
FROM inventory_per_sku
GROUP BY productSKU'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID '#standardSQL
# the secret is in the JOIN type
# pull ID fields from both tables
SELECT DISTINCT
website.productSKU AS website_SKU,
inventory.SKU AS inventory_SKU
FROM `data-to-insights.ecommerce.all_sessions_raw` AS website
LEFT JOIN `data-to-insights.ecommerce.products` AS inventory
ON website.productSKU = inventory.SKU
'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID '
#standardSQL
# find product SKUs in website table but not in product inventory table
SELECT DISTINCT
website.productSKU AS website_SKU,
inventory.SKU AS inventory_SKU
FROM `data-to-insights.ecommerce.all_sessions_raw` AS website
LEFT JOIN `data-to-insights.ecommerce.products` AS inventory
ON website.productSKU = inventory.SKU
WHERE inventory.SKU IS NULL
'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID '
#standardSQL
# reverse the join
# find records in website but not in inventory
SELECT DISTINCT
website.productSKU AS website_SKU,
inventory.SKU AS inventory_SKU
FROM `data-to-insights.ecommerce.all_sessions_raw` AS website
RIGHT JOIN `data-to-insights.ecommerce.products` AS inventory
ON website.productSKU = inventory.SKU
WHERE website.productSKU IS NULL
'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID '
#standardSQL
CREATE OR REPLACE TABLE ecommerce.site_wide_promotion AS
SELECT .05 AS discount;
'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID '#standardSQL
SELECT DISTINCT
productSKU,
v2ProductCategory,
discount
FROM `data-to-insights.ecommerce.all_sessions_raw` AS website
CROSS JOIN ecommerce.site_wide_promotion
WHERE v2ProductCategory LIKE "%Clearance%"
AND productSKU = "GGOEGOLC013299"
'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID 'SELECT discount FROM ecommerce.site_wide_promotion'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID '#standardSQL
SELECT DISTINCT
productSKU,
v2ProductCategory,
discount
FROM `data-to-insights.ecommerce.all_sessions_raw` AS website
CROSS JOIN ecommerce.site_wide_promotion
WHERE v2ProductCategory LIKE "%Clearance%"
AND productSKU = "GGOEGOLC013299"
'
bq query --use_legacy_sql=false --project_id=$PROJECT_ID '#standardSQL
SELECT DISTINCT
productSKU,
v2ProductCategory,
discount
FROM `data-to-insights.ecommerce.all_sessions_raw` AS website
CROSS JOIN ecommerce.site_wide_promotion
WHERE v2ProductCategory LIKE "%Clearance%"
AND productSKU = "GGOEGOLC013299"
'