-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathUSER_ANALYSIS_ASSIGNMENT.sql
200 lines (175 loc) · 7.29 KB
/
USER_ANALYSIS_ASSIGNMENT.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
use mavenfuzzyfactory;
-- Analyze repeat behaviour
select
order_items.order_id,
order_items.order_item_id,
order_items.price_usd,
order_items.created_at,
order_item_refunds.order_item_refund_id,
order_item_refunds.refund_amount_usd,
order_item_refunds.created_at,
datediff(order_item_refunds.created_at, order_items.created_at) as days_order_to_refund
from order_items
left join order_item_refunds
on order_item_refunds.order_item_id=order_items.order_item_id
where order_items.order_id in(3489,32049,27061);
-- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Identify Repeat Visitors Assignment
-- step 1: Identify the relevant new sessions
-- step 2: User the user_id values from Step 1 to find any repeat sessions those users had
-- step 3: Analyze the data at the user level (how many sessions did each user have?)
-- step 4: Aggregate the user-level analysis to generate your behavioural analysis
select
user_id,
website_session_id
from website_sessions
where created_at<'2014-11-01' -- the date of the assignment
and created_at >='2014-01-01' -- prescribed date range in assignment
and is_repeat_session=0;
create temporary table sessions_w_repeats
select
new_sessions.user_id,
new_sessions.website_session_id as new_session_id,
website_sessions.website_session_id as repeat_session_id
from
(
select
user_id,
website_session_id
from website_sessions
where created_at<'2014-11-01' -- the date of the assignment
and created_at >='2014-01-01' -- prescribed date range in assignment
and is_repeat_session=0 -- new sessions only
)as new_sessions
left join website_sessions
on website_sessions.user_id = new_sessions.user_id
and website_sessions.is_repeat_session=1 -- was a repeat session (redundant but good to illustrate)
and website_sessions.website_session_id > new_sessions.website_session_id -- session was later than new session
and website_sessions.created_at < '2014-11-01' -- the date of the assignment
and website_sessions.created_at >= '2014-01-01' -- prescribed date range in assignment
;
select
user_id,
count(distinct new_session_id) as new_sessions,
count(distinct repeat_session_id) as repeat_sessions
from sessions_w_repeats
group by 1
order by 3 desc;
select
repeat_sessions,
count(distinct user_id) as users
from
(
select
user_id,
count(distinct new_session_id) as new_sessions,
count(distinct repeat_session_id) as repeat_sessions
from sessions_w_repeats
group by 1
order by 3 desc
) as user_level
group by 1;
-- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Analyzing Time To repeat
-- STEP1 : Identify the relevant new sessions
-- STEP2 : User the user_id values from Step 1 to find any repeat sessions those users had
-- STEP3 : Find the created_at times for first and second sessions
-- STEP4 : Find the differnce between first and second sessions at a user level
-- STEP5 : Aggregate the user level data to find the average, min, max
create temporary table sessions_w_repeats_for_time_diff
select
new_sessions.user_id,
new_sessions.website_session_id as new_session_id,
new_sessions.created_at as new_session_created_at,
website_sessions.website_session_id as repeat_session_id,
website_sessions.created_at as repeat_session_created_at
from
(
select
user_id,
website_session_id,
created_at
from website_sessions
where created_at<'2014-11-03' -- the date of the assignment
and created_at >='2014-01-01' -- prescribed date range in assignment
and is_repeat_session=0 -- new sessions only
) as new_sessions
left join website_sessions
on website_sessions.user_id = new_sessions.user_id
and website_sessions.is_repeat_session=1 -- was a repeat session (redundant but good to illustrate)
and website_sessions.website_session_id > new_sessions.website_session_id -- session was later than new session
and website_sessions.created_at < '2014-11-03' -- the date of the assignment
and website_sessions.created_at >= '2014-01-01'
;
select * from sessions_w_repeats_for_time_diff;
select
user_id,
new_session_id,
new_session_created_at,
min(repeat_session_id) as second_session_id,
min(repeat_session_created_at) as second_session_created_at
from sessions_w_repeats_for_time_diff
where repeat_session_id is not null
group by 1,2,3;
create temporary table users_first_to_second
select
user_id,
datediff(second_session_created_at,new_session_created_at) as days_first_to_second_session
from
(
select
user_id,
new_session_id,
new_session_created_at,
min(repeat_session_id) as second_session_id,
min(repeat_session_created_at) as second_session_created_at
from sessions_w_repeats_for_time_diff
where repeat_session_id is not null
group by 1,2,3
) as first_second;
select
avg(days_first_to_second_session) as avg_days_first_to_second,
min(days_first_to_second_session) as min_days_first_to_second,
max(days_first_to_second_session) as max_days_first_to_second
from users_first_to_second;
-- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Analyzing Repeat Channel Behaviour
select
utm_source,
utm_campaign,
http_referer,
count(case when is_repeat_session=0 then website_session_id else null end) as new_sessions,
count(case when is_repeat_session=1 then website_session_id else null end) as repeat_sessions
from website_sessions
where created_at<'2014-11-05' -- the date of the assignment
and created_at>='2014-01-01' -- prescribed date range in assignment
group by 1,2,3
order by 5 desc;
select
case
when utm_source is null and http_referer in ('https://www.gsearch.com','https://www.bsearch.com') then 'Organic_search'
when utm_campaign='nonbrand' then 'paid_nonbrand'
when utm_campaign='brand' then 'paid_brand'
when utm_source is null and http_referer is null then 'direct_type_in'
when utm_source='socialbook' then 'paid_social'
end as channel_group,
count(case when is_repeat_session=0 then website_session_id else null end) as new_sessions,
count(case when is_repeat_session=1 then website_session_id else null end) as repeat_sessions
from website_sessions
where created_at<'2014-11-05' -- the date of the assignment
and created_at>='2014-01-01' -- prescribed date range in assignment
group by 1
order by 3 desc;
-- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Analyzing new and repeat conversion rates
select
is_repeat_session,
count(distinct website_sessions.website_session_id) as sessions,
count(distinct orders.order_id)/count(distinct website_sessions.website_session_id) as conv_rate,
sum(price_usd)/count(distinct website_sessions.website_session_id) as rev_per_session
from website_sessions
left join orders
on website_sessions.website_session_id=orders.website_session_id
where website_sessions.created_at < '2014-11-08'
and website_sessions.created_at >= '2014-01-01'
group by 1;