-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathannual_customer_activity_growth_analysis.sql
185 lines (177 loc) · 5.3 KB
/
annual_customer_activity_growth_analysis.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
-- Displaying the avg monthly active users for each year --
select
((count (distinct cd.customer_unique_id))::double precision/12) as avg_monthly_user,
date_part('year', od.order_purchase_timestamp) as years
from customers_dataset as cd
right join orders_dataset as od on cd.customer_id = od.customer_id
group by years;
----------------------------------------------
-- Displaying the number of new customers each year --
-- cte to order customer_unique_id and its duplicates (if exist) by year
with subq as
(
select
cd.customer_unique_id as customer_unique_id,
date_part('year', od.order_purchase_timestamp) as years
from customers_dataset as cd
right join orders_dataset as od on cd.customer_id = od.customer_id
order by customer_unique_id, years asc
)
-- counting the number of customer_unique_id (first time "orders") in each year
select
count(subq2.customer_unique_id) as number_of_new_customers,
subq2.years
from
(
-- removing duplicates of customer_unique_id by only selecting the first
-- occurrence of each customer_unique_id so that only first time "orders" are present.
select
distinct on (subq.customer_unique_id)
subq.customer_unique_id,
subq.years
from subq
) as subq2
group by years
order by years asc;
----------------------------------------------
-- Displaying the number of customers that has repeat orders in each year --
-- cte to get the order frequency of each customer in each year
with subq as
(
select
cd.customer_unique_id as customer_unique_id,
date_part('year', od.order_purchase_timestamp) as years,
count(cd.customer_unique_id) as frequency
from customers_dataset as cd
right join orders_dataset as od on cd.customer_id = od.customer_id
group by years, customer_unique_id
)
-- counting the amount of repeat orders (order frequency > 1) in each year
select
count(subq.frequency) as number_of_customers_repeat_orders,
subq.years as years
from subq
where subq.frequency > 1
group by years;
----------------------------------------------
-- Displaying the avg no. orders by each customer in each year --
-- cte to get the order frequency of each customer in each year
with subq as
(
select
cd.customer_unique_id as customer_unique_id,
date_part('year', od.order_purchase_timestamp) as years,
count(cd.customer_unique_id) as frequency
from customers_dataset as cd
right join orders_dataset as od on cd.customer_id = od.customer_id
group by years, customer_unique_id
)
-- calculating the average of the order frequency in each year
select
avg(subq.frequency) as avg_order_by_customers,
subq.years as years
from subq
group by years;
----------------------------------------------
-- Creating master table to display all of the information above --
create table if not exists annual_customer_activity_growth(
avg_monthly_user double precision,
number_of_new_customers bigint,
number_of_customers_repeat_orders bigint,
avg_order_by_customers numeric,
years int
);
-- Inserting information into the new table
insert into annual_customer_activity_growth (
years,
avg_monthly_user,
number_of_new_customers,
number_of_customers_repeat_orders,
avg_order_by_customers
)
select
q1.years,
q1.avg_monthly_user as avg_monthly_user,
q2.number_of_new_customers as number_of_new_customers,
q3.number_of_customers_repeat_orders as number_of_customers_repeat_orders,
q4.avg_order_by_customers as avg_order_by_customers
from (
select
((count (distinct cd.customer_unique_id))::double precision/12) as avg_monthly_user,
date_part('year', od.order_purchase_timestamp) as years
from customers_dataset as cd
right join orders_dataset as od on cd.customer_id = od.customer_id
group by years
) as q1
join
(
with subq as
(
select
cd.customer_unique_id as customer_unique_id,
date_part('year', od.order_purchase_timestamp) as years
from customers_dataset as cd
right join orders_dataset as od on cd.customer_id = od.customer_id
order by customer_unique_id, years asc
)
select
count(subq2.customer_unique_id) as number_of_new_customers,
subq2.years
from
(
select
distinct on (subq.customer_unique_id)
subq.customer_unique_id,
subq.years
from subq
) as subq2
group by years
order by years asc
)as q2
on
q1.years = q2.years
join
(
with subq as
(
select
cd.customer_unique_id as customer_unique_id,
date_part('year', od.order_purchase_timestamp) as years,
count(cd.customer_unique_id) as frequency
from customers_dataset as cd
right join orders_dataset as od on cd.customer_id = od.customer_id
group by years, customer_unique_id
)
select
count(subq.frequency) as number_of_customers_repeat_orders,
subq.years as years
from subq
where subq.frequency > 1
group by years
)as q3
on
q1.years = q3.years
join
(
with subq as
(
select
cd.customer_unique_id as customer_unique_id,
date_part('year', od.order_purchase_timestamp) as years,
count(cd.customer_unique_id) as frequency
from customers_dataset as cd
right join orders_dataset as od on cd.customer_id = od.customer_id
group by years, customer_unique_id
)
select
avg(subq.frequency) as avg_order_by_customers,
subq.years as years
from subq
group by years
)as q4
on
q1.years = q4.years;
-- Exporting master table
copy annual_customer_activity_growth
to 'C:\Farrell\Rakamin\Mini Projects\eCommerce Business Performance with SQL\annual_customer_activity_growth.csv'
with csv header;