-- Music_data_analysis.sql
-- Music store data analysis queries (T-SQL / SQL Server).
-- Create the working database only when it is missing so the script can be re-run.
IF DB_ID(N'music_database') IS NULL
    CREATE DATABASE music_database;
GO
-- USE is evaluated when its batch is compiled, so it runs in a separate batch
-- after the database has actually been created.
USE music_database;
GO
-- Inspect the column metadata of every table (primary keys noted per table).
EXECUTE sp_columns @table_name = N'album';          -- primary key: album_id
EXECUTE sp_columns @table_name = N'artist';         -- primary key: artist_id
EXECUTE sp_columns @table_name = N'customer';       -- primary key: customer_id
EXECUTE sp_columns @table_name = N'employee';       -- primary key: employee_id
EXECUTE sp_columns @table_name = N'genre';          -- primary key: genre_id
EXECUTE sp_columns @table_name = N'invoice';        -- primary key: invoice_id
EXECUTE sp_columns @table_name = N'invoice_line';   -- primary key: invoice_line_id
EXECUTE sp_columns @table_name = N'media_type';     -- primary key: media_type_id
EXECUTE sp_columns @table_name = N'playlist';       -- primary key: playlist_id
EXECUTE sp_columns @table_name = N'playlist_track'; -- primary key: playlist_id + track_id
EXECUTE sp_columns @table_name = N'track';          -- primary key: track_id
-- Q1: Who is the most senior employee based on job title?
-- Seniority is read from the levels column; the highest value sorts first.
SELECT TOP 1
    title,
    -- Separate the two name parts with a space so they do not run together.
    CONCAT(first_name, ' ', last_name) AS fullname
FROM employee
ORDER BY levels DESC;
-- Q2: Which countries have the most invoices?
-- Returns one row per billing country, busiest country first.
SELECT
    COUNT(*) AS invoice_count, -- number of invoices billed to the country
    billing_country
FROM invoice
GROUP BY billing_country
ORDER BY invoice_count DESC;
-- Q3: What are the top 3 invoice totals?
-- Sorting uses the raw total; only the displayed value is rounded to 2 decimals.
SELECT TOP 3
    ROUND(total, 2) AS invoice_total
FROM invoice
ORDER BY total DESC;
-- Q4: Which cities have the best customers? We would like to throw a promotional
-- Music Festival where we made the most money. This query returns the two billing
-- cities with the highest sum of invoice totals: the city name and that sum.
SELECT TOP 2
    billing_city,
    SUM(total) AS InvoiceTotal
FROM invoice
GROUP BY billing_city
ORDER BY SUM(total) DESC;
-- Q5: Who is the best customer? The customer who has spent the most money is
-- declared the best customer. WITH TIES keeps every customer tied for first place.
SELECT TOP 1 WITH TIES
    c.customer_id,
    c.first_name,
    c.last_name,
    SUM(i.total) AS total_spending
FROM invoice AS i
INNER JOIN customer AS c
    ON c.customer_id = i.customer_id
GROUP BY
    c.customer_id,
    c.first_name,
    c.last_name
ORDER BY SUM(i.total) DESC;
-- Q6: Return the email, first name, last name and genre of all Rock music listeners,
-- ordered alphabetically by email.
-- A listener is a customer with at least one invoice line for a track whose genre is Rock.
SELECT DISTINCT -- a customer can buy many Rock tracks; report each customer once
    c.email,
    c.first_name,
    c.last_name,
    g.name AS genre
FROM customer AS c
INNER JOIN invoice AS i
    ON i.customer_id = c.customer_id
INNER JOIN invoice_line AS il
    ON il.invoice_id = i.invoice_id
INNER JOIN track AS t
    ON t.track_id = il.track_id
INNER JOIN genre AS g
    ON g.genre_id = t.genre_id
WHERE g.name = 'Rock' -- exact match; the original LIKE pattern had no wildcards
ORDER BY c.email;
-- Q7: Let's invite the artists who have written the most rock music in our dataset.
-- Returns the artist name and total track count of the top 10 rock bands.
SELECT TOP 10
    ar.artist_id,
    ar.name,
    COUNT(*) AS number_of_songs -- one joined row per Rock track of the artist
FROM genre AS g
INNER JOIN track AS t
    ON t.genre_id = g.genre_id
INNER JOIN album AS al
    ON al.album_id = t.album_id
INNER JOIN artist AS ar
    ON ar.artist_id = al.artist_id
WHERE g.name LIKE 'Rock'
GROUP BY
    ar.artist_id,
    ar.name
ORDER BY COUNT(*) DESC;
-- Q8: Return all track names whose song length is longer than the average song length.
-- Returns the name and milliseconds of each track, longest songs first.
WITH average_length AS (
    -- Single-row CTE holding the average track length.
    SELECT AVG(milliseconds) AS avg_track_length
    FROM track
)
SELECT
    t.name,
    t.milliseconds
FROM track AS t
CROSS JOIN average_length AS a
WHERE t.milliseconds > a.avg_track_length
ORDER BY t.milliseconds DESC;
-- Q9: List artists that have albums containing more than 10 tracks. For each such
-- album return the artist name, the album title and its track count, largest albums first.
SELECT
    ar.name AS artist_name,
    al.title AS album_title,
    COUNT(t.track_id) AS total_tracks
FROM track AS t
INNER JOIN album AS al
    ON al.album_id = t.album_id
INNER JOIN artist AS ar
    ON ar.artist_id = al.artist_id
GROUP BY
    ar.artist_id,
    ar.name,
    al.album_id,
    al.title
HAVING COUNT(t.track_id) > 10 -- keep only albums with more than 10 tracks
ORDER BY COUNT(t.track_id) DESC;
-- Q10: Retrieve the names of all artists and the total number of tracks they have
-- in the database, ordered by the number of tracks in descending order.
-- LEFT JOINs keep artists without albums or tracks; COUNT(track_id) reports 0 for them.
SELECT
    artist.name AS artist_name,
    COUNT(track.track_id) AS total_tracks
FROM artist
LEFT JOIN album
    ON artist.artist_id = album.artist_id
LEFT JOIN track
    ON album.album_id = track.album_id
-- Group on the key as well as the name so two different artists that share a
-- name are not merged into a single row.
GROUP BY
    artist.artist_id,
    artist.name
ORDER BY total_tracks DESC;
-- Q11: How much did each customer spend on the best-selling artist?
-- Returns the customer, the artist name and the amount spent.
-- Steps: first find the artist that earned the most according to the invoice lines,
-- then total every customer's purchases of that artist's tracks. Amounts come from
-- invoice_line (unit_price * quantity) because an invoice total can cover tracks
-- from several artists.
WITH top_artist AS (
    -- The single artist with the highest invoice-line revenue.
    SELECT TOP 1
        ar.artist_id AS artist_id,
        ar.name AS artist_name,
        SUM(line.unit_price * line.quantity) AS total_sales
    FROM artist AS ar
    INNER JOIN album AS al
        ON al.artist_id = ar.artist_id
    INNER JOIN track AS tr
        ON tr.album_id = al.album_id
    INNER JOIN invoice_line AS line
        ON line.track_id = tr.track_id
    GROUP BY
        ar.artist_id,
        ar.name
    ORDER BY total_sales DESC
)
SELECT
    cust.customer_id,
    cust.first_name,
    cust.last_name,
    ta.artist_name,
    SUM(line.unit_price * line.quantity) AS amount_spent
FROM customer AS cust
INNER JOIN invoice AS inv
    ON inv.customer_id = cust.customer_id
INNER JOIN invoice_line AS line
    ON line.invoice_id = inv.invoice_id
INNER JOIN track AS tr
    ON tr.track_id = line.track_id
INNER JOIN album AS al
    ON al.album_id = tr.album_id
INNER JOIN top_artist AS ta
    ON ta.artist_id = al.artist_id
GROUP BY
    cust.customer_id,
    cust.first_name,
    cust.last_name,
    ta.artist_name
ORDER BY SUM(line.unit_price * line.quantity) DESC;
-- Q12: Find the most popular music genre for each country, where the most popular
-- genre is the one with the highest number of purchases. Returns each country along
-- with its top genre; when several genres share the maximum, all of them are returned.
-- Steps: count purchases per (country, genre), then keep the top-ranked genres per country.
-- Method 1: ranking with a window function
WITH popular_genre AS (
    -- Purchases per country and genre (invoice lines with a non-NULL quantity).
    SELECT
        COUNT(invoice_line.quantity) AS purchases,
        customer.country,
        genre.name AS popular_genre
    FROM invoice_line
    INNER JOIN invoice
        ON invoice.invoice_id = invoice_line.invoice_id
    INNER JOIN customer
        ON customer.customer_id = invoice.customer_id
    INNER JOIN track
        ON track.track_id = invoice_line.track_id
    INNER JOIN genre
        ON genre.genre_id = track.genre_id
    GROUP BY
        customer.country,
        genre.name
),
ranked_genres AS (
    -- RANK (not ROW_NUMBER) gives every genre tied for first place rank 1,
    -- so shared maxima are all returned.
    SELECT
        country,
        popular_genre,
        RANK() OVER (PARTITION BY country ORDER BY purchases DESC) AS genre_rank
    FROM popular_genre
)
SELECT
    country,
    popular_genre
FROM ranked_genres
WHERE genre_rank = 1
ORDER BY
    country,
    popular_genre;
-- Method 2: chained CTEs - compare each genre's count with the country's maximum.
-- Every genre that reaches the maximum is returned, so ties are kept.
WITH genre_sales AS (
    -- Number of purchased invoice lines per country and genre.
    SELECT
        COUNT(*) AS purchases_per_genre,
        cu.country,
        ge.name AS popular_genre
    FROM invoice_line AS li
    INNER JOIN invoice AS inv
        ON inv.invoice_id = li.invoice_id
    INNER JOIN customer AS cu
        ON cu.customer_id = inv.customer_id
    INNER JOIN track AS tr
        ON tr.track_id = li.track_id
    INNER JOIN genre AS ge
        ON ge.genre_id = tr.genre_id
    GROUP BY
        cu.country,
        ge.name
),
country_peak AS (
    -- Highest per-genre purchase count within each country.
    SELECT
        country,
        MAX(purchases_per_genre) AS max_genre_number
    FROM genre_sales
    GROUP BY country
)
SELECT
    gs.country,
    gs.popular_genre
FROM genre_sales AS gs
INNER JOIN country_peak AS cp
    ON cp.country = gs.country
WHERE gs.purchases_per_genre = cp.max_genre_number;
-- Q13: Determine the customer that has spent the most on music for each country.
-- Returns the country along with the top customer and how much they spent. For
-- countries where the top amount is shared, all customers who spent it are returned.
-- Steps: total the spending per customer and billing country, then keep the
-- top-ranked customers of every country.
-- Method 1: ranking with a window function
WITH CustomersWithCountry AS (
    SELECT
        customer.customer_id,
        first_name,
        last_name,
        billing_country,
        SUM(total) AS total_spending,
        -- RANK (not ROW_NUMBER) so customers tied on the top amount all get rank 1.
        RANK() OVER (PARTITION BY billing_country ORDER BY SUM(total) DESC) AS SpendingRank
    FROM invoice
    INNER JOIN customer
        ON customer.customer_id = invoice.customer_id
    GROUP BY
        customer.customer_id,
        first_name,
        last_name,
        billing_country
)
-- A single SELECT: the former UNION ALL of two identical branches returned
-- every top customer twice.
SELECT
    billing_country,
    customer_id,
    first_name,
    last_name,
    total_spending
FROM CustomersWithCountry
WHERE SpendingRank = 1
ORDER BY
    billing_country,
    total_spending DESC;
-- Method 2: chained CTEs - compare each customer's total with the country's maximum.
-- Every customer that reaches the maximum is returned, so ties are kept.
WITH customer_spending AS (
    -- Total invoiced amount per customer and billing country.
    SELECT
        cu.customer_id,
        cu.first_name,
        cu.last_name,
        inv.billing_country,
        SUM(inv.total) AS total_spending
    FROM customer AS cu
    INNER JOIN invoice AS inv
        ON inv.customer_id = cu.customer_id
    GROUP BY
        cu.customer_id,
        cu.first_name,
        cu.last_name,
        inv.billing_country
),
country_peak AS (
    -- Highest per-customer total within each billing country.
    SELECT
        billing_country,
        MAX(total_spending) AS max_spending
    FROM customer_spending
    GROUP BY billing_country
)
SELECT
    cs.billing_country,
    cs.total_spending,
    cs.first_name,
    cs.last_name,
    cs.customer_id
FROM customer_spending AS cs
INNER JOIN country_peak AS cp
    ON cp.billing_country = cs.billing_country
WHERE cs.total_spending = cp.max_spending
ORDER BY cs.billing_country;
GO
-- Q14: View that represents the most popular music genre for each country.
-- CREATE VIEW has to be the first statement of its batch, hence the GO separators.
CREATE VIEW MostPopularGenreByCountry AS
WITH PopularGenreByCountry AS (
    -- Purchases per country and genre (invoice lines with a non-NULL quantity).
    SELECT
        customer.country,
        genre.name AS popular_genre,
        COUNT(invoice_line.quantity) AS purchases
    FROM invoice_line
    INNER JOIN invoice
        ON invoice.invoice_id = invoice_line.invoice_id
    INNER JOIN customer
        ON customer.customer_id = invoice.customer_id
    INNER JOIN track
        ON track.track_id = invoice_line.track_id
    INNER JOIN genre
        ON genre.genre_id = track.genre_id
    GROUP BY
        customer.country,
        genre.name
)
SELECT
    country,
    popular_genre
FROM (
    SELECT
        country,
        popular_genre,
        -- One row per country; the genre name breaks ties so the same genre is
        -- picked on every execution.
        ROW_NUMBER() OVER (
            PARTITION BY country
            ORDER BY purchases DESC, popular_genre
        ) AS RowNo
    FROM PopularGenreByCountry
) AS ranked_genres
WHERE RowNo = 1;
GO
-- Read the view back with an explicit column list.
SELECT
    country,
    popular_genre
FROM MostPopularGenreByCountry;
GO
-- Q15: Total number of purchases for each genre in each country.
-- CREATE VIEW has to be the first statement of its batch, hence the GO separators.
CREATE VIEW MostPopularGenreByCountryWithDetails AS
WITH PopularGenreByCountry AS (
    -- Purchases per country and genre (invoice lines with a non-NULL quantity).
    SELECT
        customer.country,
        genre.name AS popular_genre,
        COUNT(invoice_line.quantity) AS purchases
    FROM invoice_line
    INNER JOIN invoice
        ON invoice.invoice_id = invoice_line.invoice_id
    INNER JOIN customer
        ON customer.customer_id = invoice.customer_id
    INNER JOIN track
        ON track.track_id = invoice_line.track_id
    INNER JOIN genre
        ON genre.genre_id = track.genre_id
    GROUP BY
        customer.country,
        genre.name
)
-- NOTE(review): the CTE already yields one row per (country, genre), so MAX and SUM
-- both equal that row's purchases. Both columns are kept so the view's shape is
-- unchanged - confirm whether max_purchases was meant to be a per-country maximum.
SELECT
    country,
    popular_genre,
    MAX(purchases) AS max_purchases,
    SUM(purchases) AS total_purchases
FROM PopularGenreByCountry
GROUP BY
    country,
    popular_genre;
GO
-- Read the view back with an explicit column list.
SELECT
    country,
    popular_genre,
    max_purchases,
    total_purchases
FROM MostPopularGenreByCountryWithDetails;