-
Notifications
You must be signed in to change notification settings - Fork 3
/
genre.py
81 lines (72 loc) · 2.09 KB
/
genre.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
# Build data/songGenre.csv: one output row per (track, Spotify genre) pair,
# each tagged with a coarse "supergenre" computed from all of that track's
# genres.

# (keyword, supergenre label) pairs, tested in this order against each genre
# string; the first keyword found wins, so e.g. "pop rap" counts as pop and
# never reaches the "rap" test.
_KEYWORD_TO_SUPERGENRE = (
    ('pop', 'pop'),
    ('rock', 'rock'),
    ('hip hop', 'hiphop'),
    ('metal', 'metal'),
    ('jazz', 'jazz'),
    ('country', 'country'),
    ('rap', 'rap'),
    ('house', 'house'),
    ('r&b', 'r&b'),
    ('latin', 'latin'),
    ('show tunes', 'show tunes'),
    ('folk', 'folk'),
    ('adult standards', 'adult standards'),
    ('soul', 'soul'),
)

# Tie-break order: when several supergenres share the highest count, the one
# listed earliest here is chosen. This order is intentionally different from
# the keyword-matching order above.
_SUPERGENRE_PRIORITY = (
    'metal', 'rock', 'country', 'r&b', 'hiphop', 'rap', 'jazz', 'house',
    'latin', 'show tunes', 'folk', 'soul', 'pop', 'adult standards',
)


def _parse_genres(raw_value):
    """Split a stringified genre list into clean genre names.

    The value is converted with ``str()``, square brackets are removed and
    the remainder is split on commas. Each piece is then trimmed and, if it
    is wrapped in a matching pair of quotes, unwrapped -- otherwise the same
    genre would appear as ``"'pop'"`` or ``" 'pop'"`` depending on its
    position in the list.

    Always returns at least one element: an empty list yields ``['']`` and a
    missing (NaN) cell yields ``['nan']``, so such tracks still produce one
    output row.
    """
    text = str(raw_value).replace('[', '').replace(']', '')
    cleaned = []
    for token in text.split(','):
        token = token.strip()
        # Remove exactly one pair of matching quotes; apostrophes inside the
        # name (e.g. "children's music") are left alone.
        if len(token) >= 2 and token[0] == token[-1] and token[0] in '\'"':
            token = token[1:-1].strip()
        cleaned.append(token)
    return cleaned


def _pick_supergenre(genre_list):
    """Return the dominant supergenre label for a track's genres.

    Every genre votes for at most one supergenre (the first keyword it
    contains). The label with the most votes wins, ties going to the label
    that comes first in ``_SUPERGENRE_PRIORITY``. Returns ``'other'`` when no
    genre matches any keyword.
    """
    counts = dict.fromkeys(_SUPERGENRE_PRIORITY, 0)
    for genre in genre_list:
        for keyword, label in _KEYWORD_TO_SUPERGENRE:
            if keyword in genre:
                counts[label] += 1
                break
    # max() returns the first maximal item, so iterating in priority order
    # gives the required tie-break.
    best = max(_SUPERGENRE_PRIORITY, key=counts.__getitem__)
    return best if counts[best] else 'other'


raw = pd.read_csv('data/audio_features_hot_100_1958_2019.csv')
print(raw.columns)

# Parallel column buffers for the output frame.
songs = []
genres = []
supergenres = []
ids = []
for _, row in raw.iterrows():
    genre_list = _parse_genres(row.spotify_genre)
    supergenre = _pick_supergenre(genre_list)
    # One output row per genre; the supergenre is repeated on each of them.
    for genre in genre_list:
        ids.append(row.track_id)
        genres.append(genre)
        songs.append(row.track)
        supergenres.append(supergenre)

# dropna removes rows whose track id or title is missing. Placeholder genres
# ('' or 'nan') are strings, not NaN, so those rows are kept here.
genre_df = pd.DataFrame({
    'track_id': ids,
    'song': songs,
    'genre': genres,
    'supergenre': supergenres,
}).dropna(how='any')
# NOTE: the DataFrame index is written as an unnamed first column (pandas
# default); left as-is so the output file layout does not change.
genre_df.to_csv("data/songGenre.csv")
print("Finished!")