-
Notifications
You must be signed in to change notification settings - Fork 0
/
addbird.py
119 lines (98 loc) · 3.83 KB
/
addbird.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import sys
from urllib import request, error
import shutil
import json
import os
import ssl
import csv
def load_labels():
    """Read BIRDS.csv and return its rows as a ``{num: name}`` dict.

    The file is parsed as headerless CSV whose first two columns are the
    label number and the bird name. Both keys and values are kept as the
    strings found in the file.
    """
    columns = ['num', 'name']
    with open('BIRDS.csv') as handle:
        reader = csv.DictReader(handle, columns)
        # Later rows win when a number is repeated, as with plain assignment.
        labels = {record['num']: record['name'] for record in reader}
    return labels
def main():
    """Register a new bird label and download its recordings.

    The command-line arguments are concatenated (with no separator) into
    the bird name, which is appended to BIRDS.csv under the next free
    number. The same arguments are then handed to download() as the query.
    When no argument is given, a usage hint is printed and nothing else
    happens.
    """
    args = sys.argv[1:]
    if not args:
        print("Please include an argument, ex: python3 addbird Baltimore Oriole")
        # Stop here: an empty query would make metadata() resolve its folder
        # to 'dataset/metadata/' itself and delete it before querying.
        return

    labels = load_labels()
    bird = "".join(args)
    # The new row is numbered with the count of labels already on file.
    with open('BIRDS.csv', 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow([str(len(labels)), bird])

    download(args)
# adapted from https://github.com/ntivirikin/xeno-canto-py.git
# Disable certificate verification: at import time, swap the ssl module's
# default HTTPS context factory for the unverified one.
# NOTE(review): this uses underscore-prefixed (private) ssl names and
# presumably applies to every HTTPS request urllib makes in this process,
# so responses are not protected against tampering in transit — confirm
# this is acceptable.
ssl._create_default_https_context = ssl._create_unverified_context
def metadata(filt):
    """Fetch every metadata page for a xeno-canto query and save it as JSON.

    Each term is scrubbed twice: spaces become ``%20`` for the request URL,
    and spaces, colons and double quotes are removed/replaced to form the
    folder name ``dataset/metadata/<terms>``. An existing folder with that
    name is deleted first. Every page of the response is written to
    ``page<N>.json`` in that folder; the page count is taken from the
    ``numPages`` field of the response.

    Args:
        filt: iterable of query-term strings.

    Returns:
        Path of the folder containing the downloaded metadata pages.

    Raises:
        SystemExit: with status 1 when the request fails with an HTTP error.
    """
    print("Retrieving metadata...")

    # Scrubbing input for file name and url
    terms = list(filt)
    filt_url = [term.replace(' ', '%20') for term in terms]
    filt_path = [
        term.replace(' ', '').replace(':', '_').replace("\"", "")
        for term in terms
    ]
    path = 'dataset/metadata/' + ''.join(filt_path)

    # Overwrite metadata query folder
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)

    query = '%20'.join(filt_url)
    page = 1
    page_num = 1

    # Save all pages of the JSON response
    while page <= page_num:
        url = 'https://www.xeno-canto.org/api/2/recordings?query={0}&page={1}'.format(query, page)
        try:
            # The context manager closes the HTTP response once it is read.
            with request.urlopen(url) as response:
                payload = response.read()
        except error.HTTPError as e:
            print('An error has occurred: ' + str(e))
            # sys.exit is always available (the bare exit() helper comes from
            # the site module) and a non-zero status reports the failure.
            sys.exit(1)

        print("Downloading metadate page " + str(page) + "...")
        data = json.loads(payload.decode('UTF-8'))
        filename = path + '/page' + str(page) + '.json'
        with open(filename, 'w') as saved:
            json.dump(data, saved)

        # The total page count is only known once a response has arrived.
        page_num = data['numPages']
        page += 1

    # Return the path to the folder containing downloaded metadata
    return path
# Retrieves metadata and audio recordings
# adapted from https://github.com/ntivirikin/xeno-canto-py.git
def download(filt):
    """Download every recording matching a query into a numbered folder.

    Metadata is fetched through metadata(filt); each saved page is then
    read in turn and every recording it lists is stored as
    ``data/<label>/<track id>.mp3``. Files that already exist are skipped.

    ``<label>`` is the number of entries in BIRDS.csv minus one, i.e. the
    number of the last label on file when labels are numbered from 0 in
    order (NOTE(review): confirm BIRDS.csv is always numbered that way).

    Args:
        filt: iterable of query-term strings, passed on to metadata().
    """
    print("Downloading all recordings for query...")

    # altered from original code - data saved in numbered directory in the
    # 'data' directory. The number is derived from the labels stored in
    # BIRDS.csv; it is read here because no module-level BIRDS exists.
    audio_path = 'data/' + str(len(load_labels()) - 1) + '/'

    # Retrieve metadata to parse for download links
    path = metadata(filt)

    page = 1
    page_num = 1
    while page <= page_num:
        # Load the page being processed, so later pages are downloaded too.
        with open(path + '/page' + str(page) + ".json", 'r') as jsonfile:
            data = json.load(jsonfile)

        if page == 1:
            page_num = data['numPages']
            print("Found " + str(data['numRecordings']) + " recordings for given query, downloading...")

        # Pulling track ID and download link for naming and retrieval
        for recording in data['recordings']:
            link = recording['file']
            # Scheme-less ("//host/...") links get 'http:' prepended; links
            # that already carry a scheme are used unchanged.
            url = link if '://' in link else 'http:' + link
            track_id = str(recording['id'])
            audio_file = track_id + '.mp3'

            # If the file exists in the directory, we will skip it
            if os.path.exists(audio_path + audio_file):
                continue
            os.makedirs(audio_path, exist_ok=True)

            print("Downloading " + track_id + ".mp3...")
            request.urlretrieve(url, audio_path + audio_file)

        page += 1
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()