-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsave_image_all.py
81 lines (69 loc) · 2.88 KB
/
save_image_all.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import json
import flickrapi
import pandas as pd
import numpy as np
from argparse import Namespace
from collections import Counter
import pickle
import os
import urllib
# Script-wide settings, gathered in a single namespace that the functions
# below read as a module-level global.
args = Namespace(
    # Flickr API credentials (the bracketed values look like placeholders;
    # supply real ones before running).
    api_key='[api_key]',
    api_secret='[api_secret]',
    # Storage locations. Both end with '/' because paths are built by plain
    # string concatenation.
    save_dir='data_storage/',
    image_dir='data_storage/images/grid/',
    # Settings that the code visible in this file does not reference.
    radius=0.3,
    len_grid=20,
    tags=None,
    # Example of a comma-separated tag filter that could replace ``tags``:
    # tags = 'landscape,urban,heritage,culture,building,architecture,park,street'
)
def _save_photo_records(Photos):
    """Write the collected records to both pickle files and the TSV summary."""
    for file_name in ('Photo_sizes_pre_sep.p', 'Photo_sizes_sep.p'):
        with open(args.image_dir + file_name, 'wb') as fp:
            pickle.dump(Photos, fp, protocol=pickle.HIGHEST_PROTOCOL)
    # errors='ignore': the 'others' column does not exist when Photos is empty
    # or contains only records whose size lookup failed.
    photo_df = pd.DataFrame(Photos).T.drop('others', axis=1, errors='ignore')
    photo_df.to_csv(args.image_dir + 'photos_sizes_sep.csv', sep='\t',
                    encoding='utf-8-sig')


def get_photos(flickr, Photos, Ids):
    """Look up size information for each photo id and download its thumbnail.

    Ids already present in ``Photos`` are skipped, so an interrupted run can be
    resumed by passing in the previously saved mapping.

    Args:
        flickr: Client object exposing ``photos.getSizes(photo_id=..., format='json')``
            and returning a JSON document.
        Photos: Dict mapping photo id -> record dict. Mutated in place.
        Ids: Sized iterable of photo ids to process.

    Returns:
        The same ``Photos`` dict. A successfully parsed record holds the keys
        ``candownload``, ``url_c``, ``url_q`` and ``others`` (the full getSizes
        response); a photo whose response could not be parsed keeps an empty
        record.

    Side effects:
        Downloads ``<id>.jpg`` into ``args.image_dir`` for downloadable photos
        and writes the pickle/TSV checkpoint files there, periodically and
        once more at the end.
    """
    # Function-scope import: a bare ``import urllib`` does not guarantee that
    # the ``urllib.request`` submodule has been loaded.
    import urllib.request

    for id_now in Ids:
        if id_now in Photos:
            continue
        # Register the id before the lookup so that a failed photo is still
        # recorded (as an empty dict) and not retried on the next run.
        Photos[id_now] = {}
        sizes = json.loads(flickr.photos.getSizes(photo_id=id_now, format='json'))
        try:
            size_list = sizes['sizes']['size']
            # Positional picks into the size list; which label they correspond
            # to depends on the sizes Flickr returns for the photo.
            url_c = size_list[-2]['source']
            url_q = size_list[1]['source']
            can = sizes['sizes']['candownload']
            Photos[id_now]['candownload'] = can
            Photos[id_now]['url_c'] = url_c
            Photos[id_now]['url_q'] = url_q
            Photos[id_now]['others'] = sizes
            if can:
                urllib.request.urlretrieve(
                    url_q, args.image_dir + '{}.jpg'.format(id_now))
            # Checkpoint every 20 records so little work is lost on a crash.
            if len(Photos) % 20 == 1:
                print('{}/{} photos collected'.format(len(Photos), len(Ids)))
                _save_photo_records(Photos)
        except Exception as exc:
            # Best effort: report the offending id and move on. Unlike a bare
            # ``except``, this lets Ctrl-C / SystemExit stop the crawl.
            print('{}: {!r}'.format(id_now, exc))
            continue
    _save_photo_records(Photos)
    return Photos
def main():
    """Collect size info and thumbnails for every photo id in the grid CSV.

    Reads ``grid/photos_sep.csv`` (tab-separated) under ``args.save_dir``,
    resumes from ``Photo_sizes_sep.p`` in ``args.image_dir`` when that file
    exists, and hands the ids to ``get_photos``.
    """
    flickr = flickrapi.FlickrAPI(args.api_key, args.api_secret)
    df = pd.read_csv(args.save_dir + 'grid/photos_sep.csv', sep='\t').rename(
        columns={'Unnamed: 0': 'num'})
    Ids = df['ids']

    # Make sure the output directory exists before anything is read from or
    # written to it; a missing directory previously raised IndexError here.
    os.makedirs(args.image_dir, exist_ok=True)

    cache_path = args.image_dir + 'Photo_sizes_sep.p'
    if os.path.isfile(cache_path):
        # NOTE: pickle.load can execute arbitrary code; only load checkpoint
        # files that this script wrote itself.
        with open(cache_path, 'rb') as fp:
            Photos = pickle.load(fp)
    else:
        Photos = {}
    get_photos(flickr, Photos, Ids)


if __name__ == "__main__":
    main()
"""## END"""