-
Notifications
You must be signed in to change notification settings - Fork 4
/
VocFormat.py
116 lines (100 loc) · 3.73 KB
/
VocFormat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import argparse
import json
import urllib.error
import urllib.request
from PIL import Image
from xml.etree.ElementTree import Element, SubElement, Comment, tostring
import xml.etree.ElementTree as ET
# Command-line interface: three optional string options, each a filesystem
# path with a default relative to the current working directory.
parser = argparse.ArgumentParser(description='Collect FCDBv2 from YFCC100M')
_CLI_OPTIONS = (
    ('--yfcc', './yfcc100m_dataset', 'path for yfcc100m metadata'),
    ('--id_json', './image_id_list.json', 'path for image id list'),
    ('--save_dir', './VOC_format', 'path for save dir'),
)
for _flag, _default, _help in _CLI_OPTIONS:
    parser.add_argument(_flag, default=_default, type=str, help=_help)
# Parsed options are read by the rest of the script as args.yfcc,
# args.id_json and args.save_dir.
args = parser.parse_args()
# make save dirs
# Output layout under args.save_dir:
#   Annotations/     one XML file per image
#   JPEGImages/      downloaded images
#   ImageSets/Main/  image-set list files
# makedirs(..., exist_ok=True) is used instead of mkdir so that the script can
# be started again on a partially filled save dir; the download loop skips
# images whose JPEG already exists, which only helps if this step does not
# raise FileExistsError first.
for _sub_dir in ('Annotations', 'JPEGImages', os.path.join('ImageSets', 'Main')):
    os.makedirs(os.path.join(args.save_dir, _sub_dir), exist_ok=True)
# Load metadata and ImageID list
print('Loading Data...')
# Context managers close both input files as soon as they have been read
# (the handles were previously left open for the whole run).
# NOTE(review): readlines() keeps every metadata line in memory at once;
# the main loop indexes this list by integer position.
with open(args.yfcc) as f1:
    lines = f1.readlines()
with open(args.id_json, 'r') as f2:
    ids = json.load(f2)
# err counts images that could not be downloaded or opened.
err = 0
# Total number of entries, used only for progress output. The name shadows
# the builtin all(); it is kept because the main loop reads it.
all = len(ids)
# Start Main loop
# For each entry of the id list: look up its metadata line, download the
# image, read its pixel size, and write one VOC-style XML annotation holding
# one 'person' object per bounding box.
print('Start!!')
for i, (k, v) in enumerate(ids.items()):
    # k is converted to an integer index into the metadata lines;
    # v is iterated as a sequence of boxes (box[0..3] -> xmin, ymin, xmax, ymax).
    line = lines[int(k)]
    line_split = line.strip().split('\t')
    # Tab-separated field 1 is used as the photo id, field 16 as the URL.
    photo_id = line_split[1]
    photo_url = line_split[16]

    # Download Images
    save_img_path = os.path.join(args.save_dir, 'JPEGImages', photo_id + '.jpg')
    # Already downloaded: skip the entry entirely.
    if os.path.exists(save_img_path):
        continue
    try:
        with urllib.request.urlopen(photo_url) as web_file:
            data = web_file.read()
        with open(save_img_path, mode='wb') as local_file:
            local_file.write(data)
    except Exception:
        # Best-effort download: any failure is counted and the entry skipped.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit stop the script instead of being counted as an error.
        err += 1
        continue

    try:
        # The context manager closes the image file once the size is read.
        with Image.open(save_img_path) as img:
            size = img.size
    except Exception:
        # The downloaded file could not be opened as an image: discard it.
        os.remove(save_img_path)
        err += 1
        continue

    # Make VOC format
    annotation_el = Element('annotation')
    folder_el = SubElement(annotation_el, 'folder')
    folder_el.text = 'FCDBv2'
    filename_el = SubElement(annotation_el, 'filename')
    filename_el.text = photo_id
    source_el = SubElement(annotation_el, 'source')
    database_el = SubElement(source_el, 'database')
    database_el.text = 'Fashion Culture DataBase V2'
    size_el = SubElement(annotation_el, 'size')
    width_el = SubElement(size_el, 'width')
    width_el.text = str(size[0])
    height_el = SubElement(size_el, 'height')
    height_el.text = str(size[1])
    for box in v:
        object_el = SubElement(annotation_el, 'object')
        name_el = SubElement(object_el, 'name')
        name_el.text = 'person'
        diff_el = SubElement(object_el, 'difficult')
        diff_el.text = '0'
        bndbox_el = SubElement(object_el, 'bndbox')
        xmin_el = SubElement(bndbox_el, 'xmin')
        xmin_el.text = str(box[0])
        ymin_el = SubElement(bndbox_el, 'ymin')
        ymin_el.text = str(box[1])
        xmax_el = SubElement(bndbox_el, 'xmax')
        xmax_el.text = str(box[2])
        ymax_el = SubElement(bndbox_el, 'ymax')
        ymax_el.text = str(box[3])

    save_anno_path = os.path.join(args.save_dir, 'Annotations', photo_id + '.xml')
    tree = ET.ElementTree(element=annotation_el)
    tree.write(save_anno_path, xml_declaration=False)

    # Periodic progress report; only reached for entries processed in this
    # run, since skipped and failed entries `continue` before this point.
    if (i + 1) % 2500 == 0:
        print('Progress:', i + 1, '/', all)
        print('Saved :', i + 1 - err)
        print('Error :', err)
# Write the image-set lists from the annotation files present on disk:
#   person_trainval.txt  "<name> 1" per annotation
#   trainval.txt         "<name>"   per annotation
# sorted() makes the line order reproducible (os.listdir order is arbitrary).
anno_files = sorted(os.listdir(os.path.join(args.save_dir, 'Annotations')))
_main_dir = os.path.join(args.save_dir, 'ImageSets', 'Main')
# Each list is opened once and rewritten from scratch ("w"), so running this
# step again cannot append duplicate entries; the files are closed by the
# context manager even if a write fails.
with open(os.path.join(_main_dir, 'person_trainval.txt'), "w") as _person_list, \
        open(os.path.join(_main_dir, 'trainval.txt'), "w") as _trainval_list:
    for anno_file in anno_files:
        # Strip the extension to get the image name.
        name = os.path.splitext(anno_file)[0]
        _person_list.write(name + " 1" + "\n")
        _trainval_list.write(name + "\n")
print('Finish!!')