-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathvisualize.py
151 lines (137 loc) · 6.86 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import os
import json
import argparse
import glob
import cv2
import numpy as np
from tqdm import trange
# Palette of 60 colour triples used to draw trajectories. A trajectory picks
# its colour with `tid % len(_colors)`, so tids beyond the palette wrap around;
# a few entries repeat, so distinct tids can share a colour.
# NOTE(review): presumably (R, G, B) order - every use in this file reverses the
# tuple (`[::-1]`) before handing it to an OpenCV drawing call; confirm.
_colors = [(244, 67, 54), (255, 245, 157), (29, 233, 182), (118, 255, 3),
(33, 150, 243), (179, 157, 219), (233, 30, 99), (205, 220, 57),
(27, 94, 32), (255, 111, 0), (187, 222, 251), (24, 255, 255),
(63, 81, 181), (156, 39, 176), (183, 28, 28), (130, 119, 23),
(139, 195, 74), (0, 188, 212), (224, 64, 251), (96, 125, 139),
(0, 150, 136), (121, 85, 72), (26, 35, 126), (129, 212, 250),
(158, 158, 158), (225, 190, 231), (183, 28, 28), (230, 81, 0),
(245, 127, 23), (27, 94, 32), (0, 96, 100), (13, 71, 161),
(74, 20, 140), (198, 40, 40), (239, 108, 0), (249, 168, 37),
(46, 125, 50), (0, 131, 143), (21, 101, 192), (106, 27, 154),
(211, 47, 47), (245, 124, 0), (251, 192, 45), (56, 142, 60),
(0, 151, 167), (25, 118, 210), (123, 31, 162), (229, 57, 53),
(251, 140, 0), (253, 216, 53), (67, 160, 71), (0, 172, 193),
(30, 136, 229), (142, 36, 170), (244, 67, 54), (255, 152, 0),
(255, 235, 59), (76, 175, 80), (0, 188, 212), (33, 150, 243)]
def read_video(path):
    """Decode every frame of the video at ``path`` into memory.

    Args:
        path: Path of the video file; it is opened with OpenCV's FFMPEG
            backend.

    Returns:
        A list with one entry per successfully decoded frame, in decoding
        order, exactly as returned by ``cv2.VideoCapture.read``.

    Raises:
        Exception: If the capture cannot be opened.
    """
    cap = cv2.VideoCapture(path, cv2.CAP_FFMPEG)
    try:
        if not cap.isOpened():
            raise Exception('Cannot open {}'.format(path))
        video = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # read() reports failure once no further frame is available
                break
            video.append(frame)
    finally:
        # release the handle on every exit path, including the error ones
        cap.release()
    return video
def write_video(video, fps, size, path):
    """Encode the frames in ``video`` to ``path`` with the ``mp4v`` codec.

    Args:
        video: Iterable of frames, written in iteration order.
        fps: Frame rate passed to ``cv2.VideoWriter``.
        size: Frame size passed to ``cv2.VideoWriter``; the caller in this
            file passes ``(width, height)``.
        path: Destination file path.

    Raises:
        Exception: If the writer cannot be opened (for example when the
            destination directory does not exist). Without this check the
            frames would be dropped silently and no file produced.
    """
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(path, cv2.CAP_FFMPEG, fourcc, fps, size)
    try:
        if not out.isOpened():
            # mirror read_video: fail loudly instead of writing nothing
            raise Exception('Cannot open {}'.format(path))
        for frame in video:
            out.write(frame)
    finally:
        # always finalise/close the writer, even if writing a frame raises
        out.release()
def _letterbox_frames(video, inner_w, inner_h, boundary):
    """Replace each frame by a resized copy centred on a black canvas with a ``boundary``-pixel margin."""
    for i, frame in enumerate(video):
        canvas = np.zeros((inner_h+2*boundary, inner_w+2*boundary, 3), dtype=np.uint8)
        canvas[boundary:boundary+inner_h, boundary:boundary+inner_w] = cv2.resize(frame, (inner_w, inner_h))
        video[i] = canvas


def _attach_relations(anno):
    """Store on every trajectory box the relations whose subject it is and that span its frame."""
    # Group relations by subject once so that each box only scans its own
    # relations instead of the whole list; relative order is preserved.
    by_subject = {}
    for r in anno['relation_instances']:
        by_subject.setdefault(r['subject_tid'], []).append(r)
    for fid, boxes in enumerate(anno['trajectories']):
        for x in boxes:
            x['rels'] = []
            # -1 means "no relation active in this frame"
            x['timestamp'] = -1
            for r in by_subject.get(x['tid'], ()):
                # NOTE(review): end_fid is treated as inclusive here - confirm
                # against the annotation format.
                if r['begin_fid'] <= fid <= r['end_fid']:
                    x['rels'].append({
                        'timestamp': r['begin_fid'],
                        'predicate': r['predicate'],
                        'object_tid': r['object_tid']
                    })
                    # remember the begin frame of the most recently started relation
                    if r['begin_fid'] > x['timestamp']:
                        x['timestamp'] = r['begin_fid']


def _draw_frame(frame, boxes, subobj, ratio, boundary, max_timestamp):
    """Draw boxes, labels and relation texts of one frame in place; return the updated ``max_timestamp``."""
    font = cv2.FONT_HERSHEY_SIMPLEX
    bbox_thickness = 1
    # regular text style
    font_scalar = 0.5
    font_thickness = 1
    # highlighted text style, used for relations starting at max_timestamp
    h_font_scalar = 0.8
    h_font_thickness = 2
    # boxes are processed in ascending order of their latest relation start
    for x in sorted(boxes, key=lambda a: a['timestamp']):
        # map the box from original pixel coordinates onto the padded canvas
        xmin = int(round(x['bbox']['xmin']*ratio))+boundary
        xmax = int(round(x['bbox']['xmax']*ratio))+boundary
        ymin = int(round(x['bbox']['ymin']*ratio))+boundary
        ymax = int(round(x['bbox']['ymax']*ratio))+boundary
        sub = subobj[x['tid']]
        sub_name = '{}.{}'.format(x['tid']+1, sub['name'])
        # palette tuples are reversed before being handed to OpenCV
        sub_color = sub['color'][::-1]
        font_size, font_baseline = cv2.getTextSize(sub_name, font, font_scalar, font_thickness)
        h_font_size, h_font_baseline = cv2.getTextSize(sub_name, font, h_font_scalar, h_font_thickness)
        # draw subject: outline, filled label background, then the label text
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), sub_color, bbox_thickness)
        cv2.rectangle(frame, (xmin, ymin-font_size[1]-font_baseline), (xmin+font_size[0], ymin), sub_color, -1)
        cv2.putText(frame, sub_name, (xmin, ymin-font_baseline), font, font_scalar, (0, 0, 0), font_thickness, cv2.LINE_AA)
        # draw relations, most recently started first
        if not x['rels']:
            continue
        rels = sorted(x['rels'], key=lambda a: a['timestamp'], reverse=True)
        if rels[0]['timestamp'] > max_timestamp:
            max_timestamp = rels[0]['timestamp']
        y = ymin+h_font_size[1]
        for r in rels:
            obj = subobj[r['object_tid']]
            obj_color = obj['color'][::-1]
            rel_name = '{}_{}.{}'.format(r['predicate'], r['object_tid']+1, obj['name'])
            if r['timestamp'] == max_timestamp:
                scalar, thickness = h_font_scalar, h_font_thickness
                step = h_font_size[1]+h_font_baseline
            else:
                scalar, thickness = font_scalar, font_thickness
                step = font_size[1]+font_baseline
            cv2.putText(frame, rel_name, (xmin, y+font_baseline), font, scalar, obj_color, thickness, cv2.LINE_AA)
            y += step
    return max_timestamp


def visualize(anno, video_path, out_path):
    """Render the annotation ``anno`` on top of a video and save the result.

    The video is read fully into memory, scaled so that its content is 720
    pixels high, padded with a 20-pixel black border, and every frame gets
    its trajectory boxes, ``<tid+1>.<category>`` labels and the relation
    texts of each subject drawn on it. Relations that started at the latest
    start frame seen so far are drawn in a larger, bolder font.

    Args:
        anno: Annotation dict. Keys read here: ``video_id``, ``frame_count``,
            ``width``, ``height``, ``fps``, ``subject/objects`` (items with
            ``tid`` and ``category``), ``trajectories`` (one list of boxes
            per frame, each with ``tid`` and ``bbox`` holding
            ``xmin``/``xmax``/``ymin``/``ymax``) and ``relation_instances``
            (items with ``subject_tid``, ``object_tid``, ``predicate``,
            ``begin_fid`` and ``end_fid``).
            Side effect: every box in ``anno['trajectories']`` gains the
            keys ``rels`` and ``timestamp``.
        video_path: Path of the source video.
        out_path: Path of the rendered output video.

    Raises:
        AssertionError: If the frame count or the frame dimensions in
            ``anno`` do not match the decoded video.
    """
    video = read_video(video_path)
    assert anno['frame_count']==len(video), '{} : anno {} video {}'.format(anno['video_id'], anno['frame_count'], len(video))
    assert anno['width']==video[0].shape[1] and anno['height']==video[0].shape[0],\
            '{} : anno ({}, {}) video {}'.format(anno['video_id'], anno['height'], anno['width'], video[0].shape)
    # resize video to be 720p
    ratio = 720.0/anno['height']
    boundary = 20
    inner_w = int(round(anno['width']*ratio))
    inner_h = int(round(anno['height']*ratio))
    # output frame size as (width, height), border included
    size = inner_w+2*boundary, inner_h+2*boundary
    _letterbox_frames(video, inner_w, inner_h, boundary)
    # collect subject/objects: display name and palette colour per tid
    subobj = {
        x['tid']: {
            'name': x['category'],
            'color': _colors[x['tid']%len(_colors)]
        }
        for x in anno['subject/objects']
    }
    # collect related relations in each frame
    _attach_relations(anno)
    # draw frames; max_timestamp carries over from frame to frame
    max_timestamp = 1
    for fid, boxes in enumerate(anno['trajectories']):
        max_timestamp = _draw_frame(video[fid], boxes, subobj, ratio, boundary, max_timestamp)
    write_video(video, anno['fps'], size, out_path)
# Command-line entry point: render one annotation file, or every *.json file
# of an annotation directory, onto its video.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Visualize annotation in video')
    parser.add_argument('video', type=str, help='Root path of videos')
    parser.add_argument('anno', type=str, help='A annotation json file or a directory of annotation jsons')
    parser.add_argument('out', type=str, help='Root path of output videos')
    args = parser.parse_args()
    if os.path.isdir(args.anno):
        # sorted so that the processing order is deterministic
        anno_paths = sorted(glob.glob('{}/*.json'.format(args.anno)))
        # results of a directory run go to <out>/<annotation directory name>
        args.out = os.path.join(args.out, os.path.basename(os.path.normpath(args.anno)))
    else:
        anno_paths = [args.anno]
    # Create the output directory (and any missing parents) in both modes;
    # os.mkdir would fail on a missing parent, and the single-file mode
    # previously did not create the directory at all.
    if not os.path.isdir(args.out):
        os.makedirs(args.out)
    for i in trange(len(anno_paths)):
        with open(anno_paths[i], 'r') as fin:
            anno = json.load(fin)
        # an explicit 'video_path' in the annotation wins over <video_id>.mp4
        if 'video_path' in anno:
            video_path = os.path.join(args.video, anno['video_path'])
        else:
            video_path = os.path.join(args.video, '{}.mp4'.format(anno['video_id']))
        out_path = os.path.join(args.out, '{}.mp4'.format(anno['video_id']))
        visualize(anno, video_path, out_path)