-
Notifications
You must be signed in to change notification settings - Fork 1
/
model_helper.py
158 lines (108 loc) · 5.99 KB
/
model_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def _build_dboxes():
layers_anchors=[]
for i, feat_layer in enumerate(self.params.feat_shapes):
anchor_bboxes=self._build_dboxes_per_layer(self.params.img_shape,
feat_layer,
self.params.anchor_sizes[i],
self.params.anchor_ratios[i],
self.params.anchor_steps[i],)
layers_anchors.append(anchor_bboxes)
return layers_anchors
def _build_dboxes_per_layer(self, img_shape,feat_layer,sizes,ratios,step,offset=0.5,dtype=np.float32):
y, x = np.mgrid[0:feat_layer[0], 0:feat_layer[1]]
y = (y.astype(dtype=np.float32) + offset) / feat_layer[0]
x = (x.astype(dtype=np.float32) + offset) / feat_layer[0]
y = np.expand_dims(y, axis=-1)
x = np.expand_dims(x, axis=-1)
num_anchors = len(sizes) + len(ratios)
h = np.zeros((num_anchors, ), dtype=dtype)
w = np.zeros((num_anchors, ), dtype=dtype)
h[0] = sizes[0] / img_shape[0]
w[0] = sizes[0] / img_shape[1]
di = 1
if len(sizes) > 1:
h[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[0]
w[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[1]
di += 1
for i, r in enumerate(ratios):
h[i+di] = sizes[0] / img_shape[0] / math.sqrt(r)
w[i+di] = sizes[0] / img_shape[1] * math.sqrt(r)
return y, x, h, w
def _encode_bboxes(self, bboxes, labels, dboxes):
target_labels = []
target_localizations = []
target_scores = []
for i, layer in enumerate(dboxes):
t_labels, t_loc, t_scores = self._encode_per_layer(labels, bboxes, layer)
target_labels.append(t_labels)
target_localizations.append(t_loc)
target_scores.append(t_scores)
return target_labels, target_localizations, target_scores
def _encode_per_layer(self, labels, bboxes, layer):
    """Match ground-truth boxes to one layer's default boxes and encode them.

    Loops (via ``tf.while_loop``) over the ground-truth entries; for every
    default box it keeps the label, score and coordinates of the
    ground-truth box with the highest Jaccard overlap seen so far, then
    converts the kept coordinates to centre/size offsets relative to the
    default boxes, scaled by ``self.params.prior_scaling``.

    Args:
        labels: indexed as ``labels[i]``; the loop runs while ``i`` is below
            the first dimension of ``labels``. Presumably a 1-D int64 tensor
            of class ids -- TODO confirm against the caller.
        bboxes: indexed as ``bboxes[i]``; each entry is read as
            ``[ymin, xmin, ymax, xmax]``.
        layer: tuple ``(y, x, h, w)`` of default-box centres and sizes.

    Returns:
        tuple: ``(feat_labels, feat_loc, feat_scores)``. ``feat_loc`` stacks
        ``[cx, cy, w, h]`` offsets along a new last axis.

    NOTE(review): ``tf.div`` and ``tf.log`` are TensorFlow 1.x names --
    confirm the TensorFlow version this file targets.
    """
    def jaccard_with_anchors(bbox):
        # Jaccard overlap (intersection over union) between one ground-truth
        # box and every default box of this layer.
        int_ymin = tf.maximum(y_min, bbox[0])
        int_xmin = tf.maximum(x_min, bbox[1])
        int_ymax = tf.minimum(y_max, bbox[2])
        int_xmax = tf.minimum(x_max, bbox[3])
        # Clamp at zero so non-overlapping boxes give an empty intersection.
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        inter_vol = h * w
        union_vol = vol_anchors - inter_vol + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        jaccard = tf.div(inter_vol, union_vol)
        return jaccard
    def condition(i, feat_labels, feat_scores,feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        # Continue while i is below the first dimension of labels; tf.shape
        # returns a vector, so the comparison is reduced to its first entry.
        r = tf.less(i, tf.shape(labels))
        return r[0]
    def body(i, feat_labels, feat_scores,feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        # One loop step: fold ground-truth entry i into the running targets.
        label = labels[i]
        bbox = bboxes[i]
        jaccard = jaccard_with_anchors(bbox)
        # Update only the default boxes where this entry beats the best
        # overlap recorded so far.
        mask = tf.greater(jaccard, feat_scores)
        # NOTE(review): feat_scores starts at zero and is only ever replaced
        # by larger jaccard values, so this test looks always true here --
        # confirm whether negative scores are meant as an "ignore" marker.
        mask = tf.logical_and(mask, feat_scores > -0.5)
        # Entries whose label is not below num_classes never update anything.
        mask = tf.logical_and(mask, label < self.params.num_classes)
        imask = tf.cast(mask, tf.int64)
        fmask = tf.cast(mask, dtype=tf.float32)
        # Blend: take the new value where the mask is set, keep the old one
        # elsewhere.
        feat_labels = imask * label + (1 - imask) * feat_labels
        feat_scores = tf.where(mask, jaccard, feat_scores)
        feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin
        feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin
        feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax
        feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax
        return [i+1, feat_labels, feat_scores,
                feat_ymin, feat_xmin, feat_ymax, feat_xmax]
    # min and max coordinates of the default boxes
    y, x, h, w = layer
    y_min = y - h / 2.
    x_min = x - w / 2.
    y_max = y + h / 2.
    x_max = x + w / 2.
    vol_anchors = (x_max - x_min) * (y_max - y_min)
    # One target slot per (row, column, anchor) of this layer.
    shape = (y.shape[0], y.shape[1], h.size)
    feat_labels = tf.zeros(shape, dtype=tf.int64)
    feat_scores = tf.zeros(shape, dtype=tf.float32)
    # Default boxes that never match keep the box (0, 0, 1, 1).
    feat_ymin = tf.zeros(shape, dtype=tf.float32)
    feat_xmin = tf.zeros(shape, dtype=tf.float32)
    feat_ymax = tf.ones(shape, dtype=tf.float32)
    feat_xmax = tf.ones(shape, dtype=tf.float32)
    i = 0
    # Loop variables: scalar counter plus six rank-3 tensors whose
    # dimensions are left unconstrained in the shape invariants.
    [i, feat_labels, feat_scores,
     feat_ymin, feat_xmin,feat_ymax, feat_xmax] = tf.while_loop(condition, body,[i, feat_labels, feat_scores,feat_ymin, feat_xmin,feat_ymax, feat_xmax],
                                                                shape_invariants=[tf.TensorShape([]),tf.TensorShape([None, None,None]),
                                                                                  tf.TensorShape([None, None,None]),
                                                                                  tf.TensorShape([None, None,None]),
                                                                                  tf.TensorShape([None, None,None]),
                                                                                  tf.TensorShape([None, None,None]),
                                                                                  tf.TensorShape([None, None,None])])
    # Transform to center / size.
    feat_cy = (feat_ymax + feat_ymin) / 2.
    feat_cx = (feat_xmax + feat_xmin) / 2.
    feat_h = feat_ymax - feat_ymin
    feat_w = feat_xmax - feat_xmin
    # Encode features: centre offsets relative to the default-box size and
    # log size ratios, each divided by its prior_scaling entry.
    feat_cy = (feat_cy - y) / h / self.params.prior_scaling[0]
    feat_cx = (feat_cx - x) / w / self.params.prior_scaling[1]
    feat_h = tf.log(feat_h / h) / self.params.prior_scaling[2]
    feat_w = tf.log(feat_w / w) / self.params.prior_scaling[3]
    # Last axis order is (cx, cy, w, h).
    feat_loc = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1)
    return feat_labels, feat_loc, feat_scores