-
Notifications
You must be signed in to change notification settings - Fork 3
/
onnx_utils.py
206 lines (150 loc) · 6.85 KB
/
onnx_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# author:samge
# date:2024-06-03 14:59
# describe:使用onnx提取图片中目标边框的工具
import time
import cv2
import numpy as np
import onnxruntime
class OnnxUtils:
    """Detect object bounding boxes in an image with an ONNX model.

    The post-processing in this class expects the first model output to be
    laid out as ``(batch, 4 + num_classes, num_candidates)``: after the
    transpose each row is ``(cx, cy, w, h, score_0, score_1, ...)``.
    NOTE(review): this looks like the YOLOv8-style export layout -- confirm
    against the model actually being loaded.

    Instances are callable; ``utils(image)`` is the same as
    ``utils.detect_objects(image)``.
    """

    def __init__(
        self,
        path,
        conf_thres=0.7,
        iou_thres=0.7,
        imgsz=(640, 640),
    ):
        """Load the model and store the detection thresholds.

        Args:
            path: Path of the ONNX model file passed to onnxruntime.
            conf_thres: Candidates whose best class score is not strictly
                greater than this value are discarded.
            iou_thres: Boxes whose IoU with an already kept box is not
                strictly less than this value are suppressed.
            imgsz: Model input size as ``(width, height)``. A tuple is used
                as the default so the default object is never shared and
                mutated between instances.
        """
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        self.imgsz = imgsz

        # Initialize model
        self.initialize_model(path)

    def __call__(self, image):
        """Run detection on ``image``; see :meth:`detect_objects`."""
        return self.detect_objects(image)

    def initialize_model(self, path):
        """Create the inference session and cache its input/output names.

        CUDA is listed before CPU in the provider list handed to
        onnxruntime.
        """
        self.session = onnxruntime.InferenceSession(
            path,
            providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
        )

        # Get model info
        self.get_input_details()
        self.get_output_details()

    def detect_objects(self, image):
        """Detect objects in a BGR image.

        Args:
            image: Image array of shape ``(height, width, 3)`` in BGR
                channel order (it is converted with ``COLOR_BGR2RGB``).

        Returns:
            ``(boxes, scores, class_ids)``. ``boxes`` are ``(x1, y1, x2, y2)``
            in the coordinate system of the original image. All three are
            empty lists when nothing passes the confidence threshold. The
            same values are also stored on ``self.boxes``, ``self.scores``
            and ``self.class_ids``.
        """
        input_tensor, ratio = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        self.boxes, self.scores, self.class_ids = self.process_output(outputs, ratio)
        return self.boxes, self.scores, self.class_ids

    def prepare_input(self, image):
        """Convert a BGR image into the float32 NCHW tensor fed to the model.

        Returns:
            ``(input_tensor, ratio)`` where ``ratio`` is the factor that maps
            model-space coordinates back to the original image.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize image with aspect ratio and padding
        input_img, ratio = self.ratioresize(input_img)

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        # HWC -> CHW, then add the batch axis.
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor, ratio

    def inference(self, input_tensor):
        """Run the session on ``input_tensor`` and return all model outputs."""
        return self.session.run(self.output_names, {self.input_names[0]: input_tensor})

    def process_output(self, output, ratio):
        """Turn raw model output into filtered, de-duplicated detections.

        Args:
            output: Sequence of model outputs; only ``output[0]`` is used.
            ratio: Factor that scales model-space boxes to the original image.

        Returns:
            ``(boxes, scores, class_ids)`` as NumPy arrays, or three empty
            lists when no candidate exceeds the confidence threshold.
        """
        raw = np.asarray(output[0])
        # Drop only the batch axis. A blanket squeeze would also collapse the
        # candidate axis when the model emits a single candidate, which breaks
        # the 2-D indexing below.
        predictions = raw[0] if raw.ndim == 3 else np.squeeze(raw)
        predictions = predictions.T

        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4:], axis=1)
        confident = scores > self.conf_threshold
        if not confident.any():
            return [], [], []
        predictions = predictions[confident, :]
        scores = scores[confident]

        # Get the class with the highest confidence
        class_ids = np.argmax(predictions[:, 4:], axis=1)

        # Get bounding boxes for each object
        boxes = self.extract_boxes(predictions, ratio)

        # Apply non-maxima suppression to suppress weak, overlapping bounding boxes
        indices = self.nms(boxes, scores, self.iou_threshold)

        return boxes[indices], scores[indices], class_ids[indices]

    def extract_boxes(self, predictions, ratio):
        """Return the boxes of ``predictions`` as scaled ``(x1, y1, x2, y2)``.

        The first four columns of ``predictions`` are read as
        ``(cx, cy, w, h)``. ``predictions`` itself is left untouched.
        """
        # Multiply into a new array: an in-place ``*=`` on the slice would
        # write through the view into the caller's array.
        boxes = predictions[:, :4] * ratio

        # Convert boxes to xyxy format
        return self.xywh2xyxy(boxes)

    def get_input_details(self):
        """Cache the model input names/shape and the configured input size."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        self.input_shape = model_inputs[0].shape
        # The working size comes from ``imgsz`` (width, height), not from the
        # shape reported by the model.
        self.input_width, self.input_height = self.imgsz

    def get_output_details(self):
        """Cache the model output names."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]

    def ratioresize(self, im, color=114):
        """Resize ``im`` to fit the model input while keeping aspect ratio.

        The resized image is placed in the top-left corner of a canvas of
        size ``(input_height, input_width, 3)``; the remaining area is filled
        with ``color``.

        Args:
            im: Image array of shape ``(height, width, 3)``.
            color: Fill value for the padded area.

        Returns:
            ``(padded_img, ratio)`` where ``ratio`` is ``1 / scale`` and maps
            coordinates on the padded image back to ``im``.
        """
        shape = im.shape[:2]
        new_h, new_w = self.input_height, self.input_width
        padded_img = np.full((new_h, new_w, 3), color, dtype=np.uint8)

        # Scale ratio (new / old)
        r = min(new_h / shape[0], new_w / shape[1])

        # Size of the resized image as (width, height); clamp to one pixel so
        # extreme aspect ratios never request a zero-sized resize.
        new_unpad = (
            max(1, int(round(shape[1] * r))),
            max(1, int(round(shape[0] * r))),
        )

        if shape[::-1] != new_unpad:
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

        padded_img[: new_unpad[1], : new_unpad[0]] = im

        return padded_img, 1 / r

    def nms(self, boxes, scores, iou_threshold):
        """Class-agnostic greedy non-maximum suppression.

        Args:
            boxes: Array of shape ``(n, 4)`` in ``(x1, y1, x2, y2)`` format.
            scores: Array of ``n`` scores.
            iou_threshold: A box is dropped when its IoU with a kept box is
                not strictly below this value.

        Returns:
            List of indices into ``boxes`` to keep, highest score first.
        """
        # Sort by score, best first
        sorted_indices = np.argsort(scores)[::-1]

        keep_boxes = []
        while sorted_indices.size > 0:
            # Pick the highest-scoring remaining box
            box_id = sorted_indices[0]
            keep_boxes.append(box_id)

            # Compute IoU of the picked box with the rest
            remaining = sorted_indices[1:]
            ious = self.compute_iou(boxes[box_id, :], boxes[remaining, :])

            # Keep only boxes that overlap the picked one less than the threshold
            sorted_indices = remaining[ious < iou_threshold]

        return keep_boxes

    def compute_iou(self, box, boxes):
        """Return the IoU of ``box`` with every row of ``boxes``.

        Both use ``(x1, y1, x2, y2)`` format. Where the union area is not
        positive (degenerate boxes) the IoU is reported as 0 instead of
        dividing by zero.
        """
        # Corners of the intersection rectangle
        xmin = np.maximum(box[0], boxes[:, 0])
        ymin = np.maximum(box[1], boxes[:, 1])
        xmax = np.minimum(box[2], boxes[:, 2])
        ymax = np.minimum(box[3], boxes[:, 3])

        # Compute intersection area
        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

        # Compute union area
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = np.asarray(box_area + boxes_area - intersection_area)

        # Compute IoU, leaving 0 wherever the union is empty
        iou = np.zeros(
            union_area.shape,
            dtype=np.result_type(union_area.dtype, np.float32),
        )
        np.divide(intersection_area, union_area, out=iou, where=union_area > 0)

        return iou

    def xywh2xyxy(self, x):
        """Convert boxes from ``(cx, cy, w, h)`` to ``(x1, y1, x2, y2)``.

        Returns a new array; ``x`` is not modified.
        """
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y

    def draw_boxes(self, image, boxes, scores, class_ids):
        """Draw each box and its score on ``image`` in place.

        Args:
            image: Image to draw on; it is modified and nothing is returned.
            boxes: Iterable of ``(x1, y1, x2, y2)`` boxes.
            scores: Score for each box, rendered with two decimals.
            class_ids: Class id for each box; iterated alongside the others
                but not drawn.
        """
        for box, score, _class_id in zip(boxes, scores, class_ids):
            x1, y1, x2, y2 = box

            # Draw bounding box
            cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 1)

            # Display the score only
            label = f"{score:.2f}"

            # Get text size
            (_, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_TRIPLEX, 0.6, 1)

            # Calculate y-coordinate to place the text
            y_top = int(y1 - 6)
            if y_top < text_height:  # if text will be truncated
                y_top = int(y1 + text_height + 6)  # move text inside the box

            # Put the text on the image
            cv2.putText(image, label, (int(x1), y_top), cv2.FONT_HERSHEY_TRIPLEX, 0.6, (0, 0, 255), 1)