
Merge pull request #129 from pipeless-ai/improve_examples
fix(examples): Improve yolo onnx example
miguelaeh authored Jan 31, 2024
2 parents: 537e8e9 + 583cb26 · commit: 5f8254f
Showing 3 changed files with 125 additions and 70 deletions.
examples/onnx-yolo/post-process.py: 129 changes (87 additions, 42 deletions)
@@ -4,17 +4,21 @@
 def hook(frame_data, _):
     frame = frame_data['original']
     model_output = frame_data['inference_output']
-    yolo_input_shape = (640, 640, 3) # h,w,c
-    boxes, scores, class_ids = parse_yolo_output(model_output, frame.shape, yolo_input_shape)
-    class_labels = [yolo_classes[id] for id in class_ids]
-    for i in range(len(boxes)):
-        draw_bbox(frame, boxes[i], class_labels[i], scores[i])
+    if len(model_output) > 0:
+        yolo_input_shape = (640, 640, 3) # h,w,c
+        boxes, scores, class_ids = postprocess_yolo(frame.shape, yolo_input_shape, model_output)
+        class_labels = [yolo_classes[id] for id in class_ids]
+        for i in range(len(boxes)):
+            draw_bbox(frame, boxes[i], class_labels[i], scores[i], color_palette[class_ids[i]])

-    frame_data['modified'] = frame
+        frame_data['modified'] = frame

 #################################################
 # Util functions to make the hook more readable #
 #################################################
+confidence_thres = 0.45
+iou_thres = 0.5

 yolo_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
                 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
@@ -24,47 +24,19 @@ def hook(frame_data, _):
                 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
                 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
                 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
+color_palette = np.random.uniform(0, 255, size=(len(yolo_classes), 3))

 def xywh2xyxy(i):
     """
     Converts from (center-x, center-y,w,h) to (x1,y1,x2,y2)
     """
     o = i.view() # Create numpy view
-    dw = i[..., 2] / 2
-    dh = i[..., 3] / 2
-    o[..., 0] = i[..., 0] - dw
-    o[..., 1] = i[..., 1] - dh
-    o[..., 2] = i[..., 0] + dw
-    o[..., 3] = i[..., 1] + dh
+    o[..., 0] = i[..., 0] - i[..., 2] / 2
+    o[..., 1] = i[..., 1] - i[..., 3] / 2
+    o[..., 2] = i[..., 0] + i[..., 2]
+    o[..., 3] = i[..., 1] + i[..., 3]
     return o

-def rescale_boxes(original_image_shape, model_input_shape, boxes):
-    img_height, img_width, _ = original_image_shape
-    input_height, input_width, _ = model_input_shape
-    input_shape = np.array([input_width, input_height, input_width, input_height])
-    boxes = np.divide(boxes, input_shape, dtype=np.float32)
-    boxes *= np.array([img_width, img_height, img_width, img_height])
-    return boxes
-
-def parse_yolo_output(model_output, orginal_image_shape, model_input_shape):
-    confidence_threshold = 0.3
-    iou_threshold = 0.7
-
-    predictions = np.squeeze(model_output[0]).T
-
-    scores = np.max(predictions[:, 4:], axis=1)
-    predictions = predictions[scores > confidence_threshold, :]
-    scores = scores[scores > confidence_threshold]
-    if len(scores) == 0:
-        return [], [], []
-
-    class_ids = np.argmax(predictions[:, 4:], axis=1)
-
-    # Extract boxes
-    boxes = predictions[:, :4]
-    boxes = rescale_boxes(orginal_image_shape, model_input_shape, boxes)
-    boxes = xywh2xyxy(boxes)
-
-    indices = cv2.dnn.NMSBoxes(boxes, scores, confidence_threshold, iou_threshold)
-    return boxes[indices], scores[indices], class_ids[indices]

 def clip_boxes(boxes, shape):
     boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
     boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
@@ -85,3 +61,72 @@ def draw_bbox(image, box, label='', score=None, color=(255, 0, 255), txt_color=(
         else:
             cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                         0, lw / 3, txt_color, thickness=tf, lineType=cv2.LINE_AA)
+
+def postprocess_yolo(original_frame_shape, resized_img_shape, output):
+    original_height, original_width, _ = original_frame_shape
+    resized_height, resized_width, _ = resized_img_shape
+
+    outputs = np.transpose(np.squeeze(output[0]))
+
+    # Get the number of rows in the outputs array
+    rows = outputs.shape[0]
+
+    boxes = []
+    scores = []
+    class_ids = []
+
+    # Calculate the scaling factors for the bounding box coordinates
+    if original_height > original_width:
+        scale_factor = original_height / resized_height
+    else:
+        scale_factor = original_width / resized_width
+
+    # Iterate over each row in the outputs array
+    for i in range(rows):
+        classes_scores = outputs[i][4:]
+
+        # FIXME: For some reason when using YOLO in ONNX sometimes it returns NaN values in the classes scores
+        # and other times it returns 1 for some classes and 0 for the rest which is almost certainly a bad prediction.
+        # This hack skips those entries
+        nan_mask = np.isnan(classes_scores)
+        if np.any(nan_mask):
+            continue
+        if np.any(classes_scores == 1):
+            continue
+
+        max_score = np.amax(classes_scores)
+        if max_score >= confidence_thres:
+            class_id = np.argmax(classes_scores) # Get the class ID with the highest score
+            x, y, w, h = outputs[i][0], outputs[i][1], outputs[i][2], outputs[i][3]
+
+            ## Calculate the scaled coordinates of the bounding box
+            ## the original image was padded to be square
+            if original_height > original_width:
+                # we added pad on the width
+                pad = (resized_width - original_width / scale_factor) // 2
+                left = int((x - pad) * scale_factor)
+                top = int(y * scale_factor)
+            else:
+                # we added pad on the height
+                pad = (resized_height - original_height / scale_factor) // 2
+                left = int(x * scale_factor)
+                top = int((y - pad) * scale_factor)
+            width = int(w * scale_factor)
+            height = int(h * scale_factor)
+
+            class_ids.append(class_id)
+            scores.append(max_score)
+            boxes.append([left, top, width, height])
+
+    if len(boxes) > 0:
+        boxes = np.array(boxes)
+        scores = np.array(scores)
+        class_ids = np.array(class_ids)
+
+        clip_boxes(boxes, original_frame_shape)
+        boxes = xywh2xyxy(boxes)
+        indices = cv2.dnn.NMSBoxes(boxes, scores, confidence_thres, iou_thres)
+
+        return boxes[indices], scores[indices], class_ids[indices]
+    else:
+        return [], [], []
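
Note: a quick way to smoke-test the new postprocess_yolo is to feed it a synthetic tensor. The sketch below is not part of this commit; it assumes the functions above are already defined in the session, and that the model emits the usual YOLOv8-style ONNX layout of (1, 84, 8400), i.e. 4 box coordinates plus 80 class scores per candidate.

import numpy as np

# Synthetic model output: one confident detection, everything else zero.
dummy = np.zeros((1, 84, 8400), dtype=np.float32)
dummy[0, 0:4, 0] = [320, 320, 100, 100]  # cx, cy, w, h in the 640x640 model space
dummy[0, 4, 0] = 0.9                     # class 0 ('person') score above confidence_thres

frame_shape = (480, 640, 3)       # original frame (h, w, c)
yolo_input_shape = (640, 640, 3)  # model input (h, w, c)

boxes, scores, class_ids = postprocess_yolo(frame_shape, yolo_input_shape, [dummy])
print(boxes, scores, class_ids)   # expect a single 'person' box, roughly centered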
examples/onnx-yolo/pre-process.py: 54 changes (35 additions, 19 deletions)
@@ -1,34 +1,50 @@
 import cv2
 import numpy as np

-def resize_rgb_frame(frame, target_dim):
-    target_height = target_dim[0]
-    target_width = target_dim[1]
-    channels = target_dim[2]
-    # Scale the image maintaining aspect ratio
-    width_ratio = target_width / frame.shape[1]
-    height_ratio = target_height / frame.shape[0]
+def is_cuda_available():
+    return cv2.cuda.getCudaEnabledDeviceCount() > 0
+
+"""
+Resize and pad image. Uses CUDA when available
+"""
+def resize_and_pad(frame, target_dim, pad_top, pad_bottom, pad_left, pad_right):
+    target_height, target_width = target_dim
+    if is_cuda_available():
+        # FIXME: due to the memory allocation here could be even slower than running on CPU. We must provide the frame from GPU memory to the hook
+        frame_gpu = cv2.cuda_GpuMat(frame)
+        resized_frame_gpu = cv2.cuda.resize(frame_gpu, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
+        padded_frame_gpu = cv2.cuda.copyMakeBorder(resized_frame_gpu, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
+        result = padded_frame_gpu.download()
+        return result
+    else:
+        resized_frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
+        padded_frame = cv2.copyMakeBorder(resized_frame, pad_top, pad_bottom, pad_left, pad_right,
+                                          borderType=cv2.BORDER_CONSTANT, value=(0, 0, 0))
+        return padded_frame
+
+def resize_with_padding(frame, target_dim):
+    target_height, target_width, _ = target_dim
+    frame_height, frame_width, _ = frame.shape
+
+    width_ratio = target_width / frame_width
+    height_ratio = target_height / frame_height
     # Choose the minimum scaling factor to maintain aspect ratio
     scale_factor = min(width_ratio, height_ratio)
     # Calculate new dimensions after resizing
-    new_width = int(frame.shape[1] * scale_factor)
-    new_height = int(frame.shape[0] * scale_factor)
+    new_width = int(frame_width * scale_factor)
+    new_height = int(frame_height * scale_factor)
     # Calculate padding dimensions
     pad_width = (target_width - new_width) // 2
     pad_height = (target_height - new_height) // 2
-    # Create a canvas with the desired dimensions and padding
-    canvas = np.zeros((target_height, target_width, channels), dtype=np.uint8)
-    # Resize the image and place it on the canvas
-    resized_image = cv2.resize(frame, (new_width, new_height))
-    canvas[pad_height:pad_height+new_height, pad_width:pad_width+new_width] = resized_image
-    return canvas
-
-def hook(frame_data, context):
+    padded_image = resize_and_pad(frame, (new_height, new_width), pad_height, pad_height, pad_width, pad_width)
+    return padded_image
+
+def hook(frame_data, _):
     frame = frame_data["original"].view()
     yolo_input_shape = (640, 640, 3) # h,w,c
     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    frame = resize_rgb_frame(frame, yolo_input_shape)
-    frame = cv2.normalize(frame, None, 0.0, 1.0, cv2.NORM_MINMAX)
+    frame = resize_with_padding(frame, yolo_input_shape)
+    frame = np.array(frame) / 255.0 # Normalize pixel values
     frame = np.transpose(frame, axes=(2,0,1)) # Convert to c,h,w
     inference_inputs = frame.astype("float32")
     frame_data['inference_input'] = inference_inputs
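
Note: the preprocessing contract can be sanity-checked in isolation. This sketch is not part of the commit; it assumes the hook above is in scope and uses an arbitrary 1080p test frame. For that input the result should be a 640x640 CHW float32 tensor with values in [0, 1].

import numpy as np

frame_data = {"original": np.random.randint(0, 255, (1080, 1920, 3), dtype=np.uint8)}
hook(frame_data, None)

tensor = frame_data["inference_input"]
print(tensor.shape, tensor.dtype)                   # expect (3, 640, 640) float32
print(0.0 <= tensor.min() and tensor.max() <= 1.0)  # normalized pixel values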
examples/yolo/process.py: 12 changes (3 additions, 9 deletions)
@@ -1,14 +1,8 @@
 import numpy as np
-import time

 def hook(frame, context):
     rgb_frame = frame['original']
     model = context['model']
-    input_fps = frame['fps']
-    delay = time.time() - frame['input_ts']
-    if input_fps > 0 and delay > 1 / input_fps:
-        print('Skipping frame to maintain real-time')
-    else:
-        prediction = next(model(rgb_frame, stream=True))
-        bboxes = prediction.boxes.data.tolist() if prediction.boxes else []
-        frame['inference_output'] = np.array(bboxes, dtype="float32")
+    prediction = next(model(rgb_frame, stream=True))
+    bboxes = prediction.boxes.data.tolist() if prediction.boxes else []
+    frame['inference_output'] = np.array(bboxes, dtype="float32")
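
Note: with the frame-skipping logic removed, the hook is simply "model in context, frame in, boxes out". A hedged sketch of driving it directly, not part of this commit: it assumes an Ultralytics YOLO model (the weights name is arbitrary) standing in for however the stage populates context['model'].

import numpy as np
from ultralytics import YOLO  # assumption: any model with this Results API works here

context = {"model": YOLO("yolov8n.pt")}  # arbitrary weights, for illustration only
frame = {"original": np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)}

hook(frame, context)
# Rows of [x1, y1, x2, y2, conf, class]; likely empty on a noise frame.
print(frame["inference_output"])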
