From 67661c61500e94f5b67c97f7ff1285f3ac59e6fb Mon Sep 17 00:00:00 2001
From: pinto0309
Date: Sun, 15 Dec 2024 09:38:15 +0900
Subject: [PATCH] disable heatmap mode

---
 462_Gaze-LLE/README.md                        |  9 ++++--
 462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py | 32 +++++++++++++++----
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/462_Gaze-LLE/README.md b/462_Gaze-LLE/README.md
index e6d07f116c..0090560370 100644
--- a/462_Gaze-LLE/README.md
+++ b/462_Gaze-LLE/README.md
@@ -1,10 +1,8 @@
 # 462_Gaze-LLE

-## WIP
-
 Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight gaze decoder on top of a frozen, pretrained visual encoder (DINOv2). Gaze-LLE learns 1-2 orders of magnitude fewer parameters than prior works and doesn't require any extra input modalities like depth and pose!

-- Single person test - `gazelle_dinov2_vitb14_inout_1x3x448x448_1xNx4.onnx`
+- Single person test - `gazelle_dinov2_vitb14_inout_1x3x448x448_1xNx4.onnx` + ONNX-TensorRT

   https://github.com/user-attachments/assets/b8d45d91-55b4-41fe-b177-ab3497026967

@@ -12,6 +10,8 @@

   https://github.com/user-attachments/assets/12c5b44b-328c-4d32-b17c-182ddac564f3

+- Disable Heatmap
+
 ## 1. Test
 - Python 3.10
 - onnx 1.16.1+
@@ -50,6 +50,7 @@ Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight
         [-dgm]
         [-dlr]
         [-dhm]
+        [-dah]
         [-drc [DISABLE_RENDER_CLASSIDS ...]]
         [-oyt]
         [-bblw BOUNDING_BOX_LINE_WIDTH]
@@ -87,6 +88,8 @@ Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight
                         Disable left and right hand identification mode. (Press H on the keyboard to switch modes)
   -dhm, --disable_headpose_identification_mode
                         Disable HeadPose identification mode. (Press P on the keyboard to switch modes)
+  -dah, --disable_attention_heatmap_mode
+                        Disable Attention Heatmap mode. (Press A on the keyboard to switch modes)
   -drc [DISABLE_RENDER_CLASSIDS ...], --disable_render_classids [DISABLE_RENDER_CLASSIDS ...]
                         Class ID to disable bounding box drawing. List[int]. e.g. -drc 17 18 19
   -oyt, --output_yolo_format_text
diff --git a/462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py b/462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py
index 9858d1ca90..d39a4f5f57 100755
--- a/462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py
+++ b/462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py
@@ -663,6 +663,7 @@ def __call__(
         self,
         image: np.ndarray,
         head_boxes: List[Box],
+        disable_attention_heatmap_mode: bool,
     ) -> Tuple[np.ndarray, np.ndarray]:
         """

@@ -674,6 +675,8 @@ def __call__(
         head_boxes: List[Box]
             Head boxes

+        disable_attention_heatmap_mode: bool
+
         Returns
         -------
         result_image: np.ndarray
@@ -699,16 +702,14 @@ def __call__(
         if len(outputs) == 2:
             inout = outputs[1]
         # PostProcess
-        result_image = \
+        result_image, resized_heatmatps = \
             self._postprocess(
                 image_bgr=temp_image,
                 heatmaps=heatmaps,
             )
-        image_height = temp_image.shape[0]
-        image_width = temp_image.shape[1]
-        heatmap_list = [cv2.resize(heatmap[..., None], (image_width, image_height)) for heatmap in heatmaps]
-        resized_heatmatp = np.asarray(heatmap_list)
-        return result_image, resized_heatmatp
+        if disable_attention_heatmap_mode:
+            result_image = image
+        return result_image, resized_heatmatps

     def _preprocess(
         self,
@@ -754,6 +755,8 @@ def _postprocess(
         -------
         result_image: uint8[image_height, image_width, 3]
             BGR
+        resized_heatmatps: uint8[image_height, image_width]
+            Single-channel
         """
         image_height = image_bgr.shape[0]
         image_width = image_bgr.shape[1]
@@ -768,7 +771,11 @@ def _postprocess(
         heatmaps_all.putalpha(128)
         image_rgba = Image.alpha_composite(Image.fromarray(image_rgb).convert("RGBA"), heatmaps_all)
         image_bgr = cv2.cvtColor(np.asarray(image_rgba)[..., [2,1,0,3]], cv2.COLOR_BGRA2BGR)
-        return image_bgr
+
+        heatmap_list = [cv2.resize(heatmap[..., None], (image_width, image_height)) for heatmap in heatmaps]
+        resized_heatmatps = np.asarray(heatmap_list)
+
+        return image_bgr, resized_heatmatps

 def list_image_files(dir_path: str) -> List[str]:
     path = Path(dir_path)
@@ -946,6 +953,13 @@ def check_positive(value):
         help=\
             'Disable HeadPose identification mode. (Press P on the keyboard to switch modes)',
     )
+    parser.add_argument(
+        '-dah',
+        '--disable_attention_heatmap_mode',
+        action='store_true',
+        help=\
+            'Disable Attention Heatmap mode. (Press A on the keyboard to switch modes)',
+    )
     parser.add_argument(
         '-drc',
         '--disable_render_classids',
@@ -1002,6 +1016,7 @@ def check_positive(value):
     disable_gender_identification_mode: bool = args.disable_gender_identification_mode
     disable_left_and_right_hand_identification_mode: bool = args.disable_left_and_right_hand_identification_mode
     disable_headpose_identification_mode: bool = args.disable_headpose_identification_mode
+    disable_attention_heatmap_mode: bool = args.disable_attention_heatmap_mode
     disable_render_classids: List[int] = args.disable_render_classids
     output_yolo_format_text: bool = args.output_yolo_format_text
     execution_provider: str = args.execution_provider
@@ -1126,6 +1141,7 @@ def check_positive(value):
             debug_image, heatmaps = gazelle_model(
                 image=debug_image,
                 head_boxes=head_boxes,
+                disable_attention_heatmap_mode=disable_attention_heatmap_mode,
             )
             elapsed_time = time.perf_counter() - start_time

@@ -1448,6 +1464,8 @@ def calculate_centroid(heatmap: np.ndarray) -> Tuple[int, int, float]:
             disable_headpose_identification_mode = not disable_headpose_identification_mode
         elif key == 104: # H, mode switch
             disable_left_and_right_hand_identification_mode = not disable_left_and_right_hand_identification_mode
+        elif key == 97: # A, mode switch
+            disable_attention_heatmap_mode = not disable_attention_heatmap_mode

     if video_writer is not None:
         video_writer.release()
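
For context, the behavior this patch introduces can be sketched in isolation. The following is a minimal, self-contained approximation, not the demo's actual implementation: the helper name overlay_attention_heatmaps, the JET colormap, and the max-combination of heads are illustrative assumptions, while the per-head resize, putalpha(128), and Image.alpha_composite steps mirror what _postprocess does in the diff above. When disable_attention_heatmap_mode is set (the new -dah flag, toggled at runtime with the A key, key code 97), the frame is returned untouched and only the resized per-head heatmaps are kept for downstream gaze-point estimation.

    # Minimal sketch of the overlay / bypass behavior added by this patch.
    # 'overlay_attention_heatmaps' is a hypothetical helper; the demo does this
    # work inside GazeLLE.__call__ / _postprocess. Colormap choice is illustrative.
    from typing import Tuple

    import cv2
    import numpy as np
    from PIL import Image


    def overlay_attention_heatmaps(
        image_bgr: np.ndarray,                  # uint8 [H, W, 3] camera frame
        heatmaps: np.ndarray,                   # float32 [N, h, w] in [0, 1], one map per head
        disable_attention_heatmap_mode: bool,   # mirrors the new -dah flag / 'A' key
    ) -> Tuple[np.ndarray, np.ndarray]:
        image_height, image_width = image_bgr.shape[:2]
        # Resize every per-head heatmap to the full frame size, as _postprocess does.
        resized_heatmaps = np.asarray(
            [cv2.resize(h, (image_width, image_height)) for h in heatmaps]
        )
        if disable_attention_heatmap_mode:
            # Heatmap rendering disabled: return the untouched frame and keep the
            # resized maps for gaze-point estimation downstream.
            return image_bgr, resized_heatmaps
        # Collapse all heads into one map, colorize it, and blend it at 50% alpha,
        # following the putalpha(128) + alpha_composite pattern in the patch.
        combined = np.clip(resized_heatmaps.max(axis=0) * 255.0, 0, 255).astype(np.uint8)
        colored = cv2.applyColorMap(combined, cv2.COLORMAP_JET)
        overlay = Image.fromarray(cv2.cvtColor(colored, cv2.COLOR_BGR2RGB)).convert("RGBA")
        overlay.putalpha(128)
        base = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)).convert("RGBA")
        blended = Image.alpha_composite(base, overlay)
        return cv2.cvtColor(np.asarray(blended), cv2.COLOR_RGBA2BGR), resized_heatmaps

One simplification to note: the patched demo always runs the compositing inside _postprocess and swaps the raw frame back in afterwards when the mode is disabled, whereas the sketch above short-circuits before compositing; the visible output is the same, but the sketch is not a behavioral copy of the demo's code path.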