From 67661c61500e94f5b67c97f7ff1285f3ac59e6fb Mon Sep 17 00:00:00 2001
From: pinto0309
Date: Sun, 15 Dec 2024 09:38:15 +0900
Subject: [PATCH] disable heatmap mode

---
 462_Gaze-LLE/README.md                        |  9 ++++--
 462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py | 32 +++++++++++++++----
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/462_Gaze-LLE/README.md b/462_Gaze-LLE/README.md
index e6d07f116c..0090560370 100644
--- a/462_Gaze-LLE/README.md
+++ b/462_Gaze-LLE/README.md
@@ -1,10 +1,8 @@
 # 462_Gaze-LLE

-## WIP
-
 Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight gaze decoder on top of a frozen, pretrained visual encoder (DINOv2). Gaze-LLE learns 1-2 orders of magnitude fewer parameters than prior works and doesn't require any extra input modalities like depth and pose!

-- Single person test - `gazelle_dinov2_vitb14_inout_1x3x448x448_1xNx4.onnx`
+- Single person test - `gazelle_dinov2_vitb14_inout_1x3x448x448_1xNx4.onnx` + ONNX-TensorRT

   https://github.com/user-attachments/assets/b8d45d91-55b4-41fe-b177-ab3497026967

@@ -12,6 +10,8 @@

   https://github.com/user-attachments/assets/12c5b44b-328c-4d32-b17c-182ddac564f3

+- Disable Heatmap
+
 ## 1. Test
 - Python 3.10
 - onnx 1.16.1+
@@ -50,6 +50,7 @@ Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight
         [-dgm]
         [-dlr]
         [-dhm]
+        [-dah]
         [-drc [DISABLE_RENDER_CLASSIDS ...]]
         [-oyt]
         [-bblw BOUNDING_BOX_LINE_WIDTH]
@@ -87,6 +88,8 @@ Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight
                         Disable left and right hand identification mode. (Press H on the keyboard to switch modes)
   -dhm, --disable_headpose_identification_mode
                         Disable HeadPose identification mode. (Press P on the keyboard to switch modes)
+  -dah, --disable_attention_heatmap_mode
+                        Disable Attention Heatmap mode. (Press A on the keyboard to switch modes)
   -drc [DISABLE_RENDER_CLASSIDS ...], --disable_render_classids [DISABLE_RENDER_CLASSIDS ...]
                         Class ID to disable bounding box drawing. List[int]. e.g. -drc 17 18 19
   -oyt, --output_yolo_format_text
diff --git a/462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py b/462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py
index 9858d1ca90..d39a4f5f57 100755
--- a/462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py
+++ b/462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py
@@ -663,6 +663,7 @@ def __call__(
         self,
         image: np.ndarray,
         head_boxes: List[Box],
+        disable_attention_heatmap_mode: bool,
     ) -> Tuple[np.ndarray, np.ndarray]:
         """

@@ -674,6 +675,8 @@ def __call__(
         head_boxes: List[Box]
             Head boxes

+        disable_attention_heatmap_mode: bool
+
         Returns
         -------
         result_image: np.ndarray
@@ -699,16 +702,14 @@ def __call__(
         if len(outputs) == 2:
             inout = outputs[1]
         # PostProcess
-        result_image = \
+        result_image, resized_heatmatps = \
             self._postprocess(
                 image_bgr=temp_image,
                 heatmaps=heatmaps,
             )
-        image_height = temp_image.shape[0]
-        image_width = temp_image.shape[1]
-        heatmap_list = [cv2.resize(heatmap[..., None], (image_width, image_height)) for heatmap in heatmaps]
-        resized_heatmatp = np.asarray(heatmap_list)
-        return result_image, resized_heatmatp
+        if disable_attention_heatmap_mode:
+            result_image = image
+        return result_image, resized_heatmatps

     def _preprocess(
         self,
@@ -754,6 +755,8 @@ def _postprocess(
         -------
         result_image: uint8[image_height, image_width, 3]
             BGR
+        resized_heatmatps: uint8[image_height, image_width]
+            Single-channel
         """
         image_height = image_bgr.shape[0]
         image_width = image_bgr.shape[1]
@@ -768,7 +771,11 @@ def _postprocess(
         heatmaps_all.putalpha(128)
         image_rgba = Image.alpha_composite(Image.fromarray(image_rgb).convert("RGBA"), heatmaps_all)
         image_bgr = cv2.cvtColor(np.asarray(image_rgba)[..., [2,1,0,3]], cv2.COLOR_BGRA2BGR)
-        return image_bgr
+
+        heatmap_list = [cv2.resize(heatmap[..., None], (image_width, image_height)) for heatmap in heatmaps]
+        resized_heatmatps = np.asarray(heatmap_list)
+
+        return image_bgr, resized_heatmatps

 def list_image_files(dir_path: str) -> List[str]:
     path = Path(dir_path)
@@ -946,6 +953,13 @@ def check_positive(value):
         help=\
             'Disable HeadPose identification mode. (Press P on the keyboard to switch modes)',
     )
+    parser.add_argument(
+        '-dah',
+        '--disable_attention_heatmap_mode',
+        action='store_true',
+        help=\
+            'Disable Attention Heatmap mode. (Press A on the keyboard to switch modes)',
+    )
     parser.add_argument(
         '-drc',
         '--disable_render_classids',
@@ -1002,6 +1016,7 @@ def check_positive(value):
     disable_gender_identification_mode: bool = args.disable_gender_identification_mode
     disable_left_and_right_hand_identification_mode: bool = args.disable_left_and_right_hand_identification_mode
     disable_headpose_identification_mode: bool = args.disable_headpose_identification_mode
+    disable_attention_heatmap_mode: bool = args.disable_attention_heatmap_mode
     disable_render_classids: List[int] = args.disable_render_classids
     output_yolo_format_text: bool = args.output_yolo_format_text
     execution_provider: str = args.execution_provider
@@ -1126,6 +1141,7 @@ def check_positive(value):
             debug_image, heatmaps = gazelle_model(
                 image=debug_image,
                 head_boxes=head_boxes,
+                disable_attention_heatmap_mode=disable_attention_heatmap_mode,
             )
             elapsed_time = time.perf_counter() - start_time

@@ -1448,6 +1464,8 @@ def calculate_centroid(heatmap: np.ndarray) -> Tuple[int, int, float]:
             disable_headpose_identification_mode = not disable_headpose_identification_mode
         elif key == 104: # H, mode switch
             disable_left_and_right_hand_identification_mode = not disable_left_and_right_hand_identification_mode
+        elif key == 97: # A, mode switch
+            disable_attention_heatmap_mode = not disable_attention_heatmap_mode

     if video_writer is not None:
         video_writer.release()
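
For context, the behavior this patch introduces can be sketched in isolation. The following is a minimal, self-contained approximation, not the demo's actual implementation: the helper name overlay_attention_heatmaps, the JET colormap, and the max-combination of heads are illustrative assumptions, while the per-head resize, putalpha(128), and Image.alpha_composite steps mirror what _postprocess does in the diff above. When disable_attention_heatmap_mode is set (the new -dah flag, toggled at runtime with the A key, key code 97), the frame is returned untouched and only the resized per-head heatmaps are kept for downstream gaze-point estimation.

    # Minimal sketch of the overlay / bypass behavior added by this patch.
    # 'overlay_attention_heatmaps' is a hypothetical helper; the demo does this
    # work inside GazeLLE.__call__ / _postprocess. Colormap choice is illustrative.
    from typing import Tuple

    import cv2
    import numpy as np
    from PIL import Image


    def overlay_attention_heatmaps(
        image_bgr: np.ndarray,                  # uint8 [H, W, 3] camera frame
        heatmaps: np.ndarray,                   # float32 [N, h, w] in [0, 1], one map per head
        disable_attention_heatmap_mode: bool,   # mirrors the new -dah flag / 'A' key
    ) -> Tuple[np.ndarray, np.ndarray]:
        image_height, image_width = image_bgr.shape[:2]
        # Resize every per-head heatmap to the full frame size, as _postprocess does.
        resized_heatmaps = np.asarray(
            [cv2.resize(h, (image_width, image_height)) for h in heatmaps]
        )
        if disable_attention_heatmap_mode:
            # Heatmap rendering disabled: return the untouched frame and keep the
            # resized maps for gaze-point estimation downstream.
            return image_bgr, resized_heatmaps
        # Collapse all heads into one map, colorize it, and blend it at 50% alpha,
        # following the putalpha(128) + alpha_composite pattern in the patch.
        combined = np.clip(resized_heatmaps.max(axis=0) * 255.0, 0, 255).astype(np.uint8)
        colored = cv2.applyColorMap(combined, cv2.COLORMAP_JET)
        overlay = Image.fromarray(cv2.cvtColor(colored, cv2.COLOR_BGR2RGB)).convert("RGBA")
        overlay.putalpha(128)
        base = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)).convert("RGBA")
        blended = Image.alpha_composite(base, overlay)
        return cv2.cvtColor(np.asarray(blended), cv2.COLOR_RGBA2BGR), resized_heatmaps

One simplification to note: the patched demo always runs the compositing inside _postprocess and swaps the raw frame back in afterwards when the mode is disabled, whereas the sketch above short-circuits before compositing; the visible output is the same, but the sketch is not a behavioral copy of the demo's code path.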