Skip to content

Commit

Permalink
disable heatmap mode
Browse files Browse the repository at this point in the history
  • Loading branch information
PINTO0309 committed Dec 15, 2024
1 parent dfe6c45 commit 67661c6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 10 deletions.
9 changes: 6 additions & 3 deletions 462_Gaze-LLE/README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# 462_Gaze-LLE

## WIP

Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight gaze decoder on top of a frozen, pretrained visual encoder (DINOv2). Gaze-LLE learns 1-2 orders of magnitude fewer parameters than prior works and doesn't require any extra input modalities such as depth or pose!

- Single person test - `gazelle_dinov2_vitb14_inout_1x3x448x448_1xNx4.onnx`
- Single person test - `gazelle_dinov2_vitb14_inout_1x3x448x448_1xNx4.onnx` + ONNX-TensorRT

https://github.com/user-attachments/assets/b8d45d91-55b4-41fe-b177-ab3497026967

- Gaze estimation test when facing backwards

https://github.com/user-attachments/assets/12c5b44b-328c-4d32-b17c-182ddac564f3

- Attention heatmap disabled (`-dah` option, or press A on the keyboard to toggle)

## 1. Test
- Python 3.10
- onnx 1.16.1+
Expand Down Expand Up @@ -50,6 +50,7 @@ Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight
[-dgm]
[-dlr]
[-dhm]
[-dah]
[-drc [DISABLE_RENDER_CLASSIDS ...]]
[-oyt]
[-bblw BOUNDING_BOX_LINE_WIDTH]
Expand Down Expand Up @@ -87,6 +88,8 @@ Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight
Disable left and right hand identification mode. (Press H on the keyboard to switch modes)
-dhm, --disable_headpose_identification_mode
Disable HeadPose identification mode. (Press P on the keyboard to switch modes)
-dah, --disable_attention_heatmap_mode
Disable Attention Heatmap mode. (Press A on the keyboard to switch modes)
-drc [DISABLE_RENDER_CLASSIDS ...], --disable_render_classids [DISABLE_RENDER_CLASSIDS ...]
Class ID to disable bounding box drawing. List[int]. e.g. -drc 17 18 19
-oyt, --output_yolo_format_text
Expand Down
32 changes: 25 additions & 7 deletions 462_Gaze-LLE/demo/demo_yolov9_onnx_gazelle.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,7 @@ def __call__(
self,
image: np.ndarray,
head_boxes: List[Box],
disable_attention_heatmap_mode: bool,
) -> Tuple[np.ndarray, np.ndarray]:
"""
Expand All @@ -674,6 +675,8 @@ def __call__(
head_boxes: List[Box]
Head boxes
disable_attention_heatmap_mode: bool
Returns
-------
result_image: np.ndarray
Expand All @@ -699,16 +702,14 @@ def __call__(
if len(outputs) == 2:
inout = outputs[1]
# PostProcess
result_image = \
result_image, resized_heatmatps = \
self._postprocess(
image_bgr=temp_image,
heatmaps=heatmaps,
)
image_height = temp_image.shape[0]
image_width = temp_image.shape[1]
heatmap_list = [cv2.resize(heatmap[..., None], (image_width, image_height)) for heatmap in heatmaps]
resized_heatmatp = np.asarray(heatmap_list)
return result_image, resized_heatmatp
if disable_attention_heatmap_mode:
result_image = image
return result_image, resized_heatmatps

def _preprocess(
self,
Expand Down Expand Up @@ -754,6 +755,8 @@ def _postprocess(
-------
result_image: uint8[image_height, image_width, 3]
BGR
resized_heatmatps: uint8[image_height, image_width]
Single-channel
"""
image_height = image_bgr.shape[0]
image_width = image_bgr.shape[1]
Expand All @@ -768,7 +771,11 @@ def _postprocess(
heatmaps_all.putalpha(128)
image_rgba = Image.alpha_composite(Image.fromarray(image_rgb).convert("RGBA"), heatmaps_all)
image_bgr = cv2.cvtColor(np.asarray(image_rgba)[..., [2,1,0,3]], cv2.COLOR_BGRA2BGR)
return image_bgr

heatmap_list = [cv2.resize(heatmap[..., None], (image_width, image_height)) for heatmap in heatmaps]
resized_heatmatps = np.asarray(heatmap_list)

return image_bgr, resized_heatmatps

def list_image_files(dir_path: str) -> List[str]:
path = Path(dir_path)
Expand Down Expand Up @@ -946,6 +953,13 @@ def check_positive(value):
help=\
'Disable HeadPose identification mode. (Press P on the keyboard to switch modes)',
)
parser.add_argument(
'-dah',
'--disable_attention_heatmap_mode',
action='store_true',
help=\
'Disable Attention Heatmap mode. (Press A on the keyboard to switch modes)',
)
parser.add_argument(
'-drc',
'--disable_render_classids',
Expand Down Expand Up @@ -1002,6 +1016,7 @@ def check_positive(value):
disable_gender_identification_mode: bool = args.disable_gender_identification_mode
disable_left_and_right_hand_identification_mode: bool = args.disable_left_and_right_hand_identification_mode
disable_headpose_identification_mode: bool = args.disable_headpose_identification_mode
disable_attention_heatmap_mode: bool = args.disable_attention_heatmap_mode
disable_render_classids: List[int] = args.disable_render_classids
output_yolo_format_text: bool = args.output_yolo_format_text
execution_provider: str = args.execution_provider
Expand Down Expand Up @@ -1126,6 +1141,7 @@ def check_positive(value):
debug_image, heatmaps = gazelle_model(
image=debug_image,
head_boxes=head_boxes,
disable_attention_heatmap_mode=disable_attention_heatmap_mode,
)
elapsed_time = time.perf_counter() - start_time

Expand Down Expand Up @@ -1448,6 +1464,8 @@ def calculate_centroid(heatmap: np.ndarray) -> Tuple[int, int, float]:
disable_headpose_identification_mode = not disable_headpose_identification_mode
elif key == 104: # H, mode switch
disable_left_and_right_hand_identification_mode = not disable_left_and_right_hand_identification_mode
elif key == 97: # A, mode switch
disable_attention_heatmap_mode = not disable_attention_heatmap_mode

if video_writer is not None:
video_writer.release()
Expand Down

0 comments on commit 67661c6

Please sign in to comment.