From 56ec09fac8db9fa01f2eeff8f955ef6c91f85451 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic <mandic00@live.com>
Date: Mon, 21 Oct 2024 10:57:19 -0400
Subject: [PATCH] add detailer multi-class support

Signed-off-by: Vladimir Mandic <mandic00@live.com>
---
 CHANGELOG.md                | 10 +++++--
 modules/postprocess/yolo.py | 58 +++++++++++++++++++++++++------------
 modules/shared.py           |  1 +
 3 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f1942da6..6b5cd1d98 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,8 @@
 # Change Log for SD.Next
 
-## Update for 2024-10-20
+## Update for 2024-10-21
 
-### Highlights for 2024-10-20
+### Highlights for 2024-10-21
 
 #### Workflow highlights
 
@@ -42,7 +42,7 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition
 [README](https://github.com/vladmandic/automatic/blob/master/README.md) | [CHANGELOG](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) | [WiKi](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.com/invite/sd-next-federal-batch-inspectors-1101998836328697867)
 
 
-### Details for 2024-10-20
+### Details for 2024-10-21
 
 - **reprocess**
   - new top-level button: reprocess latent from your history of generated image(s)  
@@ -89,6 +89,10 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition
   - image metadata includes info on used detailer models  
   - *note* detailer defaults are not save in ui settings, they are saved in server settings  
     to apply your defaults, set ui values and apply via *system -> settings -> apply settings*  
+  - if using models trained on multiple classes, you can specify which classes you want to detail  
+    e.g. original yolo detection model is trained on coco dataset with 80 predefined classes  
+    if you leave field blank, it will use any class found in the model  
+    you can see classes defined in the model while model itself is loaded for the first time  
 
 - **extract lora**: extract combined lora from current memory state, thanks @AI-Casanova  
   load any LoRA(s) and play with generate as usual and once you like the results simply extract combined LoRA for future use!  
diff --git a/modules/postprocess/yolo.py b/modules/postprocess/yolo.py
index 7d8ea113a..2f0e12086 100644
--- a/modules/postprocess/yolo.py
+++ b/modules/postprocess/yolo.py
@@ -1,3 +1,4 @@
+from typing import TYPE_CHECKING
 import os
 import numpy as np
 import gradio as gr
@@ -7,6 +8,7 @@
 
 
 PREDEFINED = [ # <https://huggingface.co/vladmandic/yolo-detailers/tree/main>
+    'https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt',
     'https://huggingface.co/vladmandic/yolo-detailers/resolve/main/face-yolo8n.pt',
     'https://huggingface.co/vladmandic/yolo-detailers/resolve/main/hand_yolov8n.pt',
     'https://huggingface.co/vladmandic/yolo-detailers/resolve/main/person_yolov8n-seg.pt',
@@ -16,7 +18,9 @@
 
 
 class YoloResult:
-    def __init__(self, score: float, box: list[int], mask: Image.Image = None, item: Image.Image = None, size: float = 0, width = 0, height = 0, args = {}):
+    def __init__(self, cls: int, label: str, score: float, box: list[int], mask: Image.Image = None, item: Image.Image = None, size: float = 0, width = 0, height = 0, args = {}):
+        self.cls = cls
+        self.label = label
         self.score = score
         self.box = box
         self.mask = mask
@@ -79,10 +83,12 @@ def predict(
         args = {
             'conf': shared.opts.detailer_conf,
             'iou': shared.opts.detailer_iou,
-            'max_det': shared.opts.detailer_max,
+            # 'max_det': shared.opts.detailer_max,
         }
         try:
-            model.to(device)
+            if TYPE_CHECKING:
+                from ultralytics import YOLO # pylint: disable=import-outside-toplevel, unused-import
+            model: YOLO = model.to(device)
             predictions = model.predict(
                 source=[image],
                 stream=False,
@@ -101,10 +107,19 @@ def predict(
             shared.log.error(f'Detailer predict: {e}')
             return result
 
+        desired = shared.opts.detailer_classes.split(',')
+        desired = [d.lower().strip() for d in desired]
+        desired = [d for d in desired if len(d) > 0]
+
         for prediction in predictions:
             boxes = prediction.boxes.xyxy.detach().int().cpu().numpy() if prediction.boxes is not None else []
             scores = prediction.boxes.conf.detach().float().cpu().numpy() if prediction.boxes is not None else []
-            for score, box in zip(scores, boxes):
+            classes = prediction.boxes.cls.detach().float().cpu().numpy() if prediction.boxes is not None else []
+            for score, box, cls in zip(scores, boxes, classes):
+                cls = int(cls)
+                label = prediction.names[cls] if cls < len(prediction.names) else f'cls{cls}'
+                if len(desired) > 0 and label.lower() not in desired:
+                    continue
                 box = box.tolist()
                 mask_image = None
                 w, h = box[2] - box[0], box[3] - box[1]
@@ -116,7 +131,9 @@ def predict(
                         draw = ImageDraw.Draw(mask_image)
                         draw.rectangle(box, fill="white", outline=None, width=0)
                         cropped = image.crop(box)
-                        result.append(YoloResult(score=round(score, 2), box=box, mask=mask_image, item=cropped, size=size, width=w, height=h, args=args))
+                        result.append(YoloResult(cls=cls, label=label, score=round(score, 2), box=box, mask=mask_image, item=cropped, size=size, width=w, height=h, args=args))
+                if len(result) >= shared.opts.detailer_max:
+                    break
         return result
 
     def load(self, model_name: str = None):
@@ -133,9 +150,10 @@ def load(self, model_name: str = None):
             try:
                 model_file = modelloader.load_file_from_url(url=model_url, model_dir=shared.opts.yolo_dir, file_name=file_name)
                 if model_file is not None:
-                    shared.log.info(f'Load: type=Detailer name="{model_name}" model="{model_file}"')
                     from ultralytics import YOLO # pylint: disable=import-outside-toplevel
                     model = YOLO(model_file)
+                    classes = list(model.names.values())
+                    shared.log.info(f'Load: type=Detailer name="{model_name}" model="{model_file}" classes={classes}')
                     self.models[model_name] = model
                     return model_name, model
             except Exception as e:
@@ -218,7 +236,7 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
             if p.steps < 1:
                 p.steps = orig_p.get('steps', 0)
 
-            report = [{'score': i.score, 'size': f'{i.width}x{i.height}' } for i in items]
+            report = [{'label': i.label, 'score': i.score, 'size': f'{i.width}x{i.height}' } for i in items]
             shared.log.info(f'Detailer: model="{name}" items={report} args={items[0].args} denoise={p.denoising_strength} blur={p.mask_blur} width={p.width} height={p.height} padding={p.inpaint_full_res_padding}')
             shared.log.debug(f'Detailer: prompt="{prompt}" negative="{negative}"')
             models_used.append(name)
@@ -265,8 +283,9 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
         return np_image
 
     def ui(self, tab: str):
-        def ui_settings_change(detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou):
+        def ui_settings_change(detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou):
             shared.opts.detailer_models = detailers
+            shared.opts.detailer_classes = classes
             shared.opts.detailer_strength = strength
             shared.opts.detailer_padding = padding
             shared.opts.detailer_blur = blur
@@ -276,7 +295,7 @@ def ui_settings_change(detailers, strength, padding, blur, min_confidence, max_d
             shared.opts.detailer_max_size = max_size
             shared.opts.detailer_iou = iou
             shared.opts.save(shared.config_filename, silent=True)
-            shared.log.debug(f'Detailer settings: models={shared.opts.detailer_models} strength={shared.opts.detailer_strength} conf={shared.opts.detailer_conf} max={shared.opts.detailer_max} iou={shared.opts.detailer_iou} size={shared.opts.detailer_min_size}-{shared.opts.detailer_max_size} padding={shared.opts.detailer_padding}')
+            shared.log.debug(f'Detailer settings: models={shared.opts.detailer_models} classes={shared.opts.detailer_classes} strength={shared.opts.detailer_strength} conf={shared.opts.detailer_conf} max={shared.opts.detailer_max} iou={shared.opts.detailer_iou} size={shared.opts.detailer_min_size}-{shared.opts.detailer_max_size} padding={shared.opts.detailer_padding}')
 
         with gr.Accordion(open=False, label="Detailer", elem_id=f"{tab}_detailer_accordion", elem_classes=["small-accordion"], visible=shared.native):
             with gr.Row():
@@ -284,6 +303,8 @@ def ui_settings_change(detailers, strength, padding, blur, min_confidence, max_d
             with gr.Row():
                 detailers = gr.Dropdown(label="Detailers", elem_id=f"{tab}_detailers", choices=self.list, value=shared.opts.detailer_models, multiselect=True)
                 ui_common.create_refresh_button(detailers, self.enumerate, {}, elem_id=f"{tab}_detailers_refresh")
+            with gr.Row():
+                classes = gr.Textbox(label="Classes", placeholder="Classes", elem_id=f"{tab}_detailer_classes")
             with gr.Row():
                 strength = gr.Slider(label="Detailer strength", elem_id=f"{tab}_detailer_strength", value=shared.opts.detailer_strength, minimum=0, maximum=1, step=0.01)
                 max_detected = gr.Slider(label="Max detected", elem_id=f"{tab}_detailer_max", value=shared.opts.detailer_max, min=1, maximum=10, step=1)
@@ -296,15 +317,16 @@ def ui_settings_change(detailers, strength, padding, blur, min_confidence, max_d
             with gr.Row():
                 min_size = gr.Slider(label="Min size", elem_id=f"{tab}_detailer_min_size", value=shared.opts.detailer_min_size, minimum=0, maximum=1024, step=1)
                 max_size = gr.Slider(label="Max size", elem_id=f"{tab}_detailer_max_size", value=shared.opts.detailer_max_size, minimum=0, maximum=1024, step=1)
-            detailers.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
-            strength.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
-            padding.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
-            blur.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
-            min_confidence.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
-            max_detected.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
-            min_size.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
-            max_size.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
-            iou.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            detailers.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            classes.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            strength.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            padding.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            blur.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            min_confidence.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            max_detected.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            min_size.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            max_size.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
+            iou.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
             return enabled
 
 
diff --git a/modules/shared.py b/modules/shared.py
index 69574ff92..99bf39a3d 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -807,6 +807,7 @@ def get_default_modes():
 
     # "postprocessing_sep_detailer": OptionInfo("<h2>Detailer</h2>", "", gr.HTML),
     "detailer_model": OptionInfo("Detailer", "Detailer model", gr.Radio, lambda: {"choices": [x.name() for x in detailers], "visible": False}),
+    "detailer_classes": OptionInfo("", "Detailer classes", gr.Textbox, { "visible": False}),
     "detailer_conf": OptionInfo(0.6, "Min confidence", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05, "visible": False}),
     "detailer_max": OptionInfo(2, "Max detected", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1, "visible": False}),
     "detailer_iou": OptionInfo(0.5, "Max overlap", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.05, "visible": False}),