Skip to content

Commit

Permalink
add detailer multi-class support
Browse files Browse the repository at this point in the history
Signed-off-by: Vladimir Mandic <mandic00@live.com>
  • Loading branch information
vladmandic committed Oct 21, 2024
1 parent 77f7ce6 commit 56ec09f
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 21 deletions.
10 changes: 7 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Change Log for SD.Next

## Update for 2024-10-20
## Update for 2024-10-21

### Highlights for 2024-10-20
### Highlights for 2024-10-21

#### Workflow highlights

Expand Down Expand Up @@ -42,7 +42,7 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition
[README](https://github.com/vladmandic/automatic/blob/master/README.md) | [CHANGELOG](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) | [WiKi](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.com/invite/sd-next-federal-batch-inspectors-1101998836328697867)


### Details for 2024-10-20
### Details for 2024-10-21

- **reprocess**
- new top-level button: reprocess latent from your history of generated image(s)
Expand Down Expand Up @@ -89,6 +89,10 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition
- image metadata includes info on used detailer models
- *note* detailer defaults are not saved in ui settings, they are saved in server settings
to apply your defaults, set ui values and apply via *system -> settings -> apply settings*
- if using models trained on multiple classes, you can specify which classes you want to detail as a comma-separated list
e.g. the original yolo detection model is trained on the coco dataset with 80 predefined classes
if you leave the field blank, it will use any class found in the model
you can see the classes defined in a model in the log when the model is loaded for the first time

- **extract lora**: extract combined lora from current memory state, thanks @AI-Casanova
load any LoRA(s) and play with generate as usual and once you like the results simply extract combined LoRA for future use!
Expand Down
58 changes: 40 additions & 18 deletions modules/postprocess/yolo.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import TYPE_CHECKING
import os
import numpy as np
import gradio as gr
Expand All @@ -7,6 +8,7 @@


PREDEFINED = [ # <https://huggingface.co/vladmandic/yolo-detailers/tree/main>
'https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt',
'https://huggingface.co/vladmandic/yolo-detailers/resolve/main/face-yolo8n.pt',
'https://huggingface.co/vladmandic/yolo-detailers/resolve/main/hand_yolov8n.pt',
'https://huggingface.co/vladmandic/yolo-detailers/resolve/main/person_yolov8n-seg.pt',
Expand All @@ -16,7 +18,9 @@


class YoloResult:
def __init__(self, score: float, box: list[int], mask: Image.Image = None, item: Image.Image = None, size: float = 0, width = 0, height = 0, args = {}):
def __init__(self, cls: int, label: str, score: float, box: list[int], mask: Image.Image = None, item: Image.Image = None, size: float = 0, width = 0, height = 0, args = {}):
self.cls = cls
self.label = label
self.score = score
self.box = box
self.mask = mask
Expand Down Expand Up @@ -79,10 +83,12 @@ def predict(
args = {
'conf': shared.opts.detailer_conf,
'iou': shared.opts.detailer_iou,
'max_det': shared.opts.detailer_max,
# 'max_det': shared.opts.detailer_max,
}
try:
model.to(device)
if TYPE_CHECKING:
from ultralytics import YOLO # pylint: disable=import-outside-toplevel, unused-import
model: YOLO = model.to(device)
predictions = model.predict(
source=[image],
stream=False,
Expand All @@ -101,10 +107,19 @@ def predict(
shared.log.error(f'Detailer predict: {e}')
return result

desired = shared.opts.detailer_classes.split(',')
desired = [d.lower().strip() for d in desired]
desired = [d for d in desired if len(d) > 0]

for prediction in predictions:
boxes = prediction.boxes.xyxy.detach().int().cpu().numpy() if prediction.boxes is not None else []
scores = prediction.boxes.conf.detach().float().cpu().numpy() if prediction.boxes is not None else []
for score, box in zip(scores, boxes):
classes = prediction.boxes.cls.detach().float().cpu().numpy() if prediction.boxes is not None else []
for score, box, cls in zip(scores, boxes, classes):
cls = int(cls)
label = prediction.names[cls] if cls < len(prediction.names) else f'cls{cls}'
if len(desired) > 0 and label.lower() not in desired:
continue
box = box.tolist()
mask_image = None
w, h = box[2] - box[0], box[3] - box[1]
Expand All @@ -116,7 +131,9 @@ def predict(
draw = ImageDraw.Draw(mask_image)
draw.rectangle(box, fill="white", outline=None, width=0)
cropped = image.crop(box)
result.append(YoloResult(score=round(score, 2), box=box, mask=mask_image, item=cropped, size=size, width=w, height=h, args=args))
result.append(YoloResult(cls=cls, label=label, score=round(score, 2), box=box, mask=mask_image, item=cropped, size=size, width=w, height=h, args=args))
if len(result) >= shared.opts.detailer_max:
break
return result

def load(self, model_name: str = None):
Expand All @@ -133,9 +150,10 @@ def load(self, model_name: str = None):
try:
model_file = modelloader.load_file_from_url(url=model_url, model_dir=shared.opts.yolo_dir, file_name=file_name)
if model_file is not None:
shared.log.info(f'Load: type=Detailer name="{model_name}" model="{model_file}"')
from ultralytics import YOLO # pylint: disable=import-outside-toplevel
model = YOLO(model_file)
classes = list(model.names.values())
shared.log.info(f'Load: type=Detailer name="{model_name}" model="{model_file}" classes={classes}')
self.models[model_name] = model
return model_name, model
except Exception as e:
Expand Down Expand Up @@ -218,7 +236,7 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
if p.steps < 1:
p.steps = orig_p.get('steps', 0)

report = [{'score': i.score, 'size': f'{i.width}x{i.height}' } for i in items]
report = [{'label': i.label, 'score': i.score, 'size': f'{i.width}x{i.height}' } for i in items]
shared.log.info(f'Detailer: model="{name}" items={report} args={items[0].args} denoise={p.denoising_strength} blur={p.mask_blur} width={p.width} height={p.height} padding={p.inpaint_full_res_padding}')
shared.log.debug(f'Detailer: prompt="{prompt}" negative="{negative}"')
models_used.append(name)
Expand Down Expand Up @@ -265,8 +283,9 @@ def restore(self, np_image, p: processing.StableDiffusionProcessing = None):
return np_image

def ui(self, tab: str):
def ui_settings_change(detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou):
def ui_settings_change(detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou):
shared.opts.detailer_models = detailers
shared.opts.detailer_classes = classes
shared.opts.detailer_strength = strength
shared.opts.detailer_padding = padding
shared.opts.detailer_blur = blur
Expand All @@ -276,14 +295,16 @@ def ui_settings_change(detailers, strength, padding, blur, min_confidence, max_d
shared.opts.detailer_max_size = max_size
shared.opts.detailer_iou = iou
shared.opts.save(shared.config_filename, silent=True)
shared.log.debug(f'Detailer settings: models={shared.opts.detailer_models} strength={shared.opts.detailer_strength} conf={shared.opts.detailer_conf} max={shared.opts.detailer_max} iou={shared.opts.detailer_iou} size={shared.opts.detailer_min_size}-{shared.opts.detailer_max_size} padding={shared.opts.detailer_padding}')
shared.log.debug(f'Detailer settings: models={shared.opts.detailer_models} classes={shared.opts.detailer_classes} strength={shared.opts.detailer_strength} conf={shared.opts.detailer_conf} max={shared.opts.detailer_max} iou={shared.opts.detailer_iou} size={shared.opts.detailer_min_size}-{shared.opts.detailer_max_size} padding={shared.opts.detailer_padding}')

with gr.Accordion(open=False, label="Detailer", elem_id=f"{tab}_detailer_accordion", elem_classes=["small-accordion"], visible=shared.native):
with gr.Row():
enabled = gr.Checkbox(label="Enable detailer pass", elem_id=f"{tab}_detailer_enabled", value=False)
with gr.Row():
detailers = gr.Dropdown(label="Detailers", elem_id=f"{tab}_detailers", choices=self.list, value=shared.opts.detailer_models, multiselect=True)
ui_common.create_refresh_button(detailers, self.enumerate, {}, elem_id=f"{tab}_detailers_refresh")
with gr.Row():
classes = gr.Textbox(label="Classes", placeholder="Classes", elem_id=f"{tab}_detailer_classes")
with gr.Row():
strength = gr.Slider(label="Detailer strength", elem_id=f"{tab}_detailer_strength", value=shared.opts.detailer_strength, minimum=0, maximum=1, step=0.01)
max_detected = gr.Slider(label="Max detected", elem_id=f"{tab}_detailer_max", value=shared.opts.detailer_max, min=1, maximum=10, step=1)
Expand All @@ -296,15 +317,16 @@ def ui_settings_change(detailers, strength, padding, blur, min_confidence, max_d
with gr.Row():
min_size = gr.Slider(label="Min size", elem_id=f"{tab}_detailer_min_size", value=shared.opts.detailer_min_size, minimum=0, maximum=1024, step=1)
max_size = gr.Slider(label="Max size", elem_id=f"{tab}_detailer_max_size", value=shared.opts.detailer_max_size, minimum=0, maximum=1024, step=1)
detailers.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
strength.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
padding.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
blur.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
min_confidence.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
max_detected.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
min_size.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
max_size.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
iou.change(fn=ui_settings_change, inputs=[detailers, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
detailers.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
classes.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
strength.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
padding.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
blur.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
min_confidence.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
max_detected.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
min_size.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
max_size.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
iou.change(fn=ui_settings_change, inputs=[detailers, classes, strength, padding, blur, min_confidence, max_detected, min_size, max_size, iou], outputs=[])
return enabled


Expand Down
1 change: 1 addition & 0 deletions modules/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,7 @@ def get_default_modes():

# "postprocessing_sep_detailer": OptionInfo("<h2>Detailer</h2>", "", gr.HTML),
"detailer_model": OptionInfo("Detailer", "Detailer model", gr.Radio, lambda: {"choices": [x.name() for x in detailers], "visible": False}),
"detailer_classes": OptionInfo("", "Detailer classes", gr.Textbox, { "visible": False}),
"detailer_conf": OptionInfo(0.6, "Min confidence", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05, "visible": False}),
"detailer_max": OptionInfo(2, "Max detected", gr.Slider, {"minimum": 1, "maximum": 10, "step": 1, "visible": False}),
"detailer_iou": OptionInfo(0.5, "Max overlap", gr.Slider, {"minimum": 0, "maximum": 1.0, "step": 0.05, "visible": False}),
Expand Down

0 comments on commit 56ec09f

Please sign in to comment.