Bugfix: Workaround for Gradio Slider Change Bug

Bugfix: CSS styling to fix Gradio Image Height Bug. Made face swapping mask offsets resolution independent. Show offset mask as overlay. Changed layout for masking.
C0untFloyd · Mar 20, 2024 · 39fed86 · 39fed86
1 parent 64e2275
commit 39fed86
Show file tree

Hide file tree

Showing 9 changed files with 71 additions and 65 deletions.
diff --git a/README.md b/README.md
@@ -58,6 +58,16 @@ Additional commandline arguments are currently unsupported and settings should b
 
 
 ### Changelog
+
+**20.03.2024** v3.6.3
+
+- Bugfix: Workaround for Gradio Slider Change Bug
+- Bugfix: CSS Styling to fix Gradio Image Height Bug
+- Made face swapping mask offsets resolution independent
+- Show offset mask as overlay
+- Changed layout for masking
+
+
 **18.03.2024** v3.6.0
 
 - Updated to Gradio 4.21.0 - requiring many changes under the hood

diff --git a/installer/installer.py b/installer/installer.py
@@ -32,7 +32,7 @@ def install_dependencies():
     # Install Git and clone repo
     run_cmd("conda install -y -k git")
     run_cmd("git clone https://github.com/C0untFloyd/roop-unleashed.git")
-    run_cmd("git checkout 87943ad5413545db620921228bbcf73a6f9dab62")
+    run_cmd("git checkout 64e227539d70dc9b83953bd230fbd4d26d2759c7")
     # Install the webui dependencies
     update_dependencies()
 

diff --git a/roop/ProcessMgr.py b/roop/ProcessMgr.py
@@ -551,65 +551,52 @@ def simple_blend_with_mask(self, image1, image2, mask):
         return blended_image.astype(np.uint8)
 
 
-    # Paste back adapted from here
-    # https://github.com/fAIseh00d/refacer/blob/main/refacer.py
-    # which is revised insightface paste back code
-
     def paste_upscale(self, fake_face, upsk_face, M, target_img, scale_factor, mask_offsets):
         M_scale = M * scale_factor
         IM = cv2.invertAffineTransform(M_scale)
 
-        face_matte = np.full((target_img.shape[0],target_img.shape[1]), 255, dtype=np.uint8)
         ##Generate white square sized as a upsk_face
         img_matte = np.full((upsk_face.shape[0],upsk_face.shape[1]), 0, dtype=np.uint8)
 
-        top = mask_offsets[0]
-        bottom = target_img.shape[0] - mask_offsets[1]
-        left = mask_offsets[2]
-        right = target_img.shape[1] - mask_offsets[3]
+        w = img_matte.shape[1]
+        h = img_matte.shape[0]
+
+        top = int(mask_offsets[0] * h)
+        bottom = int(h - (mask_offsets[1] * h))
+        left = int(mask_offsets[2] * w)
+        right = int(w - (mask_offsets[3] * w))
         img_matte[top:bottom,left:right] = 255
 
         ##Transform white square back to target_img
         img_matte = cv2.warpAffine(img_matte, IM, (target_img.shape[1], target_img.shape[0]), flags=cv2.INTER_NEAREST, borderValue=0.0) 
-        ##Blacken the edges of face_matte by 1 pixels (so the mask in not expanded on the image edges)
         img_matte[:1,:] = img_matte[-1:,:] = img_matte[:,:1] = img_matte[:,-1:] = 0
 
-        #Detect the affine transformed white area
-        mask_h_inds, mask_w_inds = np.where(img_matte==255) 
-        #Calculate the size (and diagonal size) of transformed white area width and height boundaries
-        mask_h = np.max(mask_h_inds) - np.min(mask_h_inds) 
-        mask_w = np.max(mask_w_inds) - np.min(mask_w_inds)
-        mask_size = int(np.sqrt(mask_h*mask_w))
-        #Calculate the kernel size for eroding img_matte by kernel (insightface empirical guess for best size was max(mask_size//10,10))
-        # k = max(mask_size//12, 8)
-        k = max(mask_size//10, 10)
-        kernel = np.ones((k,k),np.uint8)
-        img_matte = cv2.erode(img_matte,kernel,iterations = 1)
-        #Calculate the kernel size for blurring img_matte by blur_size (insightface empirical guess for best size was max(mask_size//20, 5))
-        # k = max(mask_size//24, 4) 
-        k = max(mask_size//20, 5) 
+        # Normalize and blur img_matte as before
+        k = max(int(np.sqrt(np.max(np.where(img_matte==255)) - np.min(np.where(img_matte==255)))//10), 10)
         kernel_size = (k, k)
         blur_size = tuple(2*i+1 for i in kernel_size)
         img_matte = cv2.GaussianBlur(img_matte, blur_size, 0)
-
-        #Normalize images to float values and reshape
         img_matte = img_matte.astype(np.float32)/255
-        face_matte = face_matte.astype(np.float32)/255
-        img_matte = np.minimum(face_matte, img_matte)
+        if self.options.show_mask:
+            # Additional steps for green overlay
+            green_overlay = np.zeros_like(target_img)
+            green_color = [0, 255, 0]  # RGB for green
+            for i in range(3):  # Apply green color where img_matte is not zero
+                green_overlay[:, :, i] = np.where(img_matte > 0, green_color[i], 0)
         img_matte = np.reshape(img_matte, [img_matte.shape[0],img_matte.shape[1],1]) 
-        ##Transform upcaled face back to target_img
+
+        # Transform upsk_face and optionally blend with fake_face
         paste_face = cv2.warpAffine(upsk_face, IM, (target_img.shape[1], target_img.shape[0]), borderMode=cv2.BORDER_REPLICATE)
         if upsk_face is not fake_face:
             fake_face = cv2.warpAffine(fake_face, IM, (target_img.shape[1], target_img.shape[0]), borderMode=cv2.BORDER_REPLICATE)
             paste_face = cv2.addWeighted(paste_face, self.options.blend_ratio, fake_face, 1.0 - self.options.blend_ratio, 0)
 
-        ##Re-assemble image
-        paste_face = img_matte * paste_face
-        paste_face = paste_face + (1-img_matte) * target_img.astype(np.float32)
-        del img_matte
-        del face_matte
-        del upsk_face
-        del fake_face
+        # Re-assemble image
+        paste_face = img_matte * paste_face + (1-img_matte) * target_img.astype(np.float32)
+
+        if self.options.show_mask:
+            # Overlay the green overlay on the final image
+            paste_face = cv2.addWeighted(paste_face.astype(np.uint8), 1 - 0.5, green_overlay, 0.5, 0)
         return paste_face.astype(np.uint8)
 
 

diff --git a/roop/ProcessOptions.py b/roop/ProcessOptions.py
@@ -1,10 +1,11 @@
 class ProcessOptions:
 
-    def __init__(self,processors, face_distance,  blend_ratio, swap_mode, selected_index, masking_text, imagemask):
+    def __init__(self,processors, face_distance,  blend_ratio, swap_mode, selected_index, masking_text, imagemask, show_mask=False):
         self.processors = processors
         self.face_distance_threshold = face_distance
         self.blend_ratio = blend_ratio
         self.swap_mode = swap_mode
         self.selected_index = selected_index
         self.masking_text = masking_text
-        self.imagemask = imagemask
+        self.imagemask = imagemask
+        self.show_mask = show_mask
diff --git a/roop/core.py b/roop/core.py
@@ -178,7 +178,7 @@ def get_processing_plugins(use_clip):
     return processors
 
 
-def live_swap(frame, swap_mode, use_clip, clip_text, imagemask, selected_index = 0):
+def live_swap(frame, swap_mode, use_clip, clip_text, imagemask, show_mask, selected_index = 0):
     global process_mgr
 
     if frame is None:
@@ -190,7 +190,7 @@ def live_swap(frame, swap_mode, use_clip, clip_text, imagemask, selected_index =
     if len(roop.globals.INPUT_FACESETS) <= selected_index:
         selected_index = 0
     options = ProcessOptions(get_processing_plugins(use_clip), roop.globals.distance_threshold, roop.globals.blend_ratio,
-                              swap_mode, selected_index, clip_text,imagemask)
+                              swap_mode, selected_index, clip_text,imagemask, show_mask)
     process_mgr.initialize(roop.globals.INPUT_FACESETS, roop.globals.TARGET_FACES, options)
     newframe = process_mgr.process_frame(frame)
     if newframe is None:

diff --git a/roop/metadata.py b/roop/metadata.py
@@ -1,2 +1,2 @@
 name = 'roop unleashed'
-version = '3.6.0'
+version = '3.6.3'
diff --git a/roop/virtualcam.py b/roop/virtualcam.py
@@ -51,7 +51,7 @@ def virtualcamera(streamobs, cam_num,width,height):
             break
 
         if len(roop.globals.INPUT_FACESETS) > 0:
-            frame = live_swap(frame, "all", False, None, None)
+            frame = live_swap(frame, "all", False, None, None, False)
         #frame = fast_quantize_to_palette(frame)
         if cam:
             cam.send(frame)

diff --git a/ui/main.py b/ui/main.py
@@ -44,7 +44,9 @@ def run():
             max-height: 238.4px;
             overflow-y: auto !important;
         }
-"""
+        .image-container.svelte-1l6wqyv {height: 100%}
+
+    """
 
     while run_server:
         server_name = roop.globals.CFG.server_name

diff --git a/ui/tabs/faceswap_tab.py b/ui/tabs/faceswap_tab.py
@@ -45,11 +45,17 @@ def faceswap_tab():
                 with gr.Row():
                     with gr.Column(min_width=160):
                         input_faces = gr.Gallery(label="Input faces", allow_preview=False, preview=False, height=128, object_fit="scale-down", columns=8)
-                        with gr.Accordion(label="Advanced Settings", open=False):
-                            mask_top = gr.Slider(0, 256, value=0, label="Offset Face Top", step=1.0, interactive=True)
-                            mask_bottom = gr.Slider(0, 256, value=0, label="Offset Face Bottom", step=1.0, interactive=True)
-                            mask_left = gr.Slider(0, 256, value=0, label="Offset Face Left", step=1.0, interactive=True)
-                            mask_right = gr.Slider(0, 256, value=0, label="Offset Face Right", step=1.0, interactive=True)
+                        with gr.Accordion(label="Advanced Masking", open=False):
+                            chk_showmaskoffsets = gr.Checkbox(label="Show mask overlay in preview", value=False, interactive=True)
+                            mask_top = gr.Slider(0, 1.0, value=0, label="Offset Face Top", step=0.01, interactive=True)
+                            mask_bottom = gr.Slider(0, 1.0, value=0, label="Offset Face Bottom", step=0.01, interactive=True)
+                            mask_left = gr.Slider(0, 1.0, value=0, label="Offset Face Left", step=0.01, interactive=True)
+                            mask_right = gr.Slider(0, 1.0, value=0, label="Offset Face Right", step=0.01, interactive=True)
+                            bt_toggle_masking = gr.Button("Toggle manual masking", variant='secondary', size='sm')
+                            chk_useclip = gr.Checkbox(label="Use Text Masking", value=False)
+                            clip_text = gr.Textbox(label="List of objects to mask and restore back on fake image", value="cup,hands,hair,banana" ,elem_id='tooltip')
+                            gr.Dropdown(["Clip2Seg"], value="Clip2Seg", label="Engine")
+                            bt_preview_mask = gr.Button("👥 Show Mask Preview", variant='secondary')
                         bt_remove_selected_input_face = gr.Button("❌ Remove selected", size='sm')
                         bt_clear_input_faces = gr.Button("💥 Clear all", variant='stop', size='sm')
                     with gr.Column(min_width=160):
@@ -102,14 +108,6 @@ def faceswap_tab():
                     roop.globals.skip_audio = gr.Checkbox(label="Skip audio", value=False)
                     roop.globals.keep_frames = gr.Checkbox(label="Keep Frames (relevant only when extracting frames)", value=False)
                     roop.globals.wait_after_extraction = gr.Checkbox(label="Wait for user key press before creating video ", value=False)
-            with gr.Column(scale=1):
-                with gr.Tab("Text masking"):
-                    chk_useclip = gr.Checkbox(label="Use Text Masking", value=False)
-                    clip_text = gr.Textbox(label="List of objects to mask and restore back on fake image", value="cup,hands,hair,banana" ,elem_id='tooltip')
-                    gr.Dropdown(["Clip2Seg"], value="Clip2Seg", label="Engine")
-                    bt_preview_mask = gr.Button("👥 Show Mask Preview", variant='secondary')
-                with gr.Tab("Manual masking"):
-                    bt_toggle_masking = gr.Button("Toggle manual masking", variant='secondary', size='sm')
         with gr.Row(variant='panel'):
             with gr.Column():
                 bt_start = gr.Button("▶ Start", variant='primary')
@@ -126,16 +124,16 @@ def faceswap_tab():
                 resultvideo = gr.Video(label='Final Video', interactive=False, visible=False)
 
     previewinputs = [preview_frame_num, bt_destfiles, fake_preview, ui.globals.ui_selected_enhancer, selected_face_detection,
-                        max_face_distance, ui.globals.ui_blend_ratio, chk_useclip, clip_text, no_face_action, vr_mode, autorotate, maskimage]
+                        max_face_distance, ui.globals.ui_blend_ratio, chk_useclip, clip_text, no_face_action, vr_mode, autorotate, maskimage, chk_showmaskoffsets]
     previewoutputs = [previewimage, maskimage, preview_frame_num] 
     input_faces.select(on_select_input_face, None, None).then(fn=on_preview_frame_changed, inputs=previewinputs, outputs=previewoutputs)
     bt_remove_selected_input_face.click(fn=remove_selected_input_face, outputs=[input_faces])
     bt_srcfiles.change(fn=on_srcfile_changed, show_progress='full', inputs=bt_srcfiles, outputs=[dynamic_face_selection, face_selection, input_faces])
 
-    mask_top.input(fn=on_mask_top_changed, inputs=[mask_top], show_progress='hidden')
-    mask_bottom.input(fn=on_mask_bottom_changed, inputs=[mask_bottom], show_progress='hidden')
-    mask_left.input(fn=on_mask_left_changed, inputs=[mask_left], show_progress='hidden')
-    mask_right.input(fn=on_mask_right_changed, inputs=[mask_right], show_progress='hidden')
+    mask_top.release(fn=on_mask_top_changed, inputs=[mask_top], show_progress='hidden')
+    mask_bottom.release(fn=on_mask_bottom_changed, inputs=[mask_bottom], show_progress='hidden')
+    mask_left.release(fn=on_mask_left_changed, inputs=[mask_left], show_progress='hidden')
+    mask_right.release(fn=on_mask_right_changed, inputs=[mask_right], show_progress='hidden')
 
 
     target_faces.select(on_select_target_face, None, None)
@@ -192,7 +190,15 @@ def set_mask_offset(index, mask_offset):
     global SELECTED_INPUT_FACE_INDEX
 
     if len(roop.globals.INPUT_FACESETS) > SELECTED_INPUT_FACE_INDEX:
-        roop.globals.INPUT_FACESETS[SELECTED_INPUT_FACE_INDEX].faces[0].mask_offsets[index] = mask_offset
+        offs = roop.globals.INPUT_FACESETS[SELECTED_INPUT_FACE_INDEX].faces[0].mask_offsets
+        offs[index] = mask_offset
+        if offs[0] + offs[1] > 0.99:
+            offs[0] = 0.99
+            offs[1] = 0.0
+        if offs[2] + offs[3] > 0.99:
+            offs[2] = 0.99
+            offs[3] = 0.0
+        roop.globals.INPUT_FACESETS[SELECTED_INPUT_FACE_INDEX].faces[0].mask_offsets = offs
 
 
 
@@ -373,7 +379,7 @@ def on_end_face_selection():
 
 
 def on_preview_frame_changed(frame_num, files, fake_preview, enhancer, detection, face_distance, blend_ratio,
-                              use_clip, clip_text, no_face_action, vr_mode, auto_rotate, maskimage):
+                              use_clip, clip_text, no_face_action, vr_mode, auto_rotate, maskimage, show_mask):
     global SELECTED_INPUT_FACE_INDEX, manual_masking, current_video_fps
 
     from roop.core import live_swap
@@ -427,7 +433,7 @@ def on_preview_frame_changed(frame_num, files, fake_preview, enhancer, detection
 
     roop.globals.execution_threads = roop.globals.CFG.max_threads
     mask = layers[0] if layers is not None else None
-    current_frame = live_swap(current_frame, roop.globals.face_swap_mode, use_clip, clip_text, maskimage, SELECTED_INPUT_FACE_INDEX)
+    current_frame = live_swap(current_frame, roop.globals.face_swap_mode, use_clip, clip_text, maskimage, show_mask, SELECTED_INPUT_FACE_INDEX)
     if current_frame is None:
         return gr.Image(visible=True), None, gr.Slider(info=timeinfo)
     return gr.Image(value=util.convert_to_gradio(current_frame), visible=True), gr.ImageEditor(visible=False), gr.Slider(info=timeinfo)