diff --git a/README.md b/README.md
index 17d244b65..3879ca13e 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,15 @@ Additional commandline arguments are currently unsupported and settings should b
 ### Changelog
 
+**08.01.2024** v3.5.0
+
+- Bugfix: wrong access options when creating folders
+- New auto-rotation of horizontal faces, fixing bad landmark positions (expanding on [PR 364](https://github.com/C0untFloyd/roop-unleashed/pull/364))
+- Simple VR option for stereo images/movies, best used in selected-face mode
+- Added RestoreFormer enhancer - https://github.com/wzhouxiff/RestoreFormer
+- Bumped package versions for onnx/Torch etc.
+
+
 **16.10.2023** v3.3.4
 
 **11.8.2023** v2.7.0
diff --git a/requirements.txt b/requirements.txt
index f182bbeef..3f78996c1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,22 +1,19 @@
 --extra-index-url https://download.pytorch.org/whl/cu118
-numpy==1.24.2
-gradio==3.44.2
-opencv-python==4.8.0.76
-onnx==1.14.1
+numpy==1.26.2
+gradio==3.50.2
+opencv-python==4.8.1.78
+onnx==1.15.0
 insightface==0.7.3
-psutil==5.9.5
-pillow==10.0.1
-torch==2.0.1+cu118; sys_platform != 'darwin'
-torch==2.0.1; sys_platform == 'darwin'
-torchvision==0.15.2+cu118; sys_platform != 'darwin'
-torchvision==0.15.2; sys_platform == 'darwin'
-onnxruntime==1.16.0; sys_platform == 'darwin' and platform_machine != 'arm64'
+psutil==5.9.6
+torch==2.1.2+cu118; sys_platform != 'darwin'
+torch==2.1.2; sys_platform == 'darwin'
+torchvision==0.16.2+cu118; sys_platform != 'darwin'
+torchvision==0.16.2; sys_platform == 'darwin'
+onnxruntime==1.16.3; sys_platform == 'darwin' and platform_machine != 'arm64'
 onnxruntime-silicon==1.13.1; sys_platform == 'darwin' and platform_machine == 'arm64'
-onnxruntime-gpu==1.16.1; sys_platform != 'darwin'
-protobuf==4.23.2
+onnxruntime-gpu==1.16.3; sys_platform != 'darwin'
 tqdm==4.66.1
 ftfy
 regex
-pyvirtualcam
-imutils==0.5.4
+pyvirtualcam
\ No newline at end of file
diff --git a/roop/ProcessMgr.py b/roop/ProcessMgr.py
index b03d6b695..becc40b62 100644
--- a/roop/ProcessMgr.py
+++ b/roop/ProcessMgr.py
@@ -5,11 +5,12 @@
 from roop.ProcessOptions import ProcessOptions
 
-from roop.face_util import get_first_face, get_all_faces, rotate_image_180, rotate_image_90, rotate_anticlockwise, rotate_clockwise
+from roop.face_util import get_first_face, get_all_faces, rotate_image_180, rotate_anticlockwise, rotate_clockwise, clamp_cut_values
 from roop.utilities import compute_cosine_distance, get_device, str_to_class
+import roop.vr_util as vr
 
 from typing import Any, List, Callable
-from roop.typing import Frame
+from roop.typing import Frame, Face
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from threading import Thread, Lock
 from queue import Queue
@@ -59,12 +60,13 @@ class ProcessMgr():
 
     plugins = {
-    'faceswap'      : 'FaceSwapInsightFace',
-    'mask_clip2seg' : 'Mask_Clip2Seg',
-    'codeformer'    : 'Enhance_CodeFormer',
-    'gfpgan'        : 'Enhance_GFPGAN',
-    'dmdnet'        : 'Enhance_DMDNet',
-    'gpen'          : 'Enhance_GPEN',
+    'faceswap'      : 'FaceSwapInsightFace',
+    'mask_clip2seg' : 'Mask_Clip2Seg',
+    'codeformer'    : 'Enhance_CodeFormer',
+    'gfpgan'        : 'Enhance_GFPGAN',
+    'dmdnet'        : 'Enhance_DMDNet',
+    'gpen'          : 'Enhance_GPEN',
+    'restoreformer' : 'Enhance_RestoreFormer',
     }
 
     def __init__(self, progress):
@@ -310,40 +312,6 @@ def swap_faces(self, frame, temp_frame):
         faces = get_all_faces(frame)
         if faces is None:
             return num_faces_found, frame
-
-        if self.options.swap_mode == "single_face_frames_only":
-            if len(faces) == 1:
-                num_faces_found += 1
-                target_face = faces[0]
-
-                temp_frame = self.process_face(self.options.selected_index, target_face, temp_frame)
-
-                input_face = self.input_face_datas[self.options.selected_index].faces[0]
-                rotation_action = self.rotation_action(target_face, frame)
-                swapped_face = None
-                optimal_frame = temp_frame.copy()
-
-                # before we try to get the swapped face again, we need to make sure we're getting it from the most optimal version of the frame,
-                # otherwise it sometimes isn't detected; so if the frame needs to be rotated, do that first
-                if rotation_action == "rotate_clockwise":
-                    optimal_frame = rotate_clockwise(optimal_frame)
-                elif rotation_action == "rotate_anticlockwise":
-                    optimal_frame = rotate_anticlockwise(optimal_frame)
-
-                swapped_face = get_first_face(optimal_frame)
-
-                if swapped_face is None:
-                    num_faces_found = 0
-                    return num_faces_found, frame
-                else:
-                    # check whether the face closely matches the face we intended to swap it to;
-                    # if it doesn't, it's probably insightface failing and returning some garbled mess, so skip it
-                    cosine_distance = compute_cosine_distance(swapped_face.embedding, input_face.embedding)
-                    if cosine_distance >= self.options.face_distance_threshold:
-                        num_faces_found = 0
-                        return num_faces_found, frame
-            else:
-                return num_faces_found, frame
 
         if self.options.swap_mode == "all":
             for face in faces:
@@ -358,7 +326,8 @@ def swap_faces(self, frame, temp_frame):
                 if i < len(self.input_face_datas):
                     temp_frame = self.process_face(i, face, temp_frame)
                     num_faces_found += 1
-                break
+                if not roop.globals.vr_mode:
+                    break
             del face
         elif self.options.swap_mode == "all_female" or self.options.swap_mode == "all_male":
             gender = 'F' if self.options.swap_mode == "all_female" else 'M'
@@ -368,6 +337,10 @@
                     temp_frame = self.process_face(self.options.selected_index, face, temp_frame)
                 del face
 
+        if roop.globals.vr_mode and num_faces_found % 2 > 0:
+            # stereo image: a face has to be matched in both eye views, so the count must be even
+            num_faces_found = 0
+            return num_faces_found, frame
         if num_faces_found == 0:
             return num_faces_found, frame
@@ -377,7 +350,7 @@
 
         return num_faces_found, temp_frame
 
-    def rotation_action(self, original_face, frame:Frame):
+    def rotation_action(self, original_face:Face, frame:Frame):
         (height, width) = frame.shape[:2]
 
         bounding_box_width = original_face.bbox[2] - original_face.bbox[0]
@@ -412,7 +385,7 @@ def rotation_action(self, original_face, frame:Frame):
             # this is someone lying down with their face in the left hand side of the frame
             return "rotate_clockwise"
 
-        return "noop"
+        return None
 
 
     def auto_rotate_frame(self, original_face, frame:Frame):
@@ -423,23 +396,13 @@
 
         if rotation_action == "rotate_anticlockwise":
             #face is horizontal, rotating frame anti-clockwise and getting face bounding box from rotated frame
-            rotated_bbox = self.rotate_bbox_anticlockwise(original_face.bbox, frame)
             frame = rotate_anticlockwise(frame)
-            target_face = self.get_rotated_target_face(rotated_bbox, frame)
         elif rotation_action == "rotate_clockwise":
             #face is horizontal, rotating frame clockwise and getting face bounding box from rotated frame
-            rotated_bbox = self.rotate_bbox_clockwise(original_face.bbox, frame)
             frame = rotate_clockwise(frame)
-            target_face = self.get_rotated_target_face(rotated_bbox, frame)
-
-        if target_face is None:
-            #no face was detected in the rotated frame, so use the original frame and face
-            target_face = original_face
-            frame = original_frame
-            rotation_action = "noop"
-        return target_face, frame, rotation_action
-
+
 
     def auto_unrotate_frame(self, frame:Frame, rotation_action):
         if rotation_action == "rotate_anticlockwise":
@@ -450,90 +413,56 @@ def auto_unrotate_frame(self, frame:Frame, rotation_action):
 
         return frame
 
-    def get_rotated_target_face(self, rotated_bbox, rotated_frame:Frame):
-        rotated_faces = get_all_faces(rotated_frame)
-
-        if not rotated_faces:
-            return None
-
-        rotated_target_face = rotated_faces[0]
-        best_iou = 0
-
-        for rotated_face in rotated_faces:
-            iou = self.intersection_over_union(rotated_bbox, rotated_face.bbox)
-            if iou > best_iou:
-                rotated_target_face = rotated_face
-                best_iou = iou
-
-        return rotated_target_face
+    def process_face(self, face_index, target_face:Face, frame:Frame):
+        enhanced_frame = None
+        inputface = self.input_face_datas[face_index].faces[0]
 
-    def rotate_bbox_clockwise(self, bbox, frame:Frame):
-        (height, width) = frame.shape[:2]
+        rotation_action = None
+        if roop.globals.autorotate_faces:
+            # check for sideways rotation of the face
+            rotation_action = self.rotation_action(target_face, frame)
+            if rotation_action is not None:
+                (startX, startY, endX, endY) = target_face["bbox"].astype("int")
+                width = endX - startX
+                height = endY - startY
+                offs = int(max(width, height) * 0.25)
+                rotcutframe, startX, startY, endX, endY = self.cutout(frame, startX - offs, startY - offs, endX + offs, endY + offs)
+                if rotation_action == "rotate_anticlockwise":
+                    rotcutframe = rotate_anticlockwise(rotcutframe)
+                elif rotation_action == "rotate_clockwise":
+                    rotcutframe = rotate_clockwise(rotcutframe)
+                # rotate the cutout and re-detect the face to correct wonky landmarks
+                rotface = get_first_face(rotcutframe)
+                if rotface is None:
+                    rotation_action = None
+                else:
+                    saved_frame = frame.copy()
+                    frame = rotcutframe
+                    target_face = rotface
 
-        start_x = bbox[0]
-        start_y = bbox[1]
-        end_x = bbox[2]
-        end_y = bbox[3]
-        #bottom left corner becomes top left corner
-        #top right corner becomes bottom right corner
-        rotated_start_x = height - end_y
-        rotated_start_y = start_x
-        rotated_end_x = height - start_y
-        rotated_end_y = end_x
+        # if roop.globals.vr_mode:
+        #     bbox = target_face.bbox
+        #     [orig_height, orig_width, _] = frame.shape
 
-        return [rotated_start_x, rotated_start_y, rotated_end_x, rotated_end_y]
+        #     # Convert bounding box to ints
+        #     x1, y1, x2, y2 = map(int, bbox)
 
+        #     # Determine the center of the bounding box
+        #     x_center = (x1 + x2) / 2
+        #     y_center = (y1 + y2) / 2
 
-    def rotate_bbox_anticlockwise(self, bbox, frame:Frame):
-
-        (height, width) = frame.shape[:2]
+        #     # Normalize coordinates to range [-1, 1]
+        #     x_center_normalized = x_center / (orig_width / 2) - 1
+        #     y_center_normalized = y_center / (orig_height / 2) - 1
 
-        start_x = bbox[0]
-        start_y = bbox[1]
-        end_x = bbox[2]
-        end_y = bbox[3]
-
-        # So the algorithm is
-        # - top right corner translates to top left corner which gives start_x, start_y and is calculated as follows: (start_y, width - end_x)
-        # - bottom left corner translates to bottom right corner giving end_x, end_y and is calculated as follows: (end_y, width - start_x)
-
-        rotated_start_x = start_y
-        rotated_start_y = width - end_x
-        rotated_end_x = end_y
-        rotated_end_y = width - start_x
-
-        return [rotated_start_x, rotated_start_y, rotated_end_x, rotated_end_y]
-
-
-    def intersection_over_union(self,boxA, boxB):
-        # https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
-        # determine the (x, y)-coordinates of the intersection rectangle
-        xA = max(boxA[0], boxB[0])
-        yA = max(boxA[1], boxB[1])
-        xB = min(boxA[2], boxB[2])
-        yB = min(boxA[3], boxB[3])
-        # compute the area of intersection rectangle
-        interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
-        # compute the area of both the prediction and ground-truth
-        # rectangles
-        boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
-        boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
-        # compute the intersection over union by taking the intersection
-        # area and dividing it by the sum of prediction + ground-truth
-        # areas - the intersection area
-        iou = interArea / float(boxAArea + boxBArea - interArea)
-        # return the intersection over union value
-        return iou
-
-
-    def process_face(self,face_index, target_face, frame:Frame):
-        target_face, frame, rotation_action = self.auto_rotate_frame(target_face, frame)
+        #     # Convert normalized coordinates to spherical (theta, phi)
+        #     theta = x_center_normalized * 180  # Theta ranges from -180 to 180 degrees
+        #     phi = -y_center_normalized * 90    # Phi ranges from -90 to 90 degrees
 
-        enhanced_frame = None
-        inputface = self.input_face_datas[face_index].faces[0]
+        #     img = vr.get_perspective(frame, 90, theta, phi, 1280, 1280)  # Generate perspective image
 
         for p in self.processors:
             if p.type == 'swap':
@@ -546,6 +475,7 @@ def process_face(self,face_index, target_face, frame:Frame):
 
         upscale = 512
         orig_width = fake_frame.shape[1]
+
         fake_frame = cv2.resize(fake_frame, (upscale, upscale), cv2.INTER_CUBIC)
         mask_offsets = inputface.mask_offsets
@@ -555,7 +485,11 @@
         else:
             result = self.paste_upscale(fake_frame, enhanced_frame, target_face.matrix, frame, scale_factor, mask_offsets)
 
-        return self.auto_unrotate_frame(result, rotation_action)
+        if rotation_action is not None:
+            fake_frame = self.auto_unrotate_frame(result, rotation_action)
+            return self.paste_simple(fake_frame, saved_frame, startX, startY)
+
+        return result
@@ -571,6 +505,13 @@ def cutout(self, frame:Frame, start_x, start_y, end_x, end_y):
             end_y = frame.shape[0]
         return frame[start_y:end_y, start_x:end_x], start_x, start_y, end_x, end_y
 
+    def paste_simple(self, src:Frame, dest:Frame, start_x, start_y):
+        end_x = start_x + src.shape[1]
+        end_y = start_y + src.shape[0]
+
+        start_x, end_x, start_y, end_y = clamp_cut_values(start_x, end_x, start_y, end_y, dest)
+        dest[start_y:end_y, start_x:end_x] = src
+        return dest
 
 
 # Paste back adapted from here
diff --git a/roop/core.py b/roop/core.py
index c475d7a1f..a89061415 100755
--- a/roop/core.py
+++ b/roop/core.py
@@ -121,7 +121,7 @@ def pre_check() -> bool:
     util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/GFPGANv1.4.onnx'])
     util.conditional_download(download_directory_path, ['https://github.com/csxmli2016/DMDNet/releases/download/v1/DMDNet.pth'])
     util.conditional_download(download_directory_path, ['https://github.com/facefusion/facefusion-assets/releases/download/models/GPEN-BFR-512.onnx'])
-
+    util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/restoreformer.onnx'])
     download_directory_path = util.resolve_relative_path('../models/CLIP')
     util.conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/rd64-uni-refined.pth'])
     download_directory_path = util.resolve_relative_path('../models/CodeFormer')
@@ -173,6 +173,8 @@ def get_processing_plugins(use_clip):
         processors += ",dmdnet"
     elif roop.globals.selected_enhancer == 'GPEN':
         processors += ",gpen"
+    elif roop.globals.selected_enhancer == 'Restoreformer':
+        processors += ",restoreformer"
     return processors
diff --git a/roop/face_util.py b/roop/face_util.py
index 74c52be1d..9914ff737 100644
--- a/roop/face_util.py
+++ b/roop/face_util.py
@@ -5,7 +5,6 @@
 import roop.globals
 from roop.typing import Frame, Face
-import imutils
 import cv2
 import numpy as np
 from skimage import transform as trans
@@ -24,11 +23,18 @@ def get_face_analyser() -> Any:
     with THREAD_LOCK_ANALYSER:
         if FACE_ANALYSER is None:
             if roop.globals.CFG.force_cpu:
-                print('Forcing CPU for Face Analysis')
-                FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=['CPUExecutionProvider'])
+                print("Forcing CPU for Face Analysis")
+                FACE_ANALYSER = insightface.app.FaceAnalysis(
+                    name="buffalo_l", providers=["CPUExecutionProvider"]
+                )
             else:
-                FACE_ANALYSER = insightface.app.FaceAnalysis(name='buffalo_l', providers=roop.globals.execution_providers)
-            FACE_ANALYSER.prepare(ctx_id=0, det_size=(640, 640) if roop.globals.default_det_size else (320,320))
+                FACE_ANALYSER = insightface.app.FaceAnalysis(
+                    name="buffalo_l", providers=roop.globals.execution_providers
+                )
+            FACE_ANALYSER.prepare(
+                ctx_id=0,
+                det_size=(640, 640) if roop.globals.default_det_size else (320, 320),
+            )
     return FACE_ANALYSER
@@ -44,7 +50,7 @@ def get_first_face(frame: Frame) -> Any:
 def get_all_faces(frame: Frame) -> Any:
     try:
         faces = get_face_analyser().get(frame)
-        return sorted(faces, key = lambda x : x.bbox[0])
+        return sorted(faces, key=lambda x: x.bbox[0])
     except:
         return None
@@ -52,7 +58,7 @@ def get_all_faces(frame: Frame) -> Any:
 def extract_face_images(source_filename, video_info, extra_padding=-1.0):
     face_data = []
     source_image = None
-    
+
     if video_info[0]:
         frame = get_video_frame(source_filename, video_info[1])
         if frame is not None:
@@ -61,23 +67,23 @@ def extract_face_images(source_filename, video_info, extra_padding=-1.0):
             return face_data
     else:
         source_image = cv2.imread(source_filename)
-    
+
     faces = get_all_faces(source_image)
     if faces is None:
         return face_data
 
     i = 0
     for face in faces:
-        (startX, startY, endX, endY) = face['bbox'].astype("int")
+        (startX, startY, endX, endY) = face["bbox"].astype("int")
         if extra_padding > 0.0:
-            if source_image.shape[:2] == (512,512):
+            if source_image.shape[:2] == (512, 512):
                 i += 1
                 face_data.append([face, source_image])
                 continue
 
             found = False
-            for i in range(1,3):
-                (startX, startY, endX, endY) = face['bbox'].astype("int")
+            for i in range(1, 3):
+                (startX, startY, endX, endY) = face["bbox"].astype("int")
                 cutout_padding = extra_padding
                 # top needs extra room for detection
                 padding = int((endY - startY) * cutout_padding)
@@ -91,7 +97,9 @@ def extract_face_images(source_filename, video_info, extra_padding=-1.0):
                 padding = int((endX - startX) * cutout_padding)
                 startX -= padding
                 endX += padding
-                startX, endX, startY, endY = clamp_cut_values(startX, endX, startY, endY, source_image)
+                startX, endX, startY, endY = clamp_cut_values(
+                    startX, endX, startY, endY, source_image
+                )
                 face_temp = source_image[startY:endY, startX:endX]
                 face_temp = resize_image_keep_content(face_temp)
                 testfaces = get_all_faces(face_temp)
@@ -104,7 +112,6 @@ def extract_face_images(source_filename, video_info, extra_padding=-1.0):
             if not found:
                 print("No face found after resizing, this shouldn't happen!")
             continue
-        
         face_temp = source_image[startY:endY, startX:endX]
         if face_temp.size < 1:
@@ -114,6 +121,7 @@ def extract_face_images(source_filename, video_info, extra_padding=-1.0):
         face_data.append([face, face_temp])
     return face_data
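clamp_cut_values, shown next, is unchanged in behavior here but is now also imported by ProcessMgr for the new paste_simple helper. A minimal self-contained sketch of its clipping behavior (the frame size and box values below are hypothetical):

```python
import numpy as np

# Standalone restatement of face_util.clamp_cut_values: clip a (possibly
# padded) crop rectangle so it stays inside the frame; image.shape is (h, w, c).
def clamp_cut_values(startX, endX, startY, endY, image):
    startX = max(startX, 0)
    startY = max(startY, 0)
    endX = min(endX, image.shape[1])
    endY = min(endY, image.shape[0])
    return startX, endX, startY, endY

frame = np.zeros((480, 640, 3), dtype=np.uint8)
# a face bbox padded past the frame edges comes back clamped
print(clamp_cut_values(-20, 700, -10, 500, frame))  # (0, 640, 0, 480)
```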
+
 def clamp_cut_values(startX, endX, startY, endY, image):
     if startX < 0:
         startX = 0
@@ -126,34 +134,37 @@ def clamp_cut_values(startX, endX, startY, endY, image):
     return startX, endX, startY, endY
 
 
-
 def get_face_swapper() -> Any:
     global FACE_SWAPPER
     with THREAD_LOCK_SWAPPER:
         if FACE_SWAPPER is None:
-            model_path = resolve_relative_path('../models/inswapper_128.onnx')
-            FACE_SWAPPER = insightface.model_zoo.get_model(model_path, providers=roop.globals.execution_providers)
+            model_path = resolve_relative_path("../models/inswapper_128.onnx")
+            FACE_SWAPPER = insightface.model_zoo.get_model(
+                model_path, providers=roop.globals.execution_providers
+            )
     return FACE_SWAPPER
 
 
 def pre_check() -> bool:
-    download_directory_path = resolve_relative_path('../models')
-    conditional_download(download_directory_path, ['https://huggingface.co/countfloyd/deepfake/resolve/main/inswapper_128.onnx'])
+    download_directory_path = resolve_relative_path("../models")
+    conditional_download(
+        download_directory_path,
+        ["https://huggingface.co/countfloyd/deepfake/resolve/main/inswapper_128.onnx"],
+    )
     return True
 
 
 def swap_face(source_face: Face, target_face: Face, temp_frame: Frame) -> Frame:
     return get_face_swapper().get(temp_frame, target_face, source_face, paste_back=True)
 
+
 def face_offset_top(face: Face, offset):
-    smallestmin = np.min(face.landmark_2d_106, 1)
-    smallest = smallestmin[1]
-    face['bbox'][1] += offset
-    face['bbox'][3] += offset
+    face["bbox"][1] += offset
+    face["bbox"][3] += offset
     lm106 = face.landmark_2d_106
     add = np.full_like(lm106, [0, offset])
-    face['landmark_2d_106'] = lm106 + add
+    face["landmark_2d_106"] = lm106 + add
     return face
@@ -171,15 +182,15 @@ def resize_image_keep_content(image, new_width=512, new_height=512):
     (h, w) = image.shape[:2]
     if h == new_height and w == new_width:
         return image
-    resize_img = np.zeros(shape=(new_height,new_width,3), dtype=image.dtype)
+    resize_img = np.zeros(shape=(new_height, new_width, 3), dtype=image.dtype)
     offs = (new_width - w) if h == new_height else (new_height - h)
     startoffs = int(offs // 2) if offs % 2 == 0 else int(offs // 2) + 1
-    offs = int(offs // 2)
-    
+    offs = int(offs // 2)
+
     if h == new_height:
-        resize_img[0:new_height, startoffs:new_width-offs] = image
+        resize_img[0:new_height, startoffs : new_width - offs] = image
     else:
-        resize_img[startoffs:new_height-offs, 0:new_width] = image
+        resize_img[startoffs : new_height - offs, 0:new_width] = image
 
     return resize_img
@@ -187,55 +198,65 @@ def rotate_image_90(image, rotate=True):
     if rotate:
         return np.rot90(image)
     else:
-        return np.rot90(image,1,(1,0))
+        return np.rot90(image, 1, (1, 0))
 
 
 def rotate_anticlockwise(frame):
-    return imutils.rotate_bound(frame, -90)
+    return rotate_image_90(frame)
 
 
 def rotate_clockwise(frame):
-    return imutils.rotate_bound(frame, 90)
+    return rotate_image_90(frame, False)
 
 
 def rotate_image_180(image):
-    return np.flip(image,0)
+    return np.flip(image, 0)
 
 
 # alignment code from insightface https://github.com/deepinsight/insightface/blob/master/python-package/insightface/utils/face_align.py
 
 arcface_dst = np.array(
-    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
-     [41.5493, 92.3655], [70.7299, 92.2041]],
-    dtype=np.float32)
-
-def estimate_norm(lmk, image_size=112,mode='arcface'):
+    [
+        [38.2946, 51.6963],
+        [73.5318, 51.5014],
+        [56.0252, 71.7366],
+        [41.5493, 92.3655],
+        [70.7299, 92.2041],
+    ],
+    dtype=np.float32,
+)
+
+
+def estimate_norm(lmk, image_size=112, mode="arcface"):
     assert lmk.shape == (5, 2)
-    assert image_size%112==0 or image_size%128==0
-    if image_size%112==0:
-        ratio = float(image_size)/112.0
+    assert image_size % 112 == 0 or image_size % 128 == 0
+    if image_size % 112 == 0:
+        ratio = float(image_size) / 112.0
         diff_x = 0
     else:
-        ratio = float(image_size)/128.0
-        diff_x = 8.0*ratio
+        ratio = float(image_size) / 128.0
+        diff_x = 8.0 * ratio
     dst = arcface_dst * ratio
-    dst[:,0] += diff_x
+    dst[:, 0] += diff_x
     tform = trans.SimilarityTransform()
     tform.estimate(lmk, dst)
     M = tform.params[0:2, :]
     return M
 
-def norm_crop(img, landmark, image_size=112, mode='arcface'):
+
+def norm_crop(img, landmark, image_size=112, mode="arcface"):
     M = estimate_norm(landmark, image_size, mode)
     warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
     return warped
 
+
 # aligned, M = norm_crop2(f[1], face.kps, 512)
-def norm_crop2(img, landmark, image_size=112, mode='arcface'):
+def norm_crop2(img, landmark, image_size=112, mode="arcface"):
     M = estimate_norm(landmark, image_size, mode)
     warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
     return warped, M
 
+
 def square_crop(im, S):
     if im.shape[0] > im.shape[1]:
         height = S
@@ -247,26 +268,23 @@ def square_crop(im, S):
         scale = float(S) / im.shape[1]
     resized_im = cv2.resize(im, (width, height))
     det_im = np.zeros((S, S, 3), dtype=np.uint8)
-    det_im[:resized_im.shape[0], :resized_im.shape[1], :] = resized_im
+    det_im[: resized_im.shape[0], : resized_im.shape[1], :] = resized_im
     return det_im, scale
 
 
 def transform(data, center, output_size, scale, rotation):
     scale_ratio = scale
     rot = float(rotation) * np.pi / 180.0
-    #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
+    # translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
     t1 = trans.SimilarityTransform(scale=scale_ratio)
     cx = center[0] * scale_ratio
     cy = center[1] * scale_ratio
     t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy))
     t3 = trans.SimilarityTransform(rotation=rot)
-    t4 = trans.SimilarityTransform(translation=(output_size / 2,
-                                                output_size / 2))
+    t4 = trans.SimilarityTransform(translation=(output_size / 2, output_size / 2))
     t = t1 + t2 + t3 + t4
     M = t.params[0:2]
-    cropped = cv2.warpAffine(data,
-                             M, (output_size, output_size),
-                             borderValue=0.0)
+    cropped = cv2.warpAffine(data, M, (output_size, output_size), borderValue=0.0)
     return cropped, M
 
 
@@ -274,9 +292,9 @@ def trans_points2d(pts, M):
     new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
     for i in range(pts.shape[0]):
         pt = pts[i]
-        new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
+        new_pt = np.array([pt[0], pt[1], 1.0], dtype=np.float32)
         new_pt = np.dot(M, new_pt)
-        #print('new_pt', new_pt.shape, new_pt)
+        # print('new_pt', new_pt.shape, new_pt)
 
         new_pts[i] = new_pt[0:2]
 
     return new_pts
@@ -284,13 +302,13 @@ def trans_points2d(pts, M):
 
 def trans_points3d(pts, M):
     scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1])
-    #print(scale)
+    # print(scale)
     new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
     for i in range(pts.shape[0]):
         pt = pts[i]
-        new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
+        new_pt = np.array([pt[0], pt[1], 1.0], dtype=np.float32)
         new_pt = np.dot(M, new_pt)
-        #print('new_pt', new_pt.shape, new_pt)
+        # print('new_pt', new_pt.shape, new_pt)
 
         new_pts[i][0:2] = new_pt[0:2]
         new_pts[i][2] = pts[i][2] * scale
 
@@ -302,3 +320,4 @@ def trans_points(pts, M):
         return trans_points2d(pts, M)
     else:
         return trans_points3d(pts, M)
+
diff --git a/roop/globals.py b/roop/globals.py
index f1236e284..a2b4c5fd8 100644
--- a/roop/globals.py
+++ b/roop/globals.py
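One note on the rotation helpers above: imutils.rotate_bound was only ever called with plus or minus 90 degrees, so np.rot90 is a drop-in replacement that avoids the warpAffine pass (and the imutils dependency) entirely. A quick round-trip sketch of the new behavior:

```python
import numpy as np

# Mirrors face_util.rotate_image_90: rotate=True is 90 degrees anticlockwise,
# rotate=False is 90 degrees clockwise (np.rot90 with the plane axes swapped).
def rotate_image_90(image, rotate=True):
    return np.rot90(image) if rotate else np.rot90(image, 1, (1, 0))

frame = np.arange(24, dtype=np.uint8).reshape(4, 6, 1)   # (h, w, c)
rotated = rotate_image_90(frame)              # anticlockwise -> shape (6, 4, 1)
restored = rotate_image_90(rotated, False)    # clockwise undoes it exactly
assert np.array_equal(restored, frame)
```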
@@ -9,6 +9,8 @@
 frame_processors: List[str] = []
 keep_fps = None
 keep_frames = None
+autorotate_faces = None
+vr_mode = None
 skip_audio = None
 wait_after_extraction = None
 many_faces = None
diff --git a/roop/metadata.py b/roop/metadata.py
index 168e2a289..f7ee09d23 100644
--- a/roop/metadata.py
+++ b/roop/metadata.py
@@ -1,2 +1,2 @@
 name = 'roop unleashed'
-version = '3.3.4'
+version = '3.5.0'
diff --git a/roop/processors/Enhance_DMDNet.py b/roop/processors/Enhance_DMDNet.py
index e114b09cc..b4ae13571 100644
--- a/roop/processors/Enhance_DMDNet.py
+++ b/roop/processors/Enhance_DMDNet.py
@@ -473,7 +473,8 @@ def forward(self, x):
         return self.Query(x)
 
 def roi_align_self(input, location, target_size):
-    return torch.cat([F.interpolate(input[i:i+1,:,location[i,1]:location[i,3],location[i,0]:location[i,2]],(target_size,target_size),mode='bilinear',align_corners=False) for i in range(input.size(0))],0)
+    size = (target_size.item(), target_size.item())
+    return torch.cat([F.interpolate(input[i:i+1,:,location[i,1]:location[i,3],location[i,0]:location[i,2]],size,mode='bilinear',align_corners=False) for i in range(input.size(0))],0)
 
 class FeatureExtractor(nn.Module):
     def __init__(self, ngf = 64, key_scale = 4):#
@@ -827,9 +828,15 @@ def reconstruct(self, fs_in, locs, memstar):
         le_location = locs[:,0,:]
         re_location = locs[:,1,:]
         mo_location = locs[:,3,:]
-        le_location = le_location.cpu().int().numpy()
-        re_location = re_location.cpu().int().numpy()
-        mo_location = mo_location.cpu().int().numpy()
+
+        # Recent Torch versions no longer accept the numpy-wrapped locations here
+
+        # le_location = le_location.cpu().int().numpy()
+        # re_location = re_location.cpu().int().numpy()
+        # mo_location = mo_location.cpu().int().numpy()
+        le_location = le_location.cpu().int()
+        re_location = re_location.cpu().int()
+        mo_location = mo_location.cpu().int()
 
         up_in_256 = fs_in['f256'].clone()# * 0
         up_in_128 = fs_in['f128'].clone()# * 0
@@ -859,7 +866,12 @@ def generate_specific_dictionary(self, sp_imgs=None, sp_locs=None):
         return self.memorize(sp_imgs, sp_locs)
 
     def forward(self, lq=None, loc=None, sp_256 = None, sp_128 = None, sp_64 = None):
-        fs_in = self.E_lq(lq, loc) # low quality images
+        try:
+            fs_in = self.E_lq(lq, loc) # low quality images
+        except Exception as e:
+            print(e)
+            raise
+
         GeMemNorm256, GeMemNorm128, GeMemNorm64, Ind256, Ind128, Ind64 = self.enhancer(fs_in)
         GeOut = self.reconstruct(fs_in, loc, memstar = [GeMemNorm256, GeMemNorm128, GeMemNorm64])
         if sp_256 is not None and sp_128 is not None and sp_64 is not None:
diff --git a/roop/processors/Enhance_RestoreFormer.py b/roop/processors/Enhance_RestoreFormer.py
new file mode 100644
index 000000000..44d184298
--- /dev/null
+++ b/roop/processors/Enhance_RestoreFormer.py
@@ -0,0 +1,64 @@
+from typing import Any, List, Callable
+import cv2
+import numpy as np
+import onnxruntime
+import roop.globals
+
+from roop.typing import Face, Frame, FaceSet
+from roop.utilities import resolve_relative_path
+
+
+# THREAD_LOCK = threading.Lock()
+
+
+class Enhance_RestoreFormer():
+    model_restoreformer = None
+    devicename = None
+    name = None
+
+    processorname = 'restoreformer'
+    type = 'enhance'
+
+
+    def Initialize(self, devicename:str):
+        if self.model_restoreformer is None:
+            # replace Mac mps with cpu for the moment
+            devicename = devicename.replace('mps', 'cpu')
+            self.devicename = devicename
+            model_path = resolve_relative_path('../models/restoreformer.onnx')
+            self.model_restoreformer = onnxruntime.InferenceSession(model_path, None, providers=roop.globals.execution_providers)
+            self.model_inputs = self.model_restoreformer.get_inputs()
+            model_outputs = self.model_restoreformer.get_outputs()
+            self.io_binding = self.model_restoreformer.io_binding()
+            self.io_binding.bind_output(model_outputs[0].name, self.devicename)
+
+    def Run(self, source_faceset: FaceSet, target_face: Face, temp_frame: Frame) -> Frame:
+        # preprocess
+        input_size = temp_frame.shape[1]
+        temp_frame = cv2.resize(temp_frame, (512, 512), interpolation=cv2.INTER_CUBIC)
+        temp_frame = cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB)
+        temp_frame = temp_frame.astype('float32') / 255.0
+        temp_frame = (temp_frame - 0.5) / 0.5
+        temp_frame = np.expand_dims(temp_frame, axis=0).transpose(0, 3, 1, 2)
+
+        self.io_binding.bind_cpu_input(self.model_inputs[0].name, temp_frame) # .astype(np.float32)
+        self.model_restoreformer.run_with_iobinding(self.io_binding)
+        ort_outs = self.io_binding.copy_outputs_to_cpu()
+        result = ort_outs[0][0]
+        del ort_outs
+
+        # postprocess: [-1, 1] CHW RGB float back to HWC BGR uint8
+        result = np.clip(result, -1, 1)
+        result = (result + 1) / 2
+        result = result.transpose(1, 2, 0) * 255.0
+        result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
+        scale_factor = int(result.shape[1] / input_size)
+        return result.astype(np.uint8), scale_factor
+
+
+    def Release(self):
+        del self.model_restoreformer
+        self.model_restoreformer = None
+        del self.io_binding
+        self.io_binding = None
diff --git a/roop/utilities.py b/roop/utilities.py
index 6968ebe3e..b194bf6c0 100644
--- a/roop/utilities.py
+++ b/roop/utilities.py
@@ -266,6 +266,7 @@ def unzip(zipfilename:str, target_path:str):
 
 def mkdir_with_umask(directory):
     oldmask = os.umask(0)
+    # mode needs to be octal
     os.makedirs(directory, mode=0o775, exist_ok=True)
     os.umask(oldmask)
diff --git a/roop/vr_util.py b/roop/vr_util.py
new file mode 100644
index 000000000..a72845e3c
--- /dev/null
+++ b/roop/vr_util.py
@@ -0,0 +1,57 @@
+import cv2
+import numpy as np
+
+# VR lens distortion
+# Taken from https://github.com/g0kuvonlange/vrswap
+
+
+def get_perspective(img, FOV, THETA, PHI, height, width):
+    #
+    # THETA is the left/right angle, PHI is the up/down angle, both in degrees
+    #
+    [orig_height, orig_width, _] = img.shape
+    equ_h = orig_height
+    equ_w = orig_width
+    equ_cx = (equ_w - 1) / 2.0
+    equ_cy = (equ_h - 1) / 2.0
+
+    wFOV = FOV
+    hFOV = float(height) / width * wFOV
+
+    w_len = np.tan(np.radians(wFOV / 2.0))
+    h_len = np.tan(np.radians(hFOV / 2.0))
+
+    x_map = np.ones([height, width], np.float32)
+    y_map = np.tile(np.linspace(-w_len, w_len, width), [height, 1])
+    z_map = -np.tile(np.linspace(-h_len, h_len, height), [width, 1]).T
+
+    D = np.sqrt(x_map**2 + y_map**2 + z_map**2)
+    xyz = np.stack((x_map, y_map, z_map), axis=2) / np.repeat(
+        D[:, :, np.newaxis], 3, axis=2
+    )
+
+    y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+    z_axis = np.array([0.0, 0.0, 1.0], np.float32)
+    [R1, _] = cv2.Rodrigues(z_axis * np.radians(THETA))
+    [R2, _] = cv2.Rodrigues(np.dot(R1, y_axis) * np.radians(-PHI))
+
+    xyz = xyz.reshape([height * width, 3]).T
+    xyz = np.dot(R1, xyz)
+    xyz = np.dot(R2, xyz).T
+    lat = np.arcsin(xyz[:, 2])
+    lon = np.arctan2(xyz[:, 1], xyz[:, 0])
+
+    lon = lon.reshape([height, width]) / np.pi * 180
+    lat = -lat.reshape([height, width]) / np.pi * 180
+
+    lon = lon / 180 * equ_cx + equ_cx
+    lat = lat / 90 * equ_cy + equ_cy
+
+    persp = cv2.remap(
+        img,
+        lon.astype(np.float32),
+        lat.astype(np.float32),
+        cv2.INTER_CUBIC,
+        borderMode=cv2.BORDER_WRAP,
+    )
+    return persp
diff --git a/ui/tabs/faceswap_tab.py b/ui/tabs/faceswap_tab.py
index 062988d68..21186eda3 100644
--- a/ui/tabs/faceswap_tab.py
+++ b/ui/tabs/faceswap_tab.py
@@ -80,14 +80,16 @@ def faceswap_tab():
 
             with gr.Row(variant='panel'):
                 with gr.Column(scale=1):
-                    selected_face_detection = gr.Dropdown(["First found", "Single face frames only [auto-rotate]", "All faces", "Selected face", "All female", "All male"], value="First found", label="Select face selection for swapping")
+                    selected_face_detection = gr.Dropdown(["First found", "All faces", "Selected face", "All female", "All male"], value="First found", label="Select face selection for swapping")
                     max_face_distance = gr.Slider(0.01, 1.0, value=0.65, label="Max Face Similarity Threshold")
                     video_swapping_method = gr.Dropdown(["Extract Frames to media","In-Memory processing"], value="In-Memory processing", label="Select video processing method", interactive=True)
                     no_face_action = gr.Dropdown(choices=no_face_choices, value=no_face_choices[0], label="Action on no face detected", interactive=True)
+                    vr_mode = gr.Checkbox(label="VR Mode", value=False)
                 with gr.Column(scale=1):
-                    ui.globals.ui_selected_enhancer = gr.Dropdown(["None", "Codeformer", "DMDNet", "GFPGAN", "GPEN"], value="None", label="Select post-processing")
+                    ui.globals.ui_selected_enhancer = gr.Dropdown(["None", "Codeformer", "DMDNet", "GFPGAN", "GPEN", "Restoreformer"], value="None", label="Select post-processing")
                     ui.globals.ui_blend_ratio = gr.Slider(0.0, 1.0, value=0.65, label="Original/Enhanced image blend ratio")
 
         with gr.Box():
+            autorotate = gr.Checkbox(label="Auto rotate horizontal faces", value=True)
             roop.globals.skip_audio = gr.Checkbox(label="Skip audio", value=False)
             roop.globals.keep_frames = gr.Checkbox(label="Keep Frames (relevant only when extracting frames)", value=False)
             roop.globals.wait_after_extraction = gr.Checkbox(label="Wait for user key press before creating video ", value=False)
@@ -113,7 +115,7 @@ def faceswap_tab():
             resultvideo = gr.Video(label='Final Video', interactive=False, visible=False)
 
         previewinputs = [preview_frame_num, bt_destfiles, fake_preview, ui.globals.ui_selected_enhancer, selected_face_detection,
-                            max_face_distance, ui.globals.ui_blend_ratio, chk_useclip, clip_text, no_face_action]
+                            max_face_distance, ui.globals.ui_blend_ratio, chk_useclip, clip_text, no_face_action, vr_mode, autorotate]
         input_faces.select(on_select_input_face, None, None).then(fn=on_preview_frame_changed, inputs=previewinputs, outputs=[previewimage, mask_top, mask_bottom])
         bt_remove_selected_input_face.click(fn=remove_selected_input_face, outputs=[input_faces])
         bt_srcfiles.change(fn=on_srcfile_changed, show_progress='full', inputs=bt_srcfiles, outputs=[dynamic_face_selection, face_selection, input_faces])
@@ -143,7 +145,7 @@ def faceswap_tab():
 
         start_event = bt_start.click(fn=start_swap,
             inputs=[ui.globals.ui_selected_enhancer, selected_face_detection, roop.globals.keep_frames, roop.globals.wait_after_extraction,
-                    roop.globals.skip_audio, max_face_distance, ui.globals.ui_blend_ratio, chk_useclip, clip_text,video_swapping_method, no_face_action],
+                    roop.globals.skip_audio, max_face_distance, ui.globals.ui_blend_ratio, chk_useclip, clip_text,video_swapping_method, no_face_action, vr_mode, autorotate],
             outputs=[bt_start, resultfiles])
         after_swap_event = start_event.then(fn=on_resultfiles_finished, inputs=[resultfiles], outputs=[resultimage, resultvideo])
@@ -349,7 +351,7 @@ def on_end_face_selection():
     return gr.Column.update(visible=False), None
 
 
-def on_preview_frame_changed(frame_num, files, fake_preview, enhancer, detection, face_distance, blend_ratio, use_clip, clip_text, no_face_action):
+def on_preview_frame_changed(frame_num, files, fake_preview, enhancer, detection, face_distance, blend_ratio, use_clip, clip_text, no_face_action, vr_mode, auto_rotate):
     global SELECTED_INPUT_FACE_INDEX, is_processing
 
     from roop.core import live_swap
@@ -381,6 +383,8 @@ def on_preview_frame_changed(frame_num, files, fake_preview, enhancer, detection
     roop.globals.distance_threshold = face_distance
     roop.globals.blend_ratio = blend_ratio
     roop.globals.no_face_action = index_of_no_face_action(no_face_action)
+    roop.globals.vr_mode = vr_mode
+    roop.globals.autorotate_faces = auto_rotate
 
     if use_clip and clip_text is None or len(clip_text) < 1:
         use_clip = False
@@ -465,7 +469,7 @@ def translate_swap_mode(dropdown_text):
 
 def start_swap( enhancer, detection, keep_frames, wait_after_extraction, skip_audio, face_distance, blend_ratio,
-                use_clip, clip_text, processing_method, no_face_action, progress=gr.Progress(track_tqdm=False)):
+                use_clip, clip_text, processing_method, no_face_action, vr_mode, autorotate, progress=gr.Progress(track_tqdm=False)):
     from ui.main import prepare_environment
     from roop.core import batch_process
     global is_processing, list_files_process
@@ -488,6 +492,8 @@ def start_swap( enhancer, detection, keep_frames, wait_after_extraction, skip_au
     roop.globals.skip_audio = skip_audio
     roop.globals.face_swap_mode = translate_swap_mode(detection)
     roop.globals.no_face_action = index_of_no_face_action(no_face_action)
+    roop.globals.vr_mode = vr_mode
+    roop.globals.autorotate_faces = autorotate
 
     if use_clip and clip_text is None or len(clip_text) < 1:
         use_clip = False
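For reference, the new vr_util.get_perspective is currently only exercised by the commented-out block in ProcessMgr.process_face. A sketch of how that wiring is intended to work, following that block (the file name and bbox values are hypothetical, and the angle mapping assumes a full 360x180 equirectangular frame):

```python
import cv2
from roop.vr_util import get_perspective

frame = cv2.imread("equirect_frame.png")   # hypothetical equirectangular input
h, w = frame.shape[:2]
x1, y1, x2, y2 = 1400, 600, 1600, 820      # hypothetical detected face bbox

# map the bbox centre to viewing angles: theta in [-180, 180], phi in [-90, 90]
x_norm = ((x1 + x2) / 2) / (w / 2) - 1
y_norm = ((y1 + y2) / 2) / (h / 2) - 1
theta = x_norm * 180
phi = -y_norm * 90

# render an undistorted 90-degree-FOV view centred on the face
persp = get_perspective(frame, 90, theta, phi, 1280, 1280)
```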