From 0e6dc4d70b6714d7697a45b75d814cc3869ce7f0 Mon Sep 17 00:00:00 2001
From: Chengsong Zhang
Date: Mon, 6 Nov 2023 23:49:00 -0600
Subject: [PATCH] 1.11.1 (#289)

* better naming convention
* add date
* api return b64 video
* hook encode_pil_to_base64
* use date instead of datetime
* remove unnecessary code for ipadapter
* vram optim
* recover from assertion error such as OOM without the need to re-start
* bugfix
* add todo to pr a1111
* fix lllite, fix absolute path, fix infotext
* fix api
* readme
---
 README.md                       |  16 ++--
 scripts/animatediff.py          |  21 +++---
 scripts/animatediff_cn.py       |  45 +++++++-----
 scripts/animatediff_i2ibatch.py |   2 +-
 scripts/animatediff_infv2v.py   |  82 +++++++--------------
 scripts/animatediff_lora.py     |  15 ++--
 scripts/animatediff_mm.py       |  28 +++++--
 scripts/animatediff_output.py   | 125 ++++++++++++++++++--------------
 scripts/animatediff_prompt.py   |   4 +
 9 files changed, 177 insertions(+), 161 deletions(-)

diff --git a/README.md b/README.md
index f044221f..54fac07a 100644
--- a/README.md
+++ b/README.md
@@ -53,6 +53,7 @@ You might also be interested in another extension I created: [Segment Anything f
 - `2023/10/21`: [v1.9.4](https://github.com/continue-revolution/sd-webui-animatediff/releases/tag/v1.9.4): Save prompt travel to output images, `Reverse` merged to `Closed loop` (See [WebUI Parameters](#webui-parameters)), remove `TimestepEmbedSequential` hijack, remove `hints.js`, better explanation of several context-related parameters.
 - `2023/10/25`: [v1.10.0](https://github.com/continue-revolution/sd-webui-animatediff/releases/tag/v1.10.0): Support img2img batch. You need ControlNet installed to make it work properly (you do not need to enable ControlNet). See [ControlNet V2V](#controlnet-v2v) for more information.
 - `2023/10/29`: [v1.11.0](https://github.com/continue-revolution/sd-webui-animatediff/releases/tag/v1.11.0): Support [HotShot-XL](https://github.com/hotshotco/Hotshot-XL) for SDXL. See [HotShot-XL](#hotshot-xl) for more information.
+- `2023/11/06`: [v1.11.1](https://github.com/continue-revolution/sd-webui-animatediff/releases/tag/v1.11.1): Optimize VRAM to support any number of control images for ControlNet V2V, patch [encode_pil_to_base64](https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/modules/api/api.py#L104-L133) so that the API can return a video, save frames to `AnimateDiff/yyyy-mm-dd/`, recover from assertion errors such as OOM without restarting WebUI.
 
 For future update plan, please query [here](https://github.com/continue-revolution/sd-webui-animatediff/pull/224).
 
@@ -67,16 +68,16 @@ For future update plan, please query [here](https://github.com/continue-revoluti
 1. Go to txt2img if you want to try txt2gif and img2img if you want to try img2gif.
 1. Choose an SD1.5 checkpoint, write prompts, set configurations such as image width/height. If you want to generate multiple GIFs at once, please [change batch number, instead of batch size](#batch-size).
 1. Enable AnimateDiff extension, set up [each parameter](#webui-parameters), then click `Generate`.
-1. You should see the output GIF on the output gallery. You can access GIF output at `stable-diffusion-webui/outputs/{txt2img or img2img}-images/AnimateDiff`. You can also access image frames at `stable-diffusion-webui/outputs/{txt2img or img2img}-images/{date}`. You may choose to save frames for each generation into separate directories in `Settings/AnimateDiff`.
+1. You should see the output GIF on the output gallery. You can access GIF output at `stable-diffusion-webui/outputs/{txt2img or img2img}-images/AnimateDiff/{yyyy-mm-dd}`. You can also access image frames at `stable-diffusion-webui/outputs/{txt2img or img2img}-images/{yyyy-mm-dd}`. You may choose to save frames for each generation into separate directories in `Settings/AnimateDiff`.
 
 ### API
-Just like how you use ControlNet. Here is a sample. Due to the limitation of WebUI, you will not be able to get a video, but only a list of generated frames. You will have to view GIF in your file system, as mentioned at [WebUI](#webui) item 4. For most up-to-date parameters, please read [here](https://github.com/continue-revolution/sd-webui-animatediff/blob/master/scripts/animatediff_ui.py#L26).
+It is quite similar to the way you use ControlNet. The API will return a video in base64 format. In `format`, `PNG` means saving frames to your file system without returning all the frames. If you want the API to return all frames, add `Frame` to the `format` list. For the most up-to-date parameters, please read [here](https://github.com/continue-revolution/sd-webui-animatediff/blob/master/scripts/animatediff_ui.py#L26).
 ```
 'alwayson_scripts': {
   'AnimateDiff': {
     'args': [{
       'model': 'mm_sd_v15_v2.ckpt',   # Motion module
-      'format': ['GIF'],      # Save format, 'GIF' | 'MP4' | 'PNG' | 'WEBP' | 'TXT'
+      'format': ['GIF'],      # Save format, 'GIF' | 'MP4' | 'PNG' | 'WEBP' | 'TXT' | 'Frame'
       'enable': True,         # Enable AnimateDiff
       'video_length': 16,     # Number of frames
       'fps': 8,               # FPS
@@ -105,6 +106,7 @@ Just like how you use ControlNet. Here is a sample. Due to the limitation of Web
 1. **Save format** — Format of the output. Choose at least one of "GIF"|"MP4"|"WEBP"|"PNG". Check "TXT" if you want infotext, which will live in the same directory as the output GIF. Infotext is also accessible via `stable-diffusion-webui/params.txt` and outputs in all formats.
     1. You can optimize GIF with `gifsicle` (`apt install gifsicle` required, read [#91](https://github.com/continue-revolution/sd-webui-animatediff/pull/91) for more information) and/or `palette` (read [#104](https://github.com/continue-revolution/sd-webui-animatediff/pull/104) for more information). Go to `Settings/AnimateDiff` to enable them.
     1. You can set quality and lossless for WEBP via `Settings/AnimateDiff`. Read [#233](https://github.com/continue-revolution/sd-webui-animatediff/pull/233) for more information.
+    1. If you are using the API, adding "PNG" to `format` saves all frames to your file system without returning them. If you want the API to return all frames, add `Frame` to the `format` list.
 1. **Number of frames** — Choose whatever number you like. If you enter 0 (default):
@@ -238,12 +240,6 @@ Batch number is NOT the same as batch size. In A1111 WebUI, batch number is abov
 
 We are currently developing approach to support batch size on WebUI in the near future.
 
-## FAQ
-1. Q: Will ADetailer be supported?
-
-    A: I'm not planning to support ADetailer. However, I plan to refactor my [Segment Anything](https://github.com/continue-revolution/sd-webui-segment-anything) to achieve similar effects.
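For reference, the API hunk above changes what API callers receive: video formats now come back as base64 strings inside `images`. Below is a minimal client sketch, not part of the patch; it assumes a local WebUI launched with `--api` on the default port, and the prompt, payload values, and output filename are placeholders.

```
import base64

import requests

# Hypothetical values; adjust to your setup.
payload = {
    "prompt": "1girl, walking on the beach",
    "steps": 20,
    "alwayson_scripts": {
        "AnimateDiff": {
            "args": [{
                "model": "mm_sd_v15_v2.ckpt",
                "enable": True,
                "video_length": 16,
                "fps": 8,
                # 'GIF' makes the API return a base64-encoded GIF;
                # add 'Frame' to also receive the individual frames.
                "format": ["GIF"],
            }]
        }
    },
}

resp = requests.post("http://127.0.0.1:7860/sdapi/v1/txt2img", json=payload)
resp.raise_for_status()

# With this patch, the video entries of result["images"] are base64 strings
# of the encoded video file, passed through unchanged by the hooked
# encode_pil_to_base64.
result = resp.json()
with open("animatediff.gif", "wb") as f:
    f.write(base64.b64decode(result["images"][0]))
```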
-
-
 ## Demo
 
 ### Basic Usage
 
@@ -274,7 +270,7 @@ I thank researchers from [Shanghai AI Lab](https://www.shlab.org.cn/), especiall
 I also thank community developers, especially
 - [@zappityzap](https://github.com/zappityzap) who developed the majority of the [output features](https://github.com/continue-revolution/sd-webui-animatediff/blob/master/scripts/animatediff_output.py)
-- [@TDS4874](https://github.com/TDS4874) and [@opparco](https://github.com/opparco) for resolving the grey issue which significantly improve the performance of this extension
+- [@TDS4874](https://github.com/TDS4874) and [@opparco](https://github.com/opparco) for resolving the grey issue, which significantly improves the performance
 - [@talesofai](https://github.com/talesofai) who developed i2v in [this forked repo](https://github.com/talesofai/AnimateDiff)
 - [@rkfg](https://github.com/rkfg) for developing GIF palette optimization
 
diff --git a/scripts/animatediff.py b/scripts/animatediff.py
index a5efe0b5..b1e751a1 100644
--- a/scripts/animatediff.py
+++ b/scripts/animatediff.py
@@ -1,5 +1,3 @@
-import os
-
 import gradio as gr
 from modules import script_callbacks, scripts, shared
 from modules.processing import (Processed, StableDiffusionProcessing,
@@ -39,12 +37,13 @@ def show(self, is_img2img):
 
     def ui(self, is_img2img):
-        model_dir = shared.opts.data.get("animatediff_model_path", os.path.join(script_dir, "model"))
-        return (AnimateDiffUiGroup().render(is_img2img, model_dir),)
+        return (AnimateDiffUiGroup().render(is_img2img, motion_module.get_model_dir()),)
 
 
     def before_process(self, p: StableDiffusionProcessing, params: AnimateDiffProcess):
-        if isinstance(params, dict): params = AnimateDiffProcess(**params)
+        if p.is_api and isinstance(params, dict):
+            self.ad_params = AnimateDiffProcess(**params)
+            params = self.ad_params
         if params.enable:
             logger.info("AnimateDiff process start.")
             params.set_p(p)
@@ -60,25 +59,25 @@ def before_process(self, p: StableDiffusionProcessing, params: AnimateDiffProces
 
     def before_process_batch(self, p: StableDiffusionProcessing, params: AnimateDiffProcess, **kwargs):
-        if isinstance(params, dict): params = AnimateDiffProcess(**params)
+        if p.is_api and isinstance(params, dict): params = self.ad_params
         if params.enable and isinstance(p, StableDiffusionProcessingImg2Img) and not hasattr(p, '_animatediff_i2i_batch'):
             AnimateDiffI2VLatent().randomize(p, params)
 
 
     def postprocess_batch_list(self, p: StableDiffusionProcessing, pp: PostprocessBatchListArgs, params: AnimateDiffProcess, **kwargs):
-        if isinstance(params, dict): params = AnimateDiffProcess(**params)
+        if p.is_api and isinstance(params, dict): params = self.ad_params
         if params.enable:
             self.prompt_scheduler.save_infotext_img(p)
 
 
     def postprocess_image(self, p: StableDiffusionProcessing, pp: PostprocessImageArgs, params: AnimateDiffProcess, *args):
-        if isinstance(params, dict): params = AnimateDiffProcess(**params)
+        if p.is_api and isinstance(params, dict): params = self.ad_params
         if params.enable and isinstance(p, StableDiffusionProcessingImg2Img) and hasattr(p, '_animatediff_paste_to_full'):
             p.paste_to = p._animatediff_paste_to_full[p.batch_index]
 
 
     def postprocess(self, p: StableDiffusionProcessing, res: Processed, params: AnimateDiffProcess):
-        if isinstance(params, dict): params = AnimateDiffProcess(**params)
+        if p.is_api and isinstance(params, dict): params = self.ad_params
         if params.enable:
             self.prompt_scheduler.save_infotext_txt(res)
             self.cn_hacker.restore()
@@ -94,7 +93,7 @@ def on_ui_settings():
     shared.opts.add_option(
"animatediff_model_path", shared.OptionInfo( - os.path.join(script_dir, "model"), + None, "Path to save AnimateDiff motion modules", gr.Textbox, section=section, @@ -144,7 +143,7 @@ def on_ui_settings(): "animatediff_save_to_custom", shared.OptionInfo( False, - "Save frames to stable-diffusion-webui/outputs/{ txt|img }2img-images/AnimateDiff/{gif filename}/ " + "Save frames to stable-diffusion-webui/outputs/{ txt|img }2img-images/AnimateDiff/{gif filename}/{date} " "instead of stable-diffusion-webui/outputs/{ txt|img }2img-images/{date}/.", gr.Checkbox, section=section diff --git a/scripts/animatediff_cn.py b/scripts/animatediff_cn.py index 7ad2f49f..1edf625e 100644 --- a/scripts/animatediff_cn.py +++ b/scripts/animatediff_cn.py @@ -22,12 +22,11 @@ class AnimateDiffControl: + original_processing_process_images_hijack = None + original_controlnet_main_entry = None + original_postprocess_batch = None def __init__(self, p: StableDiffusionProcessing, prompt_scheduler: AnimateDiffPromptSchedule): - self.original_processing_process_images_hijack = None - self.original_img2img_process_batch_hijack = None - self.original_controlnet_main_entry = None - self.original_postprocess_batch = None try: from scripts.external_code import find_cn_script self.cn_script = find_cn_script(p.scripts) @@ -118,15 +117,19 @@ def hacked_processing_process_images_hijack(self, p: StableDiffusionProcessing, update_infotext(p, params) return getattr(processing, '__controlnet_original_process_images_inner')(p, *args, **kwargs) - self.original_processing_process_images_hijack = BatchHijack.processing_process_images_hijack + if AnimateDiffControl.original_processing_process_images_hijack is not None: + logger.info('BatchHijack already hacked.') + return + + AnimateDiffControl.original_processing_process_images_hijack = BatchHijack.processing_process_images_hijack BatchHijack.processing_process_images_hijack = hacked_processing_process_images_hijack processing.process_images_inner = instance.processing_process_images_hijack def restore_batchhijack(self): from scripts.batch_hijack import BatchHijack, instance - BatchHijack.processing_process_images_hijack = self.original_processing_process_images_hijack - self.original_processing_process_images_hijack = None + BatchHijack.processing_process_images_hijack = AnimateDiffControl.original_processing_process_images_hijack + AnimateDiffControl.original_processing_process_images_hijack = None processing.process_images_inner = instance.processing_process_images_hijack @@ -412,21 +415,21 @@ def set_numpy_seed(p: processing.StableDiffusionProcessing) -> Optional[int]: if control_model_type == ControlModelType.IPAdapter: if model_net.is_plus: - controls_ipadapter['hidden_states'].append(control['hidden_states'][-2]) + controls_ipadapter['hidden_states'].append(control['hidden_states'][-2].cpu()) else: - controls_ipadapter['image_embeds'].append(control['image_embeds']) + controls_ipadapter['image_embeds'].append(control['image_embeds'].cpu()) if hr_control is not None: if model_net.is_plus: - hr_controls_ipadapter['hidden_states'].append(hr_control['hidden_states'][-2]) + hr_controls_ipadapter['hidden_states'].append(hr_control['hidden_states'][-2].cpu()) else: - hr_controls_ipadapter['image_embeds'].append(hr_control['image_embeds']) + hr_controls_ipadapter['image_embeds'].append(hr_control['image_embeds'].cpu()) else: hr_controls_ipadapter = None hr_controls = None else: - controls.append(control) + controls.append(control.cpu()) if hr_control is not None: - 
hr_controls.append(hr_control) + hr_controls.append(hr_control.cpu()) else: hr_controls = None @@ -599,17 +602,21 @@ def hacked_postprocess_batch(self, p, *args, **kwargs): images[i] = post_processor(images[i], i) return - self.original_controlnet_main_entry = self.cn_script.controlnet_main_entry - self.original_postprocess_batch = self.cn_script.postprocess_batch + if AnimateDiffControl.original_controlnet_main_entry is not None: + logger.info('ControlNet Main Entry already hacked.') + return + + AnimateDiffControl.original_controlnet_main_entry = self.cn_script.controlnet_main_entry + AnimateDiffControl.original_postprocess_batch = self.cn_script.postprocess_batch self.cn_script.controlnet_main_entry = MethodType(hacked_main_entry, self.cn_script) self.cn_script.postprocess_batch = MethodType(hacked_postprocess_batch, self.cn_script) def restore_cn(self): - self.cn_script.controlnet_main_entry = self.original_controlnet_main_entry - self.original_controlnet_main_entry = None - self.cn_script.postprocess_batch = self.original_postprocess_batch - self.original_postprocess_batch = None + self.cn_script.controlnet_main_entry = AnimateDiffControl.original_controlnet_main_entry + AnimateDiffControl.original_controlnet_main_entry = None + self.cn_script.postprocess_batch = AnimateDiffControl.original_postprocess_batch + AnimateDiffControl.original_postprocess_batch = None def hack(self, params: AnimateDiffProcess): diff --git a/scripts/animatediff_i2ibatch.py b/scripts/animatediff_i2ibatch.py index 1788af58..92ffd0cc 100644 --- a/scripts/animatediff_i2ibatch.py +++ b/scripts/animatediff_i2ibatch.py @@ -24,6 +24,7 @@ class AnimateDiffI2IBatch: def hack(self): + # TODO: PR this hack to A1111 logger.info("Hacking i2i-batch.") original_img2img_process_batch = img2img.process_batch @@ -299,5 +300,4 @@ def cap_init_image(self, p: StableDiffusionProcessingImg2Img, params): params.batch_size = len(p.init_images) - animatediff_i2ibatch = AnimateDiffI2IBatch() diff --git a/scripts/animatediff_infv2v.py b/scripts/animatediff_infv2v.py index 8dc5532d..aa547d39 100644 --- a/scripts/animatediff_infv2v.py +++ b/scripts/animatediff_infv2v.py @@ -16,9 +16,9 @@ class AnimateDiffInfV2V: + cfg_original_forward = None def __init__(self, p, prompt_scheduler: AnimateDiffPromptSchedule): - self.cfg_original_forward = None try: from scripts.external_code import find_cn_script self.cn_script = find_cn_script(p.scripts) @@ -93,45 +93,37 @@ def get_unsorted_index(lst): def hack(self, params: AnimateDiffProcess): + if AnimateDiffInfV2V.cfg_original_forward is not None: + logger.info("CFGDenoiser already hacked") + return + logger.info(f"Hacking CFGDenoiser forward function.") - self.cfg_original_forward = CFGDenoiser.forward + AnimateDiffInfV2V.cfg_original_forward = CFGDenoiser.forward cn_script = self.cn_script prompt_scheduler = self.prompt_scheduler def mm_cn_select(context: List[int]): # take control images for current context. 
- if cn_script is not None and cn_script.latest_network is not None: + if cn_script and cn_script.latest_network: from scripts.hook import ControlModelType for control in cn_script.latest_network.control_params: - if control.control_model_type == ControlModelType.IPAdapter: - ip_adapter_key = list(control.hint_cond)[0] - if ip_adapter_key == "image_embeds": - if control.hint_cond[ip_adapter_key].shape[0] > len(context): - control.hint_cond_backup = control.hint_cond[ip_adapter_key] - control.hint_cond[ip_adapter_key] = control.hint_cond[ip_adapter_key][context] - if control.hr_hint_cond is not None and control.hr_hint_cond[ip_adapter_key].shape[0] > len(context): - control.hr_hint_cond_backup = control.hr_hint_cond[ip_adapter_key] - control.hr_hint_cond[ip_adapter_key] = control.hr_hint_cond[ip_adapter_key][context] - elif ip_adapter_key == "hidden_states": - if control.hint_cond[ip_adapter_key][-2].shape[0] > len(context): - control.hint_cond_backup = control.hint_cond[ip_adapter_key][-2] - control.hint_cond[ip_adapter_key][-2] = control.hint_cond[ip_adapter_key][-2][context] - if control.hr_hint_cond is not None and control.hr_hint_cond[ip_adapter_key][-2].shape[0] > len(context): - control.hr_hint_cond_backup = control.hr_hint_cond[ip_adapter_key][-2] - control.hr_hint_cond[ip_adapter_key][-2] = control.hr_hint_cond[ip_adapter_key][-2][context] - else: + if control.control_model_type not in [ControlModelType.IPAdapter, ControlModelType.Controlllite]: if control.hint_cond.shape[0] > len(context): control.hint_cond_backup = control.hint_cond control.hint_cond = control.hint_cond[context] - if control.hr_hint_cond is not None and control.hr_hint_cond.shape[0] > len(context): - control.hr_hint_cond_backup = control.hr_hint_cond - control.hr_hint_cond = control.hr_hint_cond[context] - if control.control_model_type == ControlModelType.IPAdapter and control.control_model.image_emb.shape[0] > len(context): + control.hint_cond = control.hint_cond.to(device=shared.device) + if control.hr_hint_cond is not None: + if control.hr_hint_cond.shape[0] > len(context): + control.hr_hint_cond_backup = control.hr_hint_cond + control.hr_hint_cond = control.hr_hint_cond[context] + control.hr_hint_cond = control.hr_hint_cond.to(device=shared.device) + # IPAdapter and Controlllite are always on CPU. 
+ elif control.control_model_type == ControlModelType.IPAdapter and control.control_model.image_emb.shape[0] > len(context): control.control_model.image_emb_backup = control.control_model.image_emb control.control_model.image_emb = control.control_model.image_emb[context] control.control_model.uncond_image_emb_backup = control.control_model.uncond_image_emb control.control_model.uncond_image_emb = control.control_model.uncond_image_emb[context] - if control.control_model_type == ControlModelType.Controlllite: + elif control.control_model_type == ControlModelType.Controlllite: for module in control.control_model.modules.values(): if module.cond_image.shape[0] > len(context): module.cond_image_backup = module.cond_image @@ -139,41 +131,22 @@ def mm_cn_select(context: List[int]): def mm_cn_restore(context: List[int]): # restore control images for next context - if cn_script is not None and cn_script.latest_network is not None: + if cn_script and cn_script.latest_network: from scripts.hook import ControlModelType for control in cn_script.latest_network.control_params: - if getattr(control, "hint_cond_backup", None) is not None: - if control.control_model_type == ControlModelType.IPAdapter: - ip_adapter_key = list(control.hint_cond_backup)[0] - if ip_adapter_key == "image_embeds": - control.hint_cond_backup[context] = control.hint_cond[ip_adapter_key] - control.hint_cond[ip_adapter_key] = control.hint_cond_backup - elif ip_adapter_key == "hidden_states": - control.hint_cond_backup[context] = control.hint_cond[ip_adapter_key][-2] - control.hint_cond[ip_adapter_key][-2] = control.hint_cond_backup - else: - control.hint_cond_backup[context] = control.hint_cond + if control.control_model_type not in [ControlModelType.IPAdapter, ControlModelType.Controlllite]: + if getattr(control, "hint_cond_backup", None) is not None: + control.hint_cond_backup[context] = control.hint_cond.to(device="cpu") control.hint_cond = control.hint_cond_backup - if control.hr_hint_cond is not None and getattr(control, "hr_hint_cond_backup", None) is not None: - if control.control_model_type == ControlModelType.IPAdapter: - ip_adapter_key = list(control.hr_hint_cond_backup)[0] - if ip_adapter_key == "image_embeds": - control.hr_hint_cond_backup[ip_adapter_key][context] = control.hr_hint_cond[ip_adapter_key] - control.hr_hint_cond[ip_adapter_key] = control.hr_hint_cond_backup[ip_adapter_key] - elif ip_adapter_key == "hidden_states": - control.hr_hint_cond_backup[context] = control.hr_hint_cond[ip_adapter_key][-2] - control.hr_hint_cond[ip_adapter_key][-2] = control.hr_hint_cond_backup - else: - control.hr_hint_cond_backup[context] = control.hr_hint_cond + if control.hr_hint_cond is not None and getattr(control, "hr_hint_cond_backup", None) is not None: + control.hr_hint_cond_backup[context] = control.hr_hint_cond.to(device="cpu") control.hr_hint_cond = control.hr_hint_cond_backup - if control.control_model_type == ControlModelType.IPAdapter and getattr(control.control_model, "image_emb_backup", None) is not None: - # control.control_model.image_emb_backup[context] = control.control_model.image_emb - # control.control_model.uncond_image_emb_backup[context] = control.control_model.uncond_image_emb + elif control.control_model_type == ControlModelType.IPAdapter and getattr(control.control_model, "image_emb_backup", None) is not None: control.control_model.image_emb = control.control_model.image_emb_backup control.control_model.uncond_image_emb = control.control_model.uncond_image_emb_backup - if control.control_model_type == 
ControlModelType.Controlllite:
+                elif control.control_model_type == ControlModelType.Controlllite:
                     for module in control.control_model.modules.values():
-                        if module.cond_image.shape[0] > len(context):
+                        if getattr(module, "cond_image_backup", None) is not None:
                             module.set_cond_image(module.cond_image_backup)
 
         def mm_sd_forward(self, x_in, sigma_in, cond_in, image_cond_in, make_condition_dict):
@@ -341,4 +314,5 @@ def mm_cfg_forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image
 
     def restore(self):
         logger.info(f"Restoring CFGDenoiser forward function.")
-        CFGDenoiser.forward = self.cfg_original_forward
+        CFGDenoiser.forward = AnimateDiffInfV2V.cfg_original_forward
+        AnimateDiffInfV2V.cfg_original_forward = None
diff --git a/scripts/animatediff_lora.py b/scripts/animatediff_lora.py
index 2a5d0236..c978645b 100644
--- a/scripts/animatediff_lora.py
+++ b/scripts/animatediff_lora.py
@@ -10,20 +10,24 @@
 sys.path.append(f"{extensions_builtin_dir}/Lora")
 
 
 class AnimateDiffLora:
+    original_load_network = None
 
     def __init__(self, v2: bool):
-        self.original_load_network = None
         self.v2 = v2
 
     def hack(self):
         if not self.v2:
             return
 
-        logger.info("Hacking lora to support motion lora")
+        if AnimateDiffLora.original_load_network is not None:
+            logger.info("AnimateDiff LoRA already hacked")
+            return
+
+        logger.info("Hacking lora to support motion lora")
         import network
         import networks
-        self.original_load_network = networks.load_network
-        original_load_network = self.original_load_network
+        AnimateDiffLora.original_load_network = networks.load_network
+        original_load_network = AnimateDiffLora.original_load_network
 
         def mm_load_network(name, network_on_disk):
 
@@ -70,4 +74,5 @@ def restore(self):
         if self.v2:
             logger.info("Restoring hacked lora")
             import networks
-            networks.load_network = self.original_load_network
+            networks.load_network = AnimateDiffLora.original_load_network
+            AnimateDiffLora.original_load_network = None
diff --git a/scripts/animatediff_mm.py b/scripts/animatediff_mm.py
index 08e26158..91720601 100644
--- a/scripts/animatediff_mm.py
+++ b/scripts/animatediff_mm.py
@@ -11,6 +11,7 @@
 
 class AnimateDiffMM:
+    mm_injected = False
 
     def __init__(self):
         self.mm: MotionWrapper = None
@@ -23,11 +24,15 @@ def set_script_dir(self, script_dir):
         self.script_dir = script_dir
 
 
+    def get_model_dir(self):
+        model_dir = shared.opts.data.get("animatediff_model_path", os.path.join(self.script_dir, "model"))
+        if not model_dir:
+            model_dir = os.path.join(self.script_dir, "model")
+        return model_dir
+
+
     def _load(self, model_name):
-        model_path = os.path.join(
-            shared.opts.data.get("animatediff_model_path", os.path.join(self.script_dir, "model")),
-            model_name,
-        )
+        model_path = os.path.join(self.get_model_dir(), model_name)
         if not os.path.isfile(model_path):
             raise RuntimeError("Please download models manually.")
         if self.mm is None or self.mm.mm_name != model_name:
@@ -45,13 +50,15 @@ def _load(self, model_name):
 
     def inject(self, sd_model, model_name="mm_sd_v15.ckpt"):
+        if AnimateDiffMM.mm_injected:
+            logger.info("Motion module already injected. Trying to restore.")
+            self.restore(sd_model)
+
         unet = sd_model.model.diffusion_model
         self._load(model_name)
         inject_sdxl = sd_model.is_sdxl or self.mm.is_sdxl
         sd_ver = "SDXL" if sd_model.is_sdxl else "SD1.5"
-        if sd_model.is_sdxl != self.mm.is_sdxl:
-            logger.warn(f"Motion module incompatible with SD. You are using {sd_ver} with {self.mm.mm_type}. "
-                        f"You will see an error afterwards. 
Even if the injection and inference seem to go on, you will get bad results.") + assert sd_model.is_sdxl == self.mm.is_sdxl, f"Motion module incompatible with SD. You are using {sd_ver} with {self.mm.mm_type}." if self.mm.is_v2: logger.info(f"Injecting motion module {model_name} into {sd_ver} UNet middle block.") @@ -94,6 +101,7 @@ def groupnorm32_mm_forward(self, x): self._set_ddim_alpha(sd_model) self._set_layer_mapping(sd_model) + AnimateDiffMM.mm_injected = True logger.info(f"Injection finished.") @@ -102,11 +110,13 @@ def restore(self, sd_model): sd_ver = "SDXL" if sd_model.is_sdxl else "SD1.5" self._restore_ddim_alpha(sd_model) unet = sd_model.model.diffusion_model + logger.info(f"Removing motion module from {sd_ver} UNet input blocks.") for unet_idx in [1, 2, 4, 5, 7, 8, 10, 11]: if inject_sdxl and unet_idx >= 9: break unet.input_blocks[unet_idx].pop(-1) + logger.info(f"Removing motion module from {sd_ver} UNet output blocks.") for unet_idx in range(12): if inject_sdxl and unet_idx >= 9: @@ -115,6 +125,7 @@ def restore(self, sd_model): unet.output_blocks[unet_idx].pop(-2) else: unet.output_blocks[unet_idx].pop(-1) + if self.mm.is_v2: logger.info(f"Removing motion module from {sd_ver} UNet middle block.") unet.middle_block.pop(-2) @@ -126,6 +137,8 @@ def restore(self, sd_model): from ldm.modules.diffusionmodules.util import GroupNorm32 GroupNorm32.forward = self.gn32_original_forward self.gn32_original_forward = None + + AnimateDiffMM.mm_injected = False logger.info(f"Removal finished.") if shared.cmd_opts.lowvram: self.unload() @@ -155,6 +168,7 @@ def _set_layer_mapping(self, sd_model): sd_model.network_layer_mapping[name] = module module.network_layer_name = name + def _restore_ddim_alpha(self, sd_model): logger.info(f"Restoring DDIM alpha.") sd_model.alphas_cumprod = self.prev_alpha_cumprod diff --git a/scripts/animatediff_output.py b/scripts/animatediff_output.py index af0a3ac7..e3649684 100644 --- a/scripts/animatediff_output.py +++ b/scripts/animatediff_output.py @@ -1,4 +1,5 @@ import base64 +import datetime from pathlib import Path import imageio.v3 as imageio @@ -14,48 +15,67 @@ class AnimateDiffOutput: - def output( - self, p: StableDiffusionProcessing, res: Processed, params: AnimateDiffProcess - ): + api_encode_pil_to_base64_hooked = False + + + def output(self, p: StableDiffusionProcessing, res: Processed, params: AnimateDiffProcess): video_paths = [] logger.info("Merging images into GIF.") - Path(f"{p.outpath_samples}/AnimateDiff").mkdir(exist_ok=True, parents=True) + date = datetime.datetime.now().strftime('%Y-%m-%d') + output_dir = Path(f"{p.outpath_samples}/AnimateDiff/{date}") + output_dir.mkdir(parents=True, exist_ok=True) step = params.video_length if params.video_length > params.batch_size else params.batch_size for i in range(res.index_of_first_image, len(res.images), step): # frame interpolation replaces video_list with interpolated frames # so make a copy instead of a slice (reference), to avoid modifying res - video_list = [image.copy() for image in res.images[i : i + params.video_length]] + frame_list = [image.copy() for image in res.images[i : i + params.video_length]] - seq = images.get_next_sequence_number(f"{p.outpath_samples}/AnimateDiff", "") + seq = images.get_next_sequence_number(output_dir, "") filename = f"{seq:05}-{res.all_seeds[(i-res.index_of_first_image)]}" - video_path_prefix = f"{p.outpath_samples}/AnimateDiff/{filename}" + video_path_prefix = output_dir / filename - video_list = self._add_reverse(params, video_list) - video_list = 
self._interp(p, params, video_list, filename) - video_paths += self._save(params, video_list, video_path_prefix, res, i) + frame_list = self._add_reverse(params, frame_list) + frame_list = self._interp(p, params, frame_list, filename) + video_paths += self._save(params, frame_list, video_path_prefix, res, i) if len(video_paths) > 0: - res.images = video_list if p.is_api else video_paths + if p.is_api: + if not AnimateDiffOutput.api_encode_pil_to_base64_hooked: + # TODO: remove this hook when WebUI is updated to v1.7.0 + logger.info("Hooking api.encode_pil_to_base64 to encode video to base64") + AnimateDiffOutput.api_encode_pil_to_base64_hooked = True + from modules.api import api + api_encode_pil_to_base64 = api.encode_pil_to_base64 + def hooked_encode_pil_to_base64(image): + if isinstance(image, str): + return image + return api_encode_pil_to_base64(image) + api.encode_pil_to_base64 = hooked_encode_pil_to_base64 + res.images = self._encode_video_to_b64(video_paths) + (frame_list if 'Frame' in params.format else []) + else: + res.images = video_paths - def _add_reverse(self, params: AnimateDiffProcess, video_list: list): + + def _add_reverse(self, params: AnimateDiffProcess, frame_list: list): if params.video_length <= params.batch_size and params.closed_loop in ['A']: - video_list_reverse = video_list[::-1] - if len(video_list_reverse) > 0: - video_list_reverse.pop(0) - if len(video_list_reverse) > 0: - video_list_reverse.pop(-1) - return video_list + video_list_reverse - return video_list + frame_list_reverse = frame_list[::-1] + if len(frame_list_reverse) > 0: + frame_list_reverse.pop(0) + if len(frame_list_reverse) > 0: + frame_list_reverse.pop(-1) + return frame_list + frame_list_reverse + return frame_list + def _interp( self, p: StableDiffusionProcessing, params: AnimateDiffProcess, - video_list: list, + frame_list: list, filename: str ): if params.interp not in ['FILM']: - return video_list + return frame_list try: from deforum_helpers.frame_interpolation import ( @@ -63,7 +83,7 @@ def _interp( from film_interpolation.film_inference import run_film_interp_infer except ImportError: logger.error("Deforum not found. 
Please install: https://github.com/deforum-art/deforum-for-automatic1111-webui.git") - return video_list + return frame_list import glob import os @@ -78,13 +98,13 @@ def _interp( film_model_path = os.path.join(film_model_folder, film_model_name) check_and_download_film_model('film_net_fp16.pt', film_model_folder) - film_in_between_frames_count = calculate_frames_to_add(len(video_list), params.interp_x) + film_in_between_frames_count = calculate_frames_to_add(len(frame_list), params.interp_x) # save original frames to tmp folder for deforum input tmp_folder = f"{p.outpath_samples}/AnimateDiff/tmp" input_folder = f"{tmp_folder}/input" os.makedirs(input_folder, exist_ok=True) - for tmp_seq, frame in enumerate(video_list): + for tmp_seq, frame in enumerate(frame_list): imageio.imwrite(f"{input_folder}/{tmp_seq:05}.png", frame) # deforum saves output frames to tmp/{filename} @@ -99,46 +119,45 @@ def _interp( # load deforum output frames and replace video_list interp_frame_paths = sorted(glob.glob(os.path.join(save_folder, '*.png'))) - video_list = [] + frame_list = [] for f in interp_frame_paths: with Image.open(f) as img: img.load() - video_list.append(img) + frame_list.append(img) - # if saving PNG, also save interpolated frames + # if saving PNG, enforce saving to custom folder if "PNG" in params.format: - save_interp_path = f"{p.outpath_samples}/AnimateDiff/interp" - os.makedirs(save_interp_path, exist_ok=True) - shutil.move(save_folder, save_interp_path) + params.force_save_to_custom = True # remove tmp folder try: shutil.rmtree(tmp_folder) except OSError as e: print(f"Error: {e}") - return video_list + return frame_list + def _save( self, params: AnimateDiffProcess, - video_list: list, - video_path_prefix: str, + frame_list: list, + video_path_prefix: Path, res: Processed, index: int, ): video_paths = [] - video_array = [np.array(v) for v in video_list] + video_array = [np.array(v) for v in frame_list] infotext = res.infotexts[index] use_infotext = shared.opts.enable_pnginfo and infotext is not None - if "PNG" in params.format and shared.opts.data.get("animatediff_save_to_custom", False): - Path(video_path_prefix).mkdir(exist_ok=True, parents=True) - for i, frame in enumerate(video_list): - png_filename = f"{video_path_prefix}/{i:05}.png" + if "PNG" in params.format and (shared.opts.data.get("animatediff_save_to_custom", False) or getattr(params, "force_save_to_custom", False)): + video_path_prefix.mkdir(exist_ok=True, parents=True) + for i, frame in enumerate(frame_list): + png_filename = video_path_prefix/f"{i:05}.png" png_info = PngImagePlugin.PngInfo() - png_info.add_text('parameters', res.infotexts[0]) + png_info.add_text('parameters', infotext) imageio.imwrite(png_filename, frame, pnginfo=png_info) if "GIF" in params.format: - video_path_gif = video_path_prefix + ".gif" + video_path_gif = str(video_path_prefix) + ".gif" video_paths.append(video_path_gif) if shared.opts.data.get("animatediff_optimize_gif_palette", False): try: @@ -157,7 +176,7 @@ def _save( "split": ("split", ""), "palgen": ("palettegen", ""), "paluse": ("paletteuse", ""), - "scale": ("scale", f"{video_list[0].width}:{video_list[0].height}") + "scale": ("scale", f"{frame_list[0].width}:{frame_list[0].height}") }, [ ("video_in", "scale", 0, 0), @@ -201,8 +220,9 @@ def _save( ) if shared.opts.data.get("animatediff_optimize_gif_gifsicle", False): self._optimize_gif(video_path_gif) + if "MP4" in params.format: - video_path_mp4 = video_path_prefix + ".mp4" + video_path_mp4 = str(video_path_prefix) + ".mp4" 
video_paths.append(video_path_mp4) try: imageio.imwrite(video_path_mp4, video_array, fps=params.fps, codec="h264") @@ -213,12 +233,15 @@ def _save( "sd-webui-animatediff save mp4 requirement: imageio[ffmpeg]", ) imageio.imwrite(video_path_mp4, video_array, fps=params.fps, codec="h264") + if "TXT" in params.format and res.images[index].info is not None: - video_path_txt = video_path_prefix + ".txt" - self._save_txt(video_path_txt, infotext) + video_path_txt = str(video_path_prefix) + ".txt" + with open(video_path_txt, "w", encoding="utf8") as file: + file.write(f"{infotext}\n") + if "WEBP" in params.format: if PIL.features.check('webp_anim'): - video_path_webp = video_path_prefix + ".webp" + video_path_webp = str(video_path_prefix) + ".webp" video_paths.append(video_path_webp) exif_bytes = b'' if use_infotext: @@ -236,8 +259,10 @@ def _save( # see additional Pillow WebP options at https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#webp else: logger.warn("WebP animation in Pillow requires system WebP library v0.5.0 or later") + return video_paths + def _optimize_gif(self, video_path: str): try: import pygifsicle @@ -255,18 +280,10 @@ def _optimize_gif(self, video_path: str): except FileNotFoundError: logger.warn("gifsicle not found, required for optimized GIFs, try: apt install gifsicle") - def _save_txt( - self, - video_path: str, - info: str, - ): - with open(video_path, "w", encoding="utf8") as file: - file.write(f"{info}\n") def _encode_video_to_b64(self, paths): videos = [] for v_path in paths: with open(v_path, "rb") as video_file: - encoded_video = base64.b64encode(video_file.read()) - videos.append(encoded_video.decode("utf-8")) + videos.append(base64.b64encode(video_file.read()).decode("utf-8")) return videos diff --git a/scripts/animatediff_prompt.py b/scripts/animatediff_prompt.py index 35da88a4..bba96255 100644 --- a/scripts/animatediff_prompt.py +++ b/scripts/animatediff_prompt.py @@ -24,6 +24,10 @@ def save_infotext_txt(self, res: Processed): parts = res.info.split('\nNegative prompt: ', 1) if len(parts) > 1: res.info = f"{self.original_prompt}\nNegative prompt: {parts[1]}" + for i in range(len(res.infotexts)): + parts = res.infotexts[i].split('\nNegative prompt: ', 1) + if len(parts) > 1: + res.infotexts[i] = f"{self.original_prompt}\nNegative prompt: {parts[1]}" write_params_txt(res.info)
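Taken together, the recurring edit across animatediff_cn.py, animatediff_infv2v.py, animatediff_lora.py, and animatediff_mm.py is one idempotent hack/restore pattern: the saved original moves from an instance attribute to a class attribute, and `hack()` refuses to run twice. The standalone sketch below distills that pattern with hypothetical `Target` and `Hijack` names; it is an illustration of the idea, not code from the extension.

```
import logging

logger = logging.getLogger(__name__)


class Target:
    """Stand-in for a WebUI class whose method gets monkey-patched."""
    def forward(self, x):
        return x


class Hijack:
    # Class-level slot: it survives the old Hijack instance being discarded
    # after an assertion error / OOM, so the next generation can detect the
    # unrestored patch instead of wrapping the already-hacked function again.
    original_forward = None

    def hack(self):
        if Hijack.original_forward is not None:
            logger.info("Target.forward already hacked")
            return  # idempotent: never capture a hacked function as "original"
        Hijack.original_forward = Target.forward

        def hacked_forward(target, x):
            # ...extension behaviour would go here...
            return Hijack.original_forward(target, x)

        Target.forward = hacked_forward

    def restore(self):
        if Hijack.original_forward is None:
            return  # nothing to restore
        Target.forward = Hijack.original_forward
        Hijack.original_forward = None  # mark as restored


h = Hijack()
h.hack()
h.hack()     # second call logs and returns; no double wrapping
h.restore()  # Target.forward is the pristine original again
```

Storing the original on the class rather than on `self` is what makes the "recover without restart" item work: after a failed generation the instance is gone, but the class attribute still records both that the hack is active and how to undo it.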
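The VRAM optimization follows the same logic throughout: the `.cpu()` calls added in animatediff_cn.py park the full control-hint stack in system RAM, and the `.to(device=shared.device)` moves in `mm_cn_select` page only the current context window onto the GPU, with `mm_cn_restore` writing it back. A rough standalone sketch with made-up tensor sizes (not the extension's actual data layout):

```
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Hypothetical sizes: 32 control frames in total, a 16-frame context window.
all_hints = torch.randn(32, 3, 64, 64)  # full hint stack, parked on CPU
context = list(range(8, 24))            # frame indices of the current window

# select (cf. mm_cn_select): back up the full stack, move only the window
hint_backup = all_hints
window = all_hints[context].to(device)

# ... ControlNet consumes `window` while denoising this context ...

# restore (cf. mm_cn_restore): write the window back, reinstate the CPU stack
hint_backup[context] = window.to("cpu")
all_hints = hint_backup
```

Because VRAM only ever holds one context window of hints at a time, the number of control images is bounded by system RAM rather than GPU memory, which is what allows "any number of control images" for ControlNet V2V.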