vladmandic · vladmandic · Dec 29, 2023 · Dec 3, 2023 · Dec 3, 2023 · Dec 4, 2023
diff --git a/.gitignore b/.gitignore
@@ -32,6 +32,7 @@ cache
 *.lock
 *.zip
 *.rar
+*.7z
 *.pyc
 /*.bat
 /*.sh

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -10,5 +10,6 @@
     "./repositories/stable-diffusion-stability-ai",
     "./repositories/stable-diffusion-stability-ai/ldm"
   ],
-  "python.analysis.typeCheckingMode": "off"
-}
+  "python.analysis.typeCheckingMode": "off",
+  "editor.formatOnSave": false
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,146 @@
 # Change Log for SD.Next
 
+## Update for 2023-12-29
+
+- **Control**  
+  - native implementation of all image control methods:  
+    **ControlNet**, **ControlNet XS**, **Control LLLite**, **T2I Adapters** and **IP Adapters**  
+  - top-level **Control** next to **Text** and **Image** generate  
+  - supports all variations of **SD15** and **SD-XL** models  
+  - supports *Text*, *Image*, *Batch* and *Video* processing  
+  - for details and list of supported models and workflows, see Wiki documentation:  
+    <https://github.com/vladmandic/automatic/wiki/Control>  
+- **Diffusers**  
+  - [Segmind Vega](https://huggingface.co/segmind/Segmind-Vega) model support  
+    - small and fast version of **SDXL**, only 3.1GB in size!  
+    - select from *networks -> reference*  
+  - [aMUSEd 256](https://huggingface.co/amused/amused-256) and [aMUSEd 512](https://huggingface.co/amused/amused-512) model support  
+    - lightweight models that excel at fast image generation  
+    - *note*: must select: settings -> diffusers -> generator device: unset
+    - select from *networks -> reference*
+  - [Playground v1](https://huggingface.co/playgroundai/playground-v1), [Playground v2 256](https://huggingface.co/playgroundai/playground-v2-256px-base), [Playground v2 512](https://huggingface.co/playgroundai/playground-v2-512px-base), [Playground v2 1024](https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic) model support  
+    - comparable to SD15 and SD-XL, trained from scratch for highly aesthetic images  
+    - simply select from *networks -> reference* and use as usual  
+  - [BLIP-Diffusion](https://dxli94.github.io/BLIP-Diffusion-website/)  
+    - img2img model that can replace subjects in images using prompt keywords  
+    - download and load by selecting from *networks -> reference -> blip diffusion*
+    - in image tab, select `blip diffusion` script
+  - [DemoFusion](https://github.com/PRIS-CV/DemoFusion) run your SDXL generations at any resolution!  
+    - in **Text** tab select *script* -> *demofusion*  
+    - *note*: GPU VRAM limits do not automatically go away so be careful when using it with large resolutions  
+      in the future, expect more optimizations, especially related to offloading/slicing/tiling,  
+      but at the moment this is pretty much experimental-only  
+  - [AnimateDiff](https://github.com/guoyww/animatediff/)  
+    - overall improved quality  
+    - can now be used with *second pass* - enhance, upscale and hires your videos!  
+  - [IP Adapter](https://github.com/tencent-ailab/IP-Adapter)  
+    - add support for **ip-adapter-plus_sd15, ip-adapter-plus-face_sd15 and ip-adapter-full-face_sd15**  
+    - can now be used in *xyz-grid*  
+  - **Text-to-Video**  
+    - in text tab, select `text-to-video` script  
+    - supported models: **ModelScope v1.7b, ZeroScope v1, ZeroScope v1.1, ZeroScope v2, ZeroScope v2 Dark, Potat v1**  
+      *if you know of any other t2v models you'd like to see supported, let me know!*  
+    - models are auto-downloaded on first use  
+    - *note*: current base model will be unloaded to free up resources  
+  - **Prompt scheduling** now implemented for Diffusers backend, thanks @AI-Casanova
+  - **Custom pipelines** contribute by adding your own custom pipelines!  
+    - for details, see fully documented example:  
+      <https://github.com/vladmandic/automatic/blob/dev/scripts/example.py>  
+  - **Schedulers**  
+    - add timesteps range, changing it will make the scheduler over-complete or under-complete  
+    - add rescale betas with zero SNR option (applicable to Euler, Euler a and DDIM, allows for higher dynamic range)  
+  - **Inpaint**  
+    - improved quality when using mask blur and padding  
+  - **UI**  
+    - 3 new native UI themes: **orchid-dreams**, **emerald-paradise** and **timeless-beige**, thanks @illu_Zn
+    - more dynamic controls depending on the backend (original or diffusers)  
+      controls that are not applicable in current mode are now hidden  
+    - allow setting of resize method directly in image tab  
+      (previously via settings -> upscaler_for_img2img)  
+- **Optional**
+  - **FaceID** face guidance during generation  
+    - also based on IP adapters, but with additional face detection and external embeddings calculation  
+    - calculates face embeds based on input image and uses it to guide generation  
+    - simply select from *scripts -> faceid*  
+    - *experimental module*: requirements must be installed manually:  
+        > pip install insightface ip_adapter  
+  - **Depth 3D** image to 3D scene
+    - delivered as an extension, install from extensions tab  
+      <https://github.com/vladmandic/sd-extension-depth3d>  
+    - creates fully compatible 3D scene from any image by using depth estimation  
+      and creating a fully populated mesh  
+    - scene can be freely viewed in 3D in the UI itself or downloaded for use in other applications  
+  - [ONNX/Olive](https://github.com/vladmandic/automatic/wiki/ONNX-Olive)  
+    - major work continues in olive branch, see wiki for details, thanks @lshqqytiger  
+      as a highlight, 4-5 it/s using DirectML on AMD GPU translates to 23-25 it/s using ONNX/Olive!  
+- **General**  
+  - new **onboarding**  
+    - if no models are found during startup, app will no longer ask to download default checkpoint  
+      instead, it will show message in UI with options to change model path or download any of the reference checkpoints  
+    - *extra networks -> models -> reference* section is now enabled for both original and diffusers backend  
+  - support for **Torch 2.1.2** (release) and **Torch 2.3** (dev)  
+  - **Process** create videos from batch or folder processing  
+      supports *GIF*, *PNG* and *MP4* with full interpolation, scene change detection, etc.  
+  - **LoRA**  
+    - add support for block weights, thanks @AI-Casanova  
+      example `<lora:SDXL_LCM_LoRA:1.0:in=0:mid=1:out=0>`  
+    - add support for LyCORIS GLora networks  
+    - add support for LoRA PEFT (*Diffusers*) networks  
+    - add support for Lora-OFT (*Kohya*) and Lyco-OFT (*Kohaku*) networks  
+    - reintroduce alternative loading method in settings: `lora_force_diffusers`  
+    - add support for `lora_fuse_diffusers` if using alternative method  
+      use if you have multiple complex loras that may be causing performance degradation  
+      as it fuses lora with model during load instead of interpreting lora on-the-fly  
+  - **CivitAI downloader** allow usage of access tokens for download of gated or private models  
+  - **Extra networks** new *setting -> extra networks -> build info on first access*  
+    indexes all networks on first access instead of server startup  
+  - **IPEX**, thanks @disty0  
+    - update to **Torch 2.1**  
+      if you get file not found errors, set `DISABLE_IPEXRUN=1` and run the webui with `--reinstall`  
+    - built-in *MKL* and *DPCPP* for IPEX, no need to install OneAPI anymore  
+    - **StableVideoDiffusion** is now supported with IPEX  
+    - **8 bit support with NNCF** on Diffusers backend  
+    - fix IPEX Optimize not applying with Diffusers backend  
+    - disable 32bit workarounds if the GPU supports 64bit  
+    - add `DISABLE_IPEXRUN` and `DISABLE_IPEX_1024_WA` environment variables  
+    - performance and compatibility improvements  
+  - **OpenVINO**, thanks @disty0  
+    - **8 bit support for CPUs**  
+    - reduce System RAM usage  
+    - update to Torch 2.1.2  
+    - add *Directory for OpenVINO cache* option to *System Paths*  
+    - remove Intel ARC specific 1024x1024 workaround  
+  - **HDR controls**  
+    - batch-aware for enhancement of multiple images or video frames  
+    - available in image tab  
+  - **Logging**
+    - additional *TRACE* logging enabled via specific env variables  
+      see <https://github.com/vladmandic/automatic/wiki/Debug> for details  
+    - improved profiling  
+      use with `--debug --profile`  
+    - log output file sizes  
+  - **Other**  
+    - **API** several minor but breaking changes to API behavior to better align response fields, thanks @Trojaner
+    - **Inpaint** add option `apply_overlay` to control if inpaint result should be applied as overlay or as-is  
+      can remove artifacts and hard edges of inpaint area but also remove some details from original  
+    - **chaiNNer** fix `NaN` issues due to autocast  
+    - **Upscale** increase limit from 4x to 8x given the quality of some upscalers  
+    - **Extra Networks** fix sort  
+    - reduced default **CFG scale** from 6 to 4 to be more out-of-the-box compatible with LCM/Turbo models
+    - disable google fonts check on server startup  
+    - fix torchvision/basicsr compatibility  
+    - fix styles quick save  
+    - add hdr settings to metadata  
+    - improve handling of long filenames and filenames during batch processing  
+    - do not set preview samples when using via api  
+    - avoid unnecessary resizes in img2img and inpaint  
+    - safe handling of config updates to avoid file corruption on I/O errors  
+    - updated `cli/simple-txt2img.py` and `cli/simple-img2img.py` scripts  
+    - save `params.txt` regardless of image save status  
+    - update built-in log monitor in ui, thanks @midcoastal  
+    - major CHANGELOG doc cleanup, thanks @JetVarimax  
+    - major INSTALL doc cleanup, thanks @JetVarimax  
+
 ## Update for 2023-12-04
 
 Whats new? Native video in SD.Next via both **AnimateDiff** and **Stable-Video-Diffusion** - and including native MP4 encoding and smooth video outputs out-of-the-box, not just animated-GIFs.  

diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,28 @@
+cff-version: 1.2.0
+title: SD.Next
+url: 'https://github.com/vladmandic/automatic'
+message: >-
+  If you use this software, please cite it using the
+  metadata from this file
+type: software
+authors:
+  - given-names: Vladimir
+    name-particle: Vlado
+    family-names: Mandic
+    orcid: 'https://orcid.org/0009-0003-4592-5074'
+identifiers:
+  - type: url
+    value: 'https://github.com/vladmandic'
+    description: GitHub
+  - type: url
+    value: 'https://www.linkedin.com/in/cyan051/'
+    description: LinkedIn
+repository-code: 'https://github.com/vladmandic/automatic'
+abstract: >-
+  SD.Next: Advanced Implementation of Stable Diffusion and
+  other diffusion models for text, image and video
+  generation
+keywords:
+  - stablediffusion diffusers sdnext
+license: AGPL-3.0
+date-released: 2022-12-24
diff --git a/README.md b/README.md
@@ -20,15 +20,16 @@ All individual features are not listed here, instead check [ChangeLog](CHANGELOG
 - Multiple backends!  
   ▹ **Original | Diffusers**
 - Multiple diffusion models!  
-  ▹ **Stable Diffusion | SD-XL | LCM | Segmind | Kandinsky | Pixart-α | Würstchen | DeepFloyd IF | UniDiffusion | SD-Distilled | etc.**
+  ▹ **Stable Diffusion 1.5/2.1 | SD-XL | LCM | Segmind | Kandinsky | Pixart-α | Würstchen | aMUSEd | DeepFloyd IF | UniDiffusion | SD-Distilled | BLIP Diffusion | etc.**
+- Built-in Control for Text, Image, Batch and video processing!  
+  ▹ **ControlNet | ControlNet XS | Control LLLite | T2I Adapters | IP Adapters**  
 - Multiplatform!  
  ▹ **Windows | Linux | MacOS with CPU | nVidia | AMD | IntelArc | DirectML | OpenVINO | ONNX+Olive**
 - Platform specific autodetection and tuning performed on install
 - Optimized processing with latest `torch` developments with built-in support for `torch.compile` and multiple compile backends
 - Improved prompt parser  
 - Enhanced *Lora*/*LoCon*/*Lyco* code supporting latest trends in training  
 - Built-in queue management  
-- Advanced metadata caching and handling to speed up operations  
 - Enterprise level logging and hardened API  
 - Modern localization and hints engine  
 - Broad compatibility with existing extensions ecosystem and new extensions manager  
@@ -37,7 +38,8 @@ All individual features are not listed here, instead check [ChangeLog](CHANGELOG
 
 <br>
 
-![Screenshot-Dark](html/black-teal.jpg)
+![Screenshot-Dark](html/xmas-default.jpg)
+![Screenshot-Control](html/xmas-control.jpg)
 ![Screenshot-Light](html/light-teal.jpg)
 
 <br>
@@ -58,17 +60,23 @@ All individual features are not listed here, instead check [ChangeLog](CHANGELOG
 
 Additional models will be added as they become available and there is public interest in them
 
-- [RunwayML Stable Diffusion](https://github.com/Stability-AI/stablediffusion/) 1.x and 2.x *(all variants)*
-- [StabilityAI Stable Diffusion XL](https://github.com/Stability-AI/generative-models)
-- [StabilityAI Stable Video Diffusion Base and XT](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid)
-- [Segmind SSD-1B](https://huggingface.co/segmind/SSD-1B)
-- [LCM: Latent Consistency Models](https://github.com/openai/consistency_models)
-- [Kandinsky](https://github.com/ai-forever/Kandinsky-2) *2.1 and 2.2 and latest 3.0*
-- [PixArt-α XL 2](https://github.com/PixArt-alpha/PixArt-alpha) *Medium and Large*
+- [RunwayML Stable Diffusion](https://github.com/Stability-AI/stablediffusion/) 1.x and 2.x *(all variants)*  
+- [StabilityAI Stable Diffusion XL](https://github.com/Stability-AI/generative-models)  
+- [StabilityAI Stable Video Diffusion](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid) Base and XT  
+- [LCM: Latent Consistency Models](https://github.com/openai/consistency_models)  
+- [aMUSEd](https://huggingface.co/amused/amused-256) *256 and 512*  
+- [Segmind Vega](https://huggingface.co/segmind/Segmind-Vega)  
+- [Segmind SSD-1B](https://huggingface.co/segmind/SSD-1B)  
+- [Kandinsky](https://github.com/ai-forever/Kandinsky-2) *2.1 and 2.2 and latest 3.0*  
+- [PixArt-α XL 2](https://github.com/PixArt-alpha/PixArt-alpha) *Medium and Large*  
 - [Warp Wuerstchen](https://huggingface.co/blog/wuertschen)  
+- [Playground](https://huggingface.co/playgroundai/playground-v2-256px-base) *v1, v2 256, v2 512, v2 1024*  
 - [Tsinghua UniDiffusion](https://github.com/thu-ml/unidiffuser)
 - [DeepFloyd IF](https://github.com/deep-floyd/IF) *Medium and Large*
+- [ModelScope T2V](https://huggingface.co/damo-vilab/text-to-video-ms-1.7b)
 - [Segmind SD Distilled](https://huggingface.co/blog/sd_distillation) *(all variants)*
+- [BLIP-Diffusion](https://dxli94.github.io/BLIP-Diffusion-website/)  
+
 
 Also supported are modifiers such as:
 - **LCM** and **Turbo** (Adversarial Diffusion Distillation) networks
@@ -209,6 +217,9 @@ General goals:
 
 ### **Docs**
 
+If you're unsure how to use a feature, the best place to start is the [Wiki](https://github.com/vladmandic/automatic/wiki) and if it's not there,  
+check [ChangeLog](CHANGELOG.md) for when feature was first introduced as it will always have a short note on how to use it  
+
 - [Wiki](https://github.com/vladmandic/automatic/wiki)
 - [ReadMe](README.md)
 - [ToDo](TODO.md)  

diff --git a/SECURITY.md b/SECURITY.md
@@ -32,5 +32,5 @@ Any code commit is validated before merge
 - Download extensions and themes indexes from automatically updated indexes  
 - Download required packages and repositories from GitHub during installation/upgrade
 - Download installed/enabled extensions
-- Download default model from official repository
+- Download models from CivitAI and/or Huggingface when instructed by user
 - Submit benchmark info upon user interaction  
diff --git a/cli/image-grid.py b/cli/image-grid.py
@@ -56,7 +56,7 @@ def grid(images, labels = None, width = 0, height = 0, border = 0, square = Fals
     for i, img in enumerate(images): # pylint: disable=redefined-outer-name
         x = (i % cols * w) + (i % cols * border)
         y = (i // cols * h) + (i // cols * border)
-        img.thumbnail((w, h), Image.HAMMING)
+        img.thumbnail((w, h), Image.Resampling.HAMMING)
         image.paste(img, box=(x, y))
         if labels is not None and len(images) == len(labels):
             ctx = ImageDraw.Draw(image)

diff --git a/cli/simple-img2img.py b/cli/simple-img2img.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
 import os
 import io
-import sys
+import time
 import base64
 import logging
+import argparse
 import requests
 import urllib3
 from PIL import Image
@@ -14,22 +15,13 @@
 
 logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s')
 log = logging.getLogger(__name__)
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+filename='/tmp/simple-img2img.jpg'
 options = {
-    "init_images": [],
-    "prompt": "city at night",
-    "negative_prompt": "foggy, blurry",
-    "steps": 20,
-    "batch_size": 1,
-    "n_iter": 1,
-    "seed": -1,
-    "sampler_name": "Euler a",
-    "cfg_scale": 6,
-    "width": 512,
-    "height": 512,
     "save_images": False,
     "send_images": True,
 }
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
 def auth():
@@ -51,26 +43,51 @@ def encode(f):
         image = image.convert('RGB')
     with io.BytesIO() as stream:
         image.save(stream, 'JPEG')
+        image.close()
         values = stream.getvalue()
         encoded = base64.b64encode(values).decode()
         return encoded
 
-def generate(num: int = 0):
-    log.info(f'sending generate request: {num+1} {options}')
-    options['init_images'] = [encode('html/logo-dark.png')]
-    options['batch_size'] = len(options['init_images'])
+def generate(args): # pylint: disable=redefined-outer-name
+    t0 = time.time()
+    if args.model is not None:
+        post('/sdapi/v1/options', { 'sd_model_checkpoint': args.model })
+        post('/sdapi/v1/reload-checkpoint') # needed if running in api-only to trigger new model load
+    options['prompt'] = args.prompt
+    options['negative_prompt'] = args.negative
+    options['steps'] = int(args.steps)
+    options['seed'] = int(args.seed)
+    options['sampler_name'] = args.sampler
+    options['init_images'] = [encode(args.init)]
+    image = Image.open(args.init)
+    options['width'] = image.width
+    options['height'] = image.height
+    image.close()
+    if args.mask is not None:
+        options['mask'] = encode(args.mask)
     data = post('/sdapi/v1/img2img', options)
+    t1 = time.time()
     if 'images' in data:
         for i in range(len(data['images'])):
             b64 = data['images'][i].split(',',1)[0]
+            info = data['info']
             image = Image.open(io.BytesIO(base64.b64decode(b64)))
-            log.info(f'received image: {image.size}')
+            image.save(filename)
+            log.info(f'received image: size={image.size} file={filename} time={t1-t0:.2f} info="{info}"')
     else:
         log.warning(f'no images received: {data}')
 
+
 if __name__ == "__main__":
-    sys.argv.pop(0)
-    repeats = int(''.join(sys.argv) or '1')
-    log.info(f'repeats: {repeats}')
-    for n in range(repeats):
-        generate(n)
+    parser = argparse.ArgumentParser(description = 'simple-img2img')
+    parser.add_argument('--init', required=True, help='init image')
+    parser.add_argument('--mask', required=False, help='mask image')
+    parser.add_argument('--prompt', required=False, default='', help='prompt text')
+    parser.add_argument('--negative', required=False, default='', help='negative prompt text')
+    parser.add_argument('--steps', required=False, default=20, help='number of steps')
+    parser.add_argument('--seed', required=False, default=-1, help='initial seed')
+    parser.add_argument('--sampler', required=False, default='Euler a', help='sampler name')
+    parser.add_argument('--model', required=False, help='model name')
+    args = parser.parse_args()
+    log.info(f'img2img: {args}')
+    generate(args)