merge dev to master #3503

Merged
269 commits merged into master from dev on Oct 23, 2024
Changes from all commits (269 commits)
cfa9f05
cleanup
vladmandic Sep 18, 2024
8bd944d
add move-te
vladmandic Sep 18, 2024
cfb201c
add vit-l and vit-g references
vladmandic Sep 18, 2024
6f7a446
add reference note
vladmandic Sep 18, 2024
3eb9e1d
add cogvideox image2video
vladmandic Sep 18, 2024
51e97b3
clear embeds cache on te change
vladmandic Sep 19, 2024
2be087b
embeds additional checks
vladmandic Sep 19, 2024
2698ede
flux controlnet img2img and inpaint
vladmandic Sep 19, 2024
f2bf7e8
fix setting t5 model
Fundaris Sep 19, 2024
271b493
Merge pull request #3447 from Fundaris/dev
vladmandic Sep 19, 2024
4acf3c0
persist control units state on restarts
vladmandic Sep 20, 2024
6f7fe5e
fix directml generator.
lshqqytiger Sep 20, 2024
bdbd24e
experimental t5 gguf support
vladmandic Sep 20, 2024
c20ba34
invalidate ui selection on load fail
vladmandic Sep 20, 2024
f028628
zluda downgrade torch on gcn cards
lshqqytiger Sep 21, 2024
f18e659
fix xyz global prompt
vladmandic Sep 21, 2024
419f342
add gradio_skip_video setting
vladmandic Sep 21, 2024
0b665fe
add reprocess image
vladmandic Sep 21, 2024
8457d45
fix alphavllm-lumina
vladmandic Sep 21, 2024
ae221f8
conditional token counter
vladmandic Sep 21, 2024
f059313
auto-set upcast if first decode fails, fix flux
vladmandic Sep 21, 2024
114df73
xyz grid multi-resolution
vladmandic Sep 21, 2024
a3b3553
optimize xyz grid
vladmandic Sep 21, 2024
6919ca3
lint updates
vladmandic Sep 21, 2024
5d70297
fix skip
vladmandic Sep 21, 2024
2ccf2a0
enable flux vae reprocess
vladmandic Sep 21, 2024
b9e230c
update logging
vladmandic Sep 22, 2024
74a70a8
lora unpatch when not needed
vladmandic Sep 22, 2024
fdb2ea8
always keep lora on gpu
vladmandic Sep 22, 2024
2d0ad9a
update vae upscale logic
vladmandic Sep 22, 2024
3ceb998
display lora tag in networks and do not filter
vladmandic Sep 22, 2024
f74187c
Bundled Embeddings quick fix
AI-Casanova Sep 22, 2024
ddefb38
cleanup lora detect
vladmandic Sep 22, 2024
92f2a29
improve profiling
vladmandic Sep 23, 2024
b8877d3
add free-u check
vladmandic Sep 23, 2024
31b66cf
IPEX fix FreeU
Disty0 Sep 23, 2024
4de0480
Upcast bf16 fftn to fp32
Disty0 Sep 23, 2024
718cfe4
zluda hijack fft
lshqqytiger Sep 24, 2024
e246e55
rocm install flash-attn if needed
lshqqytiger Sep 24, 2024
1395f5b
add triton backend flash-attn (experimental)
lshqqytiger Sep 24, 2024
4136983
add reprocess plus major processing refactor
vladmandic Sep 25, 2024
c00bcde
update changelog
vladmandic Sep 25, 2024
fb3b7fa
UI Tweak
ZeldaMaster501 Sep 25, 2024
3198a19
Merge pull request #3454 from ZeldaMaster501/dev
vladmandic Sep 25, 2024
e7e79c4
add lora_load_gpu setting
vladmandic Sep 25, 2024
938a858
fix vae upcast condition
vladmandic Sep 25, 2024
2c990b7
update changelog
vladmandic Sep 25, 2024
3d38f6e
update xyz grid
vladmandic Sep 25, 2024
82ce34c
zluda device id experiment
lshqqytiger Sep 26, 2024
6af1524
zluda device id #1
lshqqytiger Sep 26, 2024
bdcef26
zluda device id #2
lshqqytiger Sep 26, 2024
993a3d7
zluda device id #3
lshqqytiger Sep 26, 2024
220878c
zluda device id #4
lshqqytiger Sep 26, 2024
9ba7f4b
rocm fix none error
lshqqytiger Sep 26, 2024
96922d7
rocm&zluda handle apu
lshqqytiger Sep 26, 2024
7b3f4d4
rocm skip if flash-attn is installed
lshqqytiger Sep 26, 2024
bfd8e70
remove unnecessary line
lshqqytiger Sep 26, 2024
1fff99f
fix xyz grid seed labels broke after renaming options
Fundaris Sep 26, 2024
0391577
Merge pull request #3455 from Fundaris/dev
vladmandic Sep 26, 2024
b8055d1
fix xyz grid
vladmandic Sep 26, 2024
d9b594e
improve lora detection, logging and error handling
vladmandic Sep 26, 2024
b77bb9f
Add JasperAI Flux Controlnets
EnragedAntelope Sep 26, 2024
d125d69
Merge pull request #3457 from EnragedAntelope/patch-1
vladmandic Sep 26, 2024
bcb704a
update changelog
vladmandic Sep 26, 2024
314e333
Flux-LoRa first commit
AI-Casanova Sep 27, 2024
b74af75
add more known devices
lshqqytiger Sep 27, 2024
707c59a
some rewrites
lshqqytiger Sep 27, 2024
3d9f2c5
Bugfix: Unused token
Symbiomatrix Sep 27, 2024
470ea10
fix hipinfo
lshqqytiger Sep 27, 2024
157f581
fix filename seq
vladmandic Sep 27, 2024
1a37215
fix upcast
vladmandic Sep 27, 2024
27afd84
Merge pull request #3458 from Symbiomatrix/Token
vladmandic Sep 27, 2024
1993713
fix upcast
vladmandic Sep 27, 2024
d51a695
update changelog
vladmandic Sep 27, 2024
c8c9c37
Flux-LoRa kohya key matching
AI-Casanova Sep 27, 2024
c6e4aee
fix vdm
vladmandic Sep 28, 2024
a8d31d0
Load works, unload infinite recursion
AI-Casanova Sep 28, 2024
6a49db4
Fix offload/backup
AI-Casanova Sep 29, 2024
2e3a3a3
add ctrl+x
vladmandic Sep 29, 2024
174add0
restore dtype after upcast complete
vladmandic Sep 29, 2024
be7f86f
Merge branch 'dev' into flux-lora
AI-Casanova Sep 29, 2024
fe94edf
set default cuda dtype to auto
vladmandic Sep 29, 2024
bd5ac8e
Add option, fix diffusers keys
AI-Casanova Sep 29, 2024
47755dc
refactor devices
vladmandic Sep 30, 2024
b31d02b
cleanup
vladmandic Sep 30, 2024
d0f9450
logging
vladmandic Sep 30, 2024
c21a10b
add bf16 override for directml
vladmandic Sep 30, 2024
d7264ed
upgrade torch for directml backend
lshqqytiger Sep 30, 2024
15e16e4
allow ctrlx with hires
vladmandic Sep 30, 2024
aa7f745
update changelog
vladmandic Sep 30, 2024
ddf56f0
update requirements
vladmandic Sep 30, 2024
492ee38
update requirements
vladmandic Sep 30, 2024
fce431b
Merge branch 'dev' into flux-lora
vladmandic Sep 30, 2024
e6ff17d
Merge pull request #3461 from AI-Casanova/flux-lora
vladmandic Sep 30, 2024
ac58b21
update flux lora code
vladmandic Sep 30, 2024
214b9a2
cleanup
vladmandic Sep 30, 2024
2a85f89
update changelog
vladmandic Sep 30, 2024
684263c
downgrade transformers
vladmandic Sep 30, 2024
046d588
add fix-te
vladmandic Oct 1, 2024
f9b5a83
cleanup
vladmandic Oct 1, 2024
9a46d38
cleanups
vladmandic Oct 2, 2024
bcc9ba1
handle torchao
vladmandic Oct 2, 2024
bd6e689
fix te meta
vladmandic Oct 2, 2024
aba3979
lock cogflorence revision
vladmandic Oct 2, 2024
e8e05b7
change clip load order
vladmandic Oct 2, 2024
176c5c5
sampler options rewrite
vladmandic Oct 3, 2024
3de3c44
update changelog
vladmandic Oct 3, 2024
8ee33cc
update modernui
vladmandic Oct 3, 2024
3b4120d
update changelog
vladmandic Oct 3, 2024
2a99654
cleanup
vladmandic Oct 3, 2024
2521e67
update vqa
vladmandic Oct 3, 2024
20d28ac
update modernui
vladmandic Oct 3, 2024
462e9f5
handle torchao
vladmandic Oct 4, 2024
b787d5f
support manually downloaded hf models
vladmandic Oct 4, 2024
1848847
add apg
vladmandic Oct 4, 2024
f60c2b4
add note
vladmandic Oct 4, 2024
ecfb1bb
add notes
vladmandic Oct 4, 2024
317db4b
add lint exceptions
vladmandic Oct 4, 2024
f62dddf
Add Cascade APG
Disty0 Oct 4, 2024
5f7bef8
apg add sd15
vladmandic Oct 4, 2024
18162e3
update changelog
vladmandic Oct 4, 2024
3cd584b
fix en height
vladmandic Oct 4, 2024
7e81f1f
lock down adetailer
vladmandic Oct 5, 2024
209c199
lock down extensions
vladmandic Oct 5, 2024
b210942
fix xyz apply
vladmandic Oct 6, 2024
c13b8d2
installer fixes
vladmandic Oct 6, 2024
04a5071
face-hires -> detailer refactor
vladmandic Oct 6, 2024
3bbcc33
add detailer
vladmandic Oct 7, 2024
255255d
update changelog
vladmandic Oct 7, 2024
e1a5228
update modernui for detailer
vladmandic Oct 7, 2024
d1582aa
update changelog
vladmandic Oct 7, 2024
e0d702a
add gfx autodetect options
vladmandic Oct 7, 2024
072a837
save detailer settings
vladmandic Oct 7, 2024
a3a1772
history prototype
vladmandic Oct 7, 2024
2e08a26
add latent history
vladmandic Oct 8, 2024
0ced5d4
update hsa message
vladmandic Oct 8, 2024
7d7a6bf
fix controlnet
vladmandic Oct 8, 2024
57ebdf0
fix control size metadata
vladmandic Oct 8, 2024
ab32b8a
update modernui
vladmandic Oct 8, 2024
a722ed3
fix face restore dependent extensions
vladmandic Oct 8, 2024
d9e0af0
fix xyz grid restore
vladmandic Oct 8, 2024
f5ad8e5
cleanup
vladmandic Oct 8, 2024
f17fbe0
fix corner-case model load on startup
vladmandic Oct 9, 2024
723ce74
add linfusion
vladmandic Oct 9, 2024
8f8f0d6
update
vladmandic Oct 9, 2024
bc47e49
fix reprocess from history
vladmandic Oct 9, 2024
69d3a95
fix adetailer
vladmandic Oct 10, 2024
d075edb
update
vladmandic Oct 10, 2024
0d02a2e
fix codeformer
vladmandic Oct 11, 2024
c2ff8a5
add flux unet gguf support
vladmandic Oct 11, 2024
f5253da
update gguf notes and lint
vladmandic Oct 11, 2024
e2d5cd5
Flux Lycoris Keymapping
AI-Casanova Oct 12, 2024
ea0dfeb
better handle any quant lib requirements
vladmandic Oct 12, 2024
2e2cb43
Make SDPA hijacks chainable and add Sage Attention
Disty0 Oct 12, 2024
3e6f785
cleanup config
vladmandic Oct 12, 2024
c89d94e
patch gradio iframeresizer
vladmandic Oct 12, 2024
73bd081
interruptible upscale ops
vladmandic Oct 12, 2024
0c54c23
add sageattention
vladmandic Oct 12, 2024
84f8ab4
Fix IPEX
Disty0 Oct 13, 2024
011d9c3
Move device backed initialization to shared.py
Disty0 Oct 13, 2024
9c05124
Add devices.has_xpu()
Disty0 Oct 13, 2024
9de22eb
ensure lora is not first to use quants
vladmandic Oct 13, 2024
8d0609c
fix gguf install order
vladmandic Oct 13, 2024
e55b687
Lora check "optimum.quanto" instead of "quanto"
Disty0 Oct 13, 2024
aeddd6c
cleanup
vladmandic Oct 13, 2024
a2c5893
fix xyz apply unet
vladmandic Oct 13, 2024
c2ab0b1
check te device
vladmandic Oct 14, 2024
31510cf
fix xyz apply prompt
vladmandic Oct 14, 2024
b14e8f9
Don't assume Cuda on devices.same_device()
Disty0 Oct 14, 2024
cdcca50
add model analyzer
vladmandic Oct 14, 2024
e80e8c8
granular model move
vladmandic Oct 14, 2024
6bb688c
add set_accelerate
vladmandic Oct 14, 2024
8047090
cast solver order
vladmandic Oct 14, 2024
bbedeaf
Set has_accelerate with get_signature_keys
Disty0 Oct 14, 2024
62d7431
Use _internal_dict.keys instead of _get_signature_keys
Disty0 Oct 14, 2024
be7449c
fix loading of some non-primary models
vladmandic Oct 15, 2024
da0510b
add support for cogview 3plus
vladmandic Oct 15, 2024
d15f666
add support for torch expandable segments
vladmandic Oct 15, 2024
0a32d80
add models table
vladmandic Oct 15, 2024
66744f5
update changelog
vladmandic Oct 15, 2024
18c0ab7
update wiki
vladmandic Oct 15, 2024
e746871
flux extra controlnets and differential diffusion
vladmandic Oct 16, 2024
ec7544e
update changelog and readme
vladmandic Oct 16, 2024
db3e03c
add meissonic (unstable)
vladmandic Oct 16, 2024
7a5e623
Fix UnboundLocalError on pipeline loading
Fundaris Oct 16, 2024
dba69ac
refactor load-model
vladmandic Oct 16, 2024
1427a4f
add conditional
vladmandic Oct 16, 2024
899c896
cleanup
Disty0 Oct 16, 2024
9c7eba1
Merge pull request #3490 from Fundaris/dev
vladmandic Oct 16, 2024
6705e60
fix unapply lora
vladmandic Oct 17, 2024
cbc52fe
update changelog and cleanup lora
vladmandic Oct 17, 2024
01953b9
add lora ui callback
vladmandic Oct 17, 2024
959e8ac
add cli load state dict example
vladmandic Oct 17, 2024
6c11002
PyTorch 2.5 XPU support
Disty0 Oct 17, 2024
1a7b88a
Update the default ROCm version
Disty0 Oct 17, 2024
065155f
IPEX fixes
Disty0 Oct 17, 2024
6ae82cb
Cascade fix default scheduler
Disty0 Oct 17, 2024
51f184d
update changelog
vladmandic Oct 18, 2024
a5e92e2
Update webui.py
brknsoul Oct 18, 2024
918174e
Update shared.py
brknsoul Oct 18, 2024
b6d50a9
brackets
brknsoul Oct 18, 2024
702846c
Intel fix Triton
Disty0 Oct 18, 2024
f7ebc81
Merge pull request #3493 from brknsoul/dev
vladmandic Oct 18, 2024
1d51ae3
Intel fix torch.compile
Disty0 Oct 18, 2024
ae4591a
reimplement torchao quantization
vladmandic Oct 18, 2024
7f23fd3
disable cudnn on torch 2.5.0 with sdp
vladmandic Oct 18, 2024
688ab23
Lock ROCm to PyTorch 2.4.1
Disty0 Oct 18, 2024
62ff472
lock torch on cuda
vladmandic Oct 18, 2024
52046ca
update
vladmandic Oct 19, 2024
9e024a1
add one more parse prompts
vladmandic Oct 19, 2024
2744269
WIP Lora Extract
AI-Casanova Oct 19, 2024
13a2e6c
update torch requirements
vladmandic Oct 19, 2024
3ed48ec
Merge branch 'dev' into lora-extract
AI-Casanova Oct 20, 2024
6a9b7bc
finish Lora Extract
AI-Casanova Oct 20, 2024
cc28c5c
Merge pull request #3495 from AI-Casanova/lora-extract
vladmandic Oct 20, 2024
2570d87
typing, linting
vladmandic Oct 20, 2024
64f3632
messages,stats,save
vladmandic Oct 20, 2024
a518592
metadata
vladmandic Oct 20, 2024
229cf67
locking,progressbar
vladmandic Oct 20, 2024
a13f1ee
cleanup
vladmandic Oct 20, 2024
ad98f91
Merge pull request #3496 from vladmandic/lora-extract
vladmandic Oct 20, 2024
2c1658d
changelog for extract lora
vladmandic Oct 20, 2024
f656fbe
cleanup
vladmandic Oct 20, 2024
ec1a57f
Fix LoRA unload logic
AI-Casanova Oct 20, 2024
8e579ad
update changelog
vladmandic Oct 20, 2024
494db8e
Grammar
a4hryou Oct 21, 2024
77f7ce6
Merge pull request #3497 from a4hryou/patch-1
vladmandic Oct 21, 2024
56ec09f
add detailer multi-class support
vladmandic Oct 21, 2024
09a4320
specify cuXXX and cpu
Yoinky3000 Oct 21, 2024
26ae378
only change cuda and openvino torch command
Yoinky3000 Oct 21, 2024
cc4f481
Merge pull request #3499 from Yoinky3000/dev
vladmandic Oct 21, 2024
cf9fe20
omnigen!
vladmandic Oct 22, 2024
b37c611
fix sd3 loader
vladmandic Oct 22, 2024
7d7b2fc
add sd3.5
vladmandic Oct 22, 2024
8eca1b1
update diffusers
vladmandic Oct 22, 2024
c2e4810
update wiki
vladmandic Oct 22, 2024
63bedde
update readme
vladmandic Oct 22, 2024
f191134
support bnb quantization during load
vladmandic Oct 22, 2024
b28b840
update changelog
vladmandic Oct 22, 2024
1b329a6
cleanup
vladmandic Oct 22, 2024
e09423b
update settings
vladmandic Oct 22, 2024
4e4079e
update changelog
vladmandic Oct 22, 2024
142fddf
cleanup
vladmandic Oct 23, 2024
10dbb59
update change
vladmandic Oct 23, 2024
a95a394
fix styles migration
vladmandic Oct 23, 2024
bf6f130
update omnigen
vladmandic Oct 23, 2024
0587f0b
gc logging
vladmandic Oct 23, 2024
5efcae1
update changelog
vladmandic Oct 23, 2024
3cc78bd
Merge branch 'master' into dev
vladmandic Oct 23, 2024
6d4f2df
linting
vladmandic Oct 23, 2024
3 changes: 2 additions & 1 deletion .eslintrc.json
@@ -123,6 +123,7 @@
"venv",
"panzoom.js",
"split.js",
"exifr.js"
"exifr.js",
"iframeResizer.min.js"
]
}
12 changes: 10 additions & 2 deletions .pylintrc
@@ -7,9 +7,13 @@ fail-on=
fail-under=10
ignore=CVS
ignore-paths=/usr/lib/.*$,
modules/apg,
modules/control/proc,
modules/control/units,
modules/ctrlx,
modules/dcsolver,
modules/dml,
modules/ggml,
modules/hidiffusion,
modules/hijack,
modules/intel/ipex,
@@ -18,14 +22,16 @@ ignore-paths=/usr/lib/.*$,
modules/ldsr,
modules/onnx_impl,
modules/pag,
modules/prompt_parser_xhinker.py,
modules/rife,
modules/taesd,
modules/todo,
modules/unipc,
modules/vdm,
modules/xadapter,
modules/dcsolver,
modules/meissonic,
modules/omnigen,
repositories,
modules/prompt_parser_xhinker.py,
extensions-builtin/sd-webui-agent-scheduler,
extensions-builtin/sd-extension-chainner/nodes,
extensions-builtin/sdnext-modernui/node_modules,
@@ -163,12 +169,14 @@ disable=bad-inline-option,
too-many-locals,
too-many-nested-blocks,
too-many-statements,
too-many-positional-arguments,
unidiomatic-typecheck,
unnecessary-dict-index-lookup,
unnecessary-dunder-call,
unnecessary-lambda,
use-dict-literal,
use-symbolic-message-instead,
unknown-option-value,
useless-suppression,
wrong-import-position,
enable=c-extension-no-member
22 changes: 14 additions & 8 deletions .ruff.toml
@@ -3,24 +3,29 @@ exclude = [
".git",
".ruff_cache",
".vscode",
"modules/apg",
"modules/control/proc",
"modules/control/units",
"modules/dcsolver",
"modules/ggml",
"modules/hidiffusion",
"modules/hijack",
"modules/intel/ipex",
"modules/intel/openvino",
"modules/k-diffusion",
"modules/ldsr",
"modules/pag",
"modules/postprocess/aurasr_arch.py",
"modules/prompt_parser_xhinker.py",
"modules/rife",
"modules/segmoe",
"modules/taesd",
"modules/todo",
"modules/unipc",
"modules/vdm",
"modules/xadapter",
"modules/dcsolver",
"modules/intel/openvino",
"modules/intel/ipex",
"modules/segmoe",
"modules/control/proc",
"modules/control/units",
"modules/prompt_parser_xhinker.py",
"modules/postprocess/aurasr_arch.py",
"modules/meissonic",
"modules/omnigen",
"repositories",
"extensions-builtin/sd-extension-chainner/nodes",
"extensions-builtin/sd-webui-agent-scheduler",
@@ -65,6 +70,7 @@ ignore = [
"E731", # Do not assign a `lambda` expression, use a `def`
"E741", # Ambiguous variable name
"F401", # Imported by unused
"NPY002", # replace legacy random
"RUF005", # Consider iterable unpacking
"RUF010", # Use explicit conversion flag
"RUF012", # Mutable class attributes
313 changes: 313 additions & 0 deletions CHANGELOG.md

Large diffs are not rendered by default.

18 changes: 11 additions & 7 deletions README.md
@@ -31,7 +31,7 @@ All individual features are not listed here, instead check [ChangeLog](CHANGELOG.md)
- Multiple UIs!
▹ **Standard | Modern**
- Multiple diffusion models!
▹ **Stable Diffusion 1.5/2.1/XL/3.0 | LCM | Lightning | Segmind | Kandinsky | Pixart-α | Pixart-Σ | Stable Cascade | FLUX.1 | AuraFlow | Würstchen | Lumina | Kolors | aMUSEd | DeepFloyd IF | UniDiffusion | SD-Distilled | BLiP Diffusion | KOALA | SDXS | Hyper-SD | HunyuanDiT | etc.**
▹ **Stable Diffusion 1.5/2.1/XL/3.0/3.5 | LCM | Lightning | Segmind | Kandinsky | Pixart-α | Pixart-Σ | Stable Cascade | FLUX.1 | AuraFlow | Würstchen | Alpha Lumina | Kwai Kolors | aMUSEd | DeepFloyd IF | UniDiffusion | SD-Distilled | BLiP Diffusion | KOALA | SDXS | Hyper-SD | HunyuanDiT | CogView | OmniGen | Meissonic | etc.**
- Built-in Control for Text, Image, Batch and video processing!
▹ **ControlNet | ControlNet XS | Control LLLite | T2I Adapters | IP Adapters**
- Multiplatform!
@@ -68,27 +68,31 @@ Additional models will be added as they become available and there is public interest
- [RunwayML Stable Diffusion](https://github.com/Stability-AI/stablediffusion/) 1.x and 2.x *(all variants)*
- [StabilityAI Stable Diffusion XL](https://github.com/Stability-AI/generative-models)
- [StabilityAI Stable Diffusion 3 Medium](https://stability.ai/news/stable-diffusion-3-medium)
- [Stable Diffusion 3.5 Large](https://huggingface.co/stabilityai/stable-diffusion-3.5-large)
- [StabilityAI Stable Video Diffusion](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid) Base, XT 1.0, XT 1.1
- [LCM: Latent Consistency Models](https://github.com/openai/consistency_models)
- [StabilityAI Stable Cascade](https://github.com/Stability-AI/StableCascade) *Full* and *Lite*
- [Black Forest Labs FLUX.1](https://blackforestlabs.ai/announcing-black-forest-labs/) Dev, Schnell
- [AuraFlow](https://huggingface.co/fal/AuraFlow)
- [AlphaVLLM Lumina-Next-SFT](https://huggingface.co/Alpha-VLLM/Lumina-Next-SFT-diffusers)
- [Playground AI](https://huggingface.co/playgroundai/playground-v2-256px-base) *v1, v2 256, v2 512, v2 1024 and latest v2.5*
- [Tencent HunyuanDiT](https://github.com/Tencent/HunyuanDiT)
- [OmniGen](https://arxiv.org/pdf/2409.11340)
- [Meissonic](https://github.com/viiika/Meissonic)
- [Kwai Kolors](https://huggingface.co/Kwai-Kolors/Kolors)
- [Playground](https://huggingface.co/playgroundai/playground-v2-256px-base) *v1, v2 256, v2 512, v2 1024 and latest v2.5*
- [Stable Cascade](https://github.com/Stability-AI/StableCascade) *Full* and *Lite*
- [aMUSEd 256](https://huggingface.co/amused/amused-256) 256 and 512
- [CogView 3+](https://huggingface.co/THUDM/CogView3-Plus-3B)
- [LCM: Latent Consistency Models](https://github.com/openai/consistency_models)
- [aMUSEd](https://huggingface.co/amused/amused-256) 256 and 512
- [Segmind Vega](https://huggingface.co/segmind/Segmind-Vega)
- [Segmind SSD-1B](https://huggingface.co/segmind/SSD-1B)
- [Segmind SegMoE](https://github.com/segmind/segmoe) *SD and SD-XL*
- [Segmind SD Distilled](https://huggingface.co/blog/sd_distillation) *(all variants)*
- [Kandinsky](https://github.com/ai-forever/Kandinsky-2) *2.1 and 2.2 and latest 3.0*
- [PixArt-α XL 2](https://github.com/PixArt-alpha/PixArt-alpha) *Medium and Large*
- [PixArt-Σ](https://github.com/PixArt-alpha/PixArt-sigma)
- [Warp Wuerstchen](https://huggingface.co/blog/wuertschen)
- [Tencent HunyuanDiT](https://github.com/Tencent/HunyuanDiT)
- [Tsinghua UniDiffusion](https://github.com/thu-ml/unidiffuser)
- [DeepFloyd IF](https://github.com/deep-floyd/IF) *Medium and Large*
- [ModelScope T2V](https://huggingface.co/damo-vilab/text-to-video-ms-1.7b)
- [Segmind SD Distilled](https://huggingface.co/blog/sd_distillation) *(all variants)*
- [BLIP-Diffusion](https://dxli94.github.io/BLIP-Diffusion-website/)
- [KOALA 700M](https://github.com/youngwanLEE/sdxl-koala)
- [VGen](https://huggingface.co/ali-vilab/i2vgen-xl)
4 changes: 2 additions & 2 deletions cli/api-txt2img.py
@@ -49,7 +49,7 @@ def generate(args): # pylint: disable=redefined-outer-name
options['width'] = int(args.width)
options['height'] = int(args.height)
if args.faces:
options['restore_faces'] = args.faces
options['detailer'] = args.detailer
options['denoising_strength'] = 0.5
options['hr_sampler_name'] = args.sampler
data = post('/sdapi/v1/txt2img', options)
@@ -75,7 +75,7 @@ def generate(args): # pylint: disable=redefined-outer-name
parser.add_argument('--height', required=False, default=512, help='image height')
parser.add_argument('--steps', required=False, default=20, help='number of steps')
parser.add_argument('--seed', required=False, default=-1, help='initial seed')
parser.add_argument('--faces', action='store_true', help='restore faces')
parser.add_argument('--detailer', action='store_true', help='run detailer')
parser.add_argument('--sampler', required=False, default='Euler a', help='sampler name')
parser.add_argument('--output', required=False, default=None, help='output image file')
parser.add_argument('--model', required=False, help='model name')
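The renamed flag is passed straight through in the txt2img request body. A minimal sketch of such a call follows; the server URL and prompt are placeholders and authentication is omitted, so treat it as illustrative rather than part of the PR.

import requests

options = {
    'prompt': 'photo of a cat',   # placeholder prompt
    'steps': 20,
    'width': 512,
    'height': 512,
    'detailer': True,             # replaces the old 'restore_faces' flag
}
res = requests.post('http://127.0.0.1:7860/sdapi/v1/txt2img', json=options, timeout=300)  # placeholder URL
print(list(res.json().keys()))
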
2 changes: 1 addition & 1 deletion cli/create-previews.py
@@ -46,7 +46,7 @@
},
# generate params
'generate': {
'restore_faces': True,
'detailer': True,
'prompt': '',
'negative_prompt': 'foggy, blurry, blurred, duplicate, ugly, mutilated, mutation, mutated, out of frame, bad anatomy, disfigured, deformed, censored, low res, low resolution, watermark, text, poorly drawn face, poorly drawn hands, signature',
'steps': 20,
2 changes: 1 addition & 1 deletion cli/generate.json
@@ -8,7 +8,7 @@
},
"generate":
{
"restore_faces": true,
"detailer": true,
"prompt": "dynamic",
"negative_prompt": "foggy, blurry, blurred, duplicate, ugly, mutilated, mutation, mutated, out of frame, bad anatomy, disfigured, deformed, censored, low res, watermark, text, poorly drawn face, signature",
"steps": 30,
6 changes: 3 additions & 3 deletions cli/generate.py
@@ -230,7 +230,7 @@ def args(): # parse cmd arguments
parser.add_argument('--style', type = str, default = 'random', required = False, help = 'image style, used to guide dynamic prompt when prompt is not provided')
parser.add_argument('--suffix', type = str, default = 'random', required = False, help = 'style suffix, used to guide dynamic prompt when prompt is not provided')
parser.add_argument('--place', type = str, default = 'random', required = False, help = 'place locator, used to guide dynamic prompt when prompt is not provided')
parser.add_argument('--faces', default = False, action='store_true', help = 'restore faces during upscaling')
parser.add_argument('--detailer', default = False, action='store_true', help = 'run detailer')
parser.add_argument('--steps', type = int, default = 0, required = False, help = 'number of steps')
parser.add_argument('--batch', type = int, default = 0, required = False, help = 'batch size, limited by gpu vram')
parser.add_argument('--n', type = int, default = 0, required = False, help = 'number of iterations')
@@ -299,7 +299,7 @@ def args(): # parse cmd arguments
_dynamic = prompt(params)

sd.paths.root = params.path if params.path != '' else sd.paths.root
sd.generate.restore_faces = params.faces if params.faces is not None else sd.generate.restore_faces
sd.generate.detailer = params.detailer if params.detailer is not None else sd.generate.detailer
sd.generate.seed = params.seed if params.seed > 0 else sd.generate.seed
sd.generate.sampler_name = params.sampler if params.sampler != 'random' else sd.generate.sampler_name
sd.generate.batch_size = params.batch if params.batch > 0 else sd.generate.batch_size
@@ -309,7 +309,7 @@ def args(): # parse cmd arguments
sd.generate.height = params.height if params.height > 0 else sd.generate.height
sd.generate.steps = params.steps if params.steps > 0 else sd.generate.steps
sd.upscale.upscaling_resize = params.upscale if params.upscale > 0 else sd.upscale.upscaling_resize
sd.upscale.codeformer_visibility = 1 if params.faces else sd.upscale.codeformer_visibility
sd.upscale.codeformer_visibility = 1 if params.detailer else sd.upscale.codeformer_visibility
sd.options.sd_vae = params.vae if params.vae != '' else sd.options.sd_vae
sd.options.sd_model_checkpoint = params.model if params.model != '' else sd.options.sd_model_checkpoint
sd.upscale.upscaler_1 = 'SwinIR_4x' if params.upscale > 1 else sd.upscale.upscaler_1
2 changes: 2 additions & 0 deletions cli/image-exif.py
@@ -120,3 +120,5 @@ def read_exif(filename: str):
for root, _dirs, files in os.walk(fn):
for file in files:
read_exif(os.path.join(root, file))
else:
print('file not found: ', fn)
89 changes: 89 additions & 0 deletions cli/load_unet.py
@@ -0,0 +1,89 @@
import torch
import diffusers


class StateDictStats():
    cls: str = None
    device: torch.device = None
    params: int = 0
    weights: dict = {}
    dtypes: dict = {}
    config: dict = None

    def __repr__(self):
        return f'cls={self.cls} params={self.params} weights={self.weights} device={self.device} dtypes={self.dtypes} config={self.config is not None}'


def set_module_tensor(
    module: torch.nn.Module,
    name: str,
    value: torch.Tensor,
    stats: StateDictStats,
    device: torch.device = None,
    dtype: torch.dtype = None,
):
    if "." in name:
        splits = name.split(".")
        for split in splits[:-1]:
            module = getattr(module, split)
        name = splits[-1]
    old_value = getattr(module, name)
    with torch.no_grad():
        if value.dtype not in stats.dtypes:
            stats.dtypes[value.dtype] = 0
        stats.dtypes[value.dtype] += 1
        if name in module._buffers: # pylint: disable=protected-access
            module._buffers[name] = value.to(device=device, dtype=dtype, non_blocking=True) # pylint: disable=protected-access
            if 'buffers' not in stats.weights:
                stats.weights['buffers'] = 0
            stats.weights['buffers'] += 1
        elif value is not None:
            param_cls = type(module._parameters[name]) # pylint: disable=protected-access
            module._parameters[name] = param_cls(value, requires_grad=old_value.requires_grad).to(device, dtype=dtype, non_blocking=True) # pylint: disable=protected-access
            if 'parameters' not in stats.weights:
                stats.weights['parameters'] = 0
            stats.weights['parameters'] += 1


def load_unet(config_file: str, state_dict: dict, device: torch.device = None, dtype: torch.dtype = None):
    # same can be done for other modules or even for entire model by loading model config and then walking through its modules
    from accelerate import init_empty_weights
    with init_empty_weights():
        stats = StateDictStats()
        stats.device = device
        stats.config = diffusers.UNet2DConditionModel.load_config(config_file)
        unet = diffusers.UNet2DConditionModel.from_config(stats.config)
        stats.cls = unet.__class__.__name__
    expected_state_dict_keys = list(unet.state_dict().keys())
    stats.weights['expected'] = len(expected_state_dict_keys)
    for param_name, param in state_dict.items():
        if param_name not in expected_state_dict_keys:
            if 'unknown' not in stats.weights:
                stats.weights['unknown'] = 0
            stats.weights['unknown'] += 1
            continue
        set_module_tensor(unet, name=param_name, value=param, device=device, dtype=dtype, stats=stats)
        state_dict[param_name] = None # unload as we initialize the model so we dont consume double the memory
    stats.params = sum(p.numel() for p in unet.parameters(recurse=True))
    return unet, stats


def load_safetensors(fn: str):
    import safetensors.torch
    state_dict = safetensors.torch.load_file(fn, device='cpu') # state dict should always be loaded to cpu
    return state_dict


if __name__ == "__main__":
    # need pipe already present to load unet state_dict into or we could load unet first and then manually create pipe with params
    pipe = diffusers.StableDiffusionXLPipeline.from_single_file('/mnt/models/stable-diffusion/sdxl/TempestV0.1-Artistic.safetensors', cache_dir='/mnt/models/huggingface')
    # this could be kept in memory so we dont have to reload it
    dct = load_safetensors('/mnt/models/UNET/dpo-sdxl-text2image.safetensors')
    pipe.unet, s = load_unet(
        config_file = 'configs/sdxl/unet/config.json', # can also point to online hf model with subfolder
        state_dict = dct,
        device = torch.device('cpu'), # can leave out to use default device
        dtype = torch.bfloat16, # can leave out to use default dtype, especially for mixed precision modules
    )
    from rich import print as rprint
    rprint(f'Stats: {s}')
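
A side note on the approach in load_unet(): the module is built under init_empty_weights() so no real weights are allocated, then each tensor is materialized from the state dict while the source reference is dropped, keeping peak memory near one copy of the weights. A minimal sketch of that pattern (hypothetical layer and sizes, not part of this PR):

import torch
from accelerate import init_empty_weights

with init_empty_weights():
    layer = torch.nn.Linear(4096, 4096)  # parameters live on the meta device, no real allocation yet

state_dict = {'weight': torch.randn(4096, 4096), 'bias': torch.randn(4096)}
for name, value in state_dict.items():
    # materialize one tensor at a time, mirroring set_module_tensor() above
    layer._parameters[name] = torch.nn.Parameter(value, requires_grad=False)  # pylint: disable=protected-access
    state_dict[name] = None  # drop the source reference as we go
print(sum(p.numel() for p in layer.parameters()))  # 16781312
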
35 changes: 20 additions & 15 deletions cli/model-metadata.py
@@ -7,22 +7,27 @@

def read_metadata(fn):
res = {}
if not fn.lower().endswith(".safetensors"):
return
with open(fn, mode="rb") as f:
metadata_len = f.read(8)
metadata_len = int.from_bytes(metadata_len, "little")
json_start = f.read(2)
if metadata_len <= 2 or json_start not in (b'{"', b"{'"):
print(f"Not a valid safetensors file: {fn}")
json_data = json_start + f.read(metadata_len-2)
json_obj = json.loads(json_data)
for k, v in json_obj.get("__metadata__", {}).items():
res[k] = v
if isinstance(v, str) and v[0:1] == '{':
try:
res[k] = json.loads(v)
except Exception:
pass
print(f"{fn}: {json.dumps(res, indent=4)}")
try:
metadata_len = f.read(8)
metadata_len = int.from_bytes(metadata_len, "little")
json_start = f.read(2)
if metadata_len <= 2 or json_start not in (b'{"', b"{'"):
print(f"Not a valid safetensors file: {fn}")
json_data = json_start + f.read(metadata_len-2)
json_obj = json.loads(json_data)
for k, v in json_obj.get("__metadata__", {}).items():
res[k] = v
if isinstance(v, str) and v[0:1] == '{':
try:
res[k] = json.loads(v)
except Exception:
pass
print(f"{fn}: {json.dumps(res, indent=4)}")
except Exception:
print(f"{fn}: cannot read metadata")


def main():
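For comparison, the same __metadata__ block that read_metadata() parses by hand can also be read through the safetensors library itself; a brief sketch with a placeholder path:

from safetensors import safe_open

with safe_open('model.safetensors', framework='pt', device='cpu') as f:  # placeholder path
    print(f.metadata())  # returns the parsed __metadata__ dict, or None when absent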