[Update] diffusers v0.29.2 Update #650

Merged
merged 16 commits into from Sep 27, 2024
2 changes: 1 addition & 1 deletion docs/diffusers/installation.md
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.

# Installation

🤗 Diffusers is tested on Python 3.8+, MindSpore 2.2.10+. Follow the installation instructions below for the deep learning library you are using:
🤗 Diffusers is tested on Python 3.8+, MindSpore 2.3+. Follow the installation instructions below for the deep learning library you are using:

- [MindSpore](https://www.mindspore.cn/install) installation instructions

10 changes: 9 additions & 1 deletion docs/diffusers/limitations.md
@@ -18,6 +18,15 @@ Due to differences in framework, some APIs & models will not be identical to [hu
Unlike the original output `posterior = DiagonalGaussianDistribution(latent)`, which supports sampling via `posterior.sample()`, we can only output the `latent` and then sample through `AutoencoderKL.diag_gauss_dist.sample(latent)`.
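
For example, a minimal sketch of the explicit sampling flow (the checkpoint name and input shape are placeholders):

```python
import mindspore as ms
from mindone.diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
image = ms.ops.randn(1, 3, 256, 256)  # placeholder input batch

latent = vae.encode(image)[0]  # raw latent parameters, not a distribution object
z = vae.diag_gauss_dist.sample(latent)  # sample explicitly through the helper
```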

### `self.config` in `construct()`

For many models, parameters used at initialization are registered in `self.config`. In the original 🤗 Diffusers they are often accessed inside `construct`, e.g. `if self.config.xxx == xxx`, to choose an execution path. However, attribute access of this kind is not supported by MindSpore's static graph syntax. Two feasible replacements are:

- set a new attribute on `self` at initialization, e.g. `self.xxx = self.config.xxx`, then use `self.xxx` in `construct` instead;
- use `self.config["xxx"]`, since `self.config` is an `OrderedDict` and item access is supported in static graph mode.

When `self.config.xxx` changes, we update both `self.xxx` and `self.config["xxx"]`, as illustrated in the sketch below.
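
A minimal sketch of both options (`ToyModel` and its `sample_size` entry are made-up names for illustration):

```python
import mindspore.nn as nn
from mindone.diffusers.configuration_utils import ConfigMixin, register_to_config

class ToyModel(nn.Cell, ConfigMixin):
    config_name = "toy_model_config.json"

    @register_to_config
    def __init__(self, sample_size: int = 64):
        super().__init__()
        # Option 1: mirror the registered config value as a plain attribute.
        self.sample_size = sample_size

    def construct(self, x):
        if self.sample_size == 64:  # option 1: cached attribute, graph-mode safe
            x = x + 1
        if self.config["sample_size"] == 64:  # option 2: dict-style item access
            x = x + 1
        return x
```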

## Models

The table below represents the current support in mindone/diffusers for each of these modules: whether they are supported in PyNative FP16, Graph FP16, PyNative FP32, or Graph FP32 mode.
@@ -58,7 +67,6 @@ The table below represents the current support in mindone/diffusers for each of
The table below represents the current support in mindone/diffusers for each of these pipelines in **MindSpore 2.3.0**:
whether they are supported in PyNative FP16, Graph FP16, PyNative FP32, or Graph FP32 mode.

> Hint: A precision issue with GroupNorm under FP16 affects almost all pipelines and degrades inference accuracy,
> so the experiments in the table below default to upcasting GroupNorm to FP32 to avoid it.

37 changes: 35 additions & 2 deletions mindone/diffusers/__init__.py
@@ -1,4 +1,4 @@
__version__ = "0.27.1"
__version__ = "0.29.2"

from typing import TYPE_CHECKING

@@ -13,28 +13,36 @@

_import_structure = {
"configuration_utils": ["ConfigMixin"],
"loaders": ["FromOriginalModelMixin"],
"models": [
"AsymmetricAutoencoderKL",
"AutoencoderKL",
"AutoencoderKLTemporalDecoder",
"AutoencoderTiny",
"ConsistencyDecoderVAE",
"ControlNetModel",
"ControlNetXSAdapter",
"DiTTransformer2DModel",
"HunyuanDiT2DModel",
"I2VGenXLUNet",
"Kandinsky3UNet",
"ModelMixin",
"MotionAdapter",
"MultiAdapter",
"PixArtTransformer2DModel",
"PriorTransformer",
"SD3ControlNetModel",
"SD3MultiControlNetModel",
"SD3Transformer2DModel",
"T2IAdapter",
"T5FilmDecoder",
"Transformer2DModel",
"SD3Transformer2DModel",
"StableCascadeUNet",
"UNet1DModel",
"UNet2DConditionModel",
"UNet2DModel",
"UNet3DConditionModel",
"UNetControlNetXSModel",
"UNetMotionModel",
"UNetSpatioTemporalConditionModel",
"UVit2DModel",
@@ -51,6 +59,7 @@
],
"pipelines": [
"AnimateDiffPipeline",
"AnimateDiffSDXLPipeline",
"AnimateDiffVideoToVideoPipeline",
"BlipDiffusionControlNetPipeline",
"BlipDiffusionPipeline",
@@ -60,6 +69,7 @@
"DDPMPipeline",
"DiffusionPipeline",
"DiTPipeline",
"HunyuanDiTPipeline",
"I2VGenXLPipeline",
"IFImg2ImgPipeline",
"IFImg2ImgSuperResolutionPipeline",
@@ -90,17 +100,23 @@
"LatentConsistencyModelPipeline",
"LDMSuperResolutionPipeline",
"LDMTextToImagePipeline",
"MarigoldDepthPipeline",
"MarigoldNormalsPipeline",
"PixArtAlphaPipeline",
"PixArtSigmaPipeline",
"ShapEImg2ImgPipeline",
"ShapEPipeline",
"StableCascadeCombinedPipeline",
"StableCascadeDecoderPipeline",
"StableCascadePriorPipeline",
"StableDiffusion3ControlNetPipeline",
"StableDiffusion3Img2ImgPipeline",
"StableDiffusion3Pipeline",
"StableDiffusionAdapterPipeline",
"StableDiffusionControlNetImg2ImgPipeline",
"StableDiffusionControlNetInpaintPipeline",
"StableDiffusionControlNetPipeline",
"StableDiffusionControlNetXSPipeline",
"StableDiffusionDepth2ImgPipeline",
"StableDiffusionDiffEditPipeline",
"StableDiffusionGLIGENPipeline",
@@ -116,6 +132,7 @@
"StableDiffusionXLControlNetImg2ImgPipeline",
"StableDiffusionXLControlNetInpaintPipeline",
"StableDiffusionXLControlNetPipeline",
"StableDiffusionXLControlNetXSPipeline",
"StableDiffusionXLImg2ImgPipeline",
"StableDiffusionXLInpaintPipeline",
"StableDiffusionXLInstructPix2PixPipeline",
@@ -172,12 +189,18 @@
AutoencoderTiny,
ConsistencyDecoderVAE,
ControlNetModel,
ControlNetXSAdapter,
DiTTransformer2DModel,
HunyuanDiT2DModel,
I2VGenXLUNet,
Kandinsky3UNet,
ModelMixin,
MotionAdapter,
MultiAdapter,
PixArtTransformer2DModel,
PriorTransformer,
SD3ControlNetModel,
SD3MultiControlNetModel,
SD3Transformer2DModel,
StableCascadeUNet,
T2IAdapter,
@@ -187,6 +210,7 @@
UNet2DConditionModel,
UNet2DModel,
UNet3DConditionModel,
UNetControlNetXSModel,
UNetMotionModel,
UNetSpatioTemporalConditionModel,
UVit2DModel,
@@ -203,6 +227,7 @@
)
from .pipelines import (
AnimateDiffPipeline,
AnimateDiffSDXLPipeline,
AnimateDiffVideoToVideoPipeline,
BlipDiffusionControlNetPipeline,
BlipDiffusionPipeline,
@@ -211,6 +236,7 @@
DDPMPipeline,
DiffusionPipeline,
DiTPipeline,
HunyuanDiTPipeline,
I2VGenXLPipeline,
IFImg2ImgPipeline,
IFImg2ImgSuperResolutionPipeline,
@@ -241,17 +267,23 @@
LatentConsistencyModelPipeline,
LDMSuperResolutionPipeline,
LDMTextToImagePipeline,
MarigoldDepthPipeline,
MarigoldNormalsPipeline,
PixArtAlphaPipeline,
PixArtSigmaPipeline,
ShapEImg2ImgPipeline,
ShapEPipeline,
StableCascadeCombinedPipeline,
StableCascadeDecoderPipeline,
StableCascadePriorPipeline,
StableDiffusion3ControlNetPipeline,
StableDiffusion3Img2ImgPipeline,
StableDiffusion3Pipeline,
StableDiffusionAdapterPipeline,
StableDiffusionControlNetImg2ImgPipeline,
StableDiffusionControlNetInpaintPipeline,
StableDiffusionControlNetPipeline,
StableDiffusionControlNetXSPipeline,
StableDiffusionDepth2ImgPipeline,
StableDiffusionDiffEditPipeline,
StableDiffusionGLIGENPipeline,
@@ -267,6 +299,7 @@
StableDiffusionXLControlNetImg2ImgPipeline,
StableDiffusionXLControlNetInpaintPipeline,
StableDiffusionXLControlNetPipeline,
StableDiffusionXLControlNetXSPipeline,
StableDiffusionXLImg2ImgPipeline,
StableDiffusionXLInpaintPipeline,
StableDiffusionXLInstructPix2PixPipeline,
156 changes: 156 additions & 0 deletions mindone/diffusers/callbacks.py
@@ -0,0 +1,156 @@
from typing import Any, Dict, List

from .configuration_utils import ConfigMixin, register_to_config
from .utils import CONFIG_NAME


class PipelineCallback(ConfigMixin):
"""
Base class for all the official callbacks used in a pipeline. This class provides a structure for implementing
custom callbacks and ensures that all callbacks have a consistent interface.

Please implement the following:
`tensor_inputs`: This should return a list of tensor inputs specific to your callback. You will only be able to
include variables listed in the `._callback_tensor_inputs` attribute of your pipeline class.
`callback_fn`: This method defines the core functionality of your callback.
"""

config_name = CONFIG_NAME

@register_to_config
def __init__(self, cutoff_step_ratio=1.0, cutoff_step_index=None):
super().__init__()

if (cutoff_step_ratio is None and cutoff_step_index is None) or (
cutoff_step_ratio is not None and cutoff_step_index is not None
):
raise ValueError("Either cutoff_step_ratio or cutoff_step_index should be provided, not both or none.")

if cutoff_step_ratio is not None and (
not isinstance(cutoff_step_ratio, float) or not (0.0 <= cutoff_step_ratio <= 1.0)
):
raise ValueError("cutoff_step_ratio must be a float between 0.0 and 1.0.")

@property
def tensor_inputs(self) -> List[str]:
raise NotImplementedError(f"You need to set the attribute `tensor_inputs` for {self.__class__}")

def callback_fn(self, pipeline, step_index, timesteps, callback_kwargs) -> Dict[str, Any]:
raise NotImplementedError(f"You need to implement the method `callback_fn` for {self.__class__}")

def __call__(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
return self.callback_fn(pipeline, step_index, timestep, callback_kwargs)
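

# Illustrative sketch (not part of this PR): a minimal custom callback built on the
# base class above. `StepLoggerCallback` is a made-up name, and `latents` must be
# listed in the pipeline's `._callback_tensor_inputs` for it to be passed in here.
class StepLoggerCallback(PipelineCallback):
    tensor_inputs = ["latents"]

    def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
        # Log denoising progress and return the kwargs unchanged.
        print(f"step {step_index}: timestep {timestep}")
        return callback_kwargs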


class MultiPipelineCallbacks:
"""
This class is designed to handle multiple pipeline callbacks. It accepts a list of PipelineCallback objects and
provides a unified interface for calling all of them.
"""

def __init__(self, callbacks: List[PipelineCallback]):
self.callbacks = callbacks

@property
def tensor_inputs(self) -> List[str]:
return [input for callback in self.callbacks for input in callback.tensor_inputs]

def __call__(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
"""
Calls all the callbacks in order with the given arguments and returns the final callback_kwargs.
"""
for callback in self.callbacks:
callback_kwargs = callback(pipeline, step_index, timestep, callback_kwargs)

return callback_kwargs


class SDCFGCutoffCallback(PipelineCallback):
"""
Callback function for Stable Diffusion Pipelines. After a certain number of steps (set by `cutoff_step_ratio` or
`cutoff_step_index`), this callback will disable the CFG.

Note: This callback mutates the pipeline by changing the `_guidance_scale` attribute to 0.0 after the cutoff step.
"""

tensor_inputs = ["prompt_embeds"]

def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
cutoff_step_ratio = self.config.cutoff_step_ratio
cutoff_step_index = self.config.cutoff_step_index

# Use cutoff_step_index if it's not None, otherwise use cutoff_step_ratio
cutoff_step = (
cutoff_step_index if cutoff_step_index is not None else int(pipeline.num_timesteps * cutoff_step_ratio)
)

if step_index == cutoff_step:
prompt_embeds = callback_kwargs[self.tensor_inputs[0]]
prompt_embeds = prompt_embeds[-1:] # "-1" denotes the embeddings for conditional text tokens.

pipeline._guidance_scale = 0.0

callback_kwargs[self.tensor_inputs[0]] = prompt_embeds
return callback_kwargs


class SDXLCFGCutoffCallback(PipelineCallback):
"""
Callback function for Stable Diffusion XL Pipelines. After a certain number of steps (set by `cutoff_step_ratio` or
`cutoff_step_index`), this callback will disable the CFG.

Note: This callback mutates the pipeline by changing the `_guidance_scale` attribute to 0.0 after the cutoff step.
"""

tensor_inputs = ["prompt_embeds", "add_text_embeds", "add_time_ids"]

def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
cutoff_step_ratio = self.config.cutoff_step_ratio
cutoff_step_index = self.config.cutoff_step_index

# Use cutoff_step_index if it's not None, otherwise use cutoff_step_ratio
cutoff_step = (
cutoff_step_index if cutoff_step_index is not None else int(pipeline.num_timesteps * cutoff_step_ratio)
)

if step_index == cutoff_step:
prompt_embeds = callback_kwargs[self.tensor_inputs[0]]
prompt_embeds = prompt_embeds[-1:] # "-1" denotes the embeddings for conditional text tokens.

add_text_embeds = callback_kwargs[self.tensor_inputs[1]]
add_text_embeds = add_text_embeds[-1:] # "-1" denotes the embeddings for conditional pooled text tokens

add_time_ids = callback_kwargs[self.tensor_inputs[2]]
add_time_ids = add_time_ids[-1:] # "-1" denotes the embeddings for conditional added time vector

pipeline._guidance_scale = 0.0

callback_kwargs[self.tensor_inputs[0]] = prompt_embeds
callback_kwargs[self.tensor_inputs[1]] = add_text_embeds
callback_kwargs[self.tensor_inputs[2]] = add_time_ids
return callback_kwargs


class IPAdapterScaleCutoffCallback(PipelineCallback):
"""
Callback function for any pipeline that inherits `IPAdapterMixin`. After a certain number of steps (set by
`cutoff_step_ratio` or `cutoff_step_index`), this callback will set the IP Adapter scale to `0.0`.

Note: This callback mutates the IP Adapter attention processors by setting the scale to 0.0 after the cutoff step.
"""

tensor_inputs = []

def callback_fn(self, pipeline, step_index, timestep, callback_kwargs) -> Dict[str, Any]:
cutoff_step_ratio = self.config.cutoff_step_ratio
cutoff_step_index = self.config.cutoff_step_index

# Use cutoff_step_index if it's not None, otherwise use cutoff_step_ratio
cutoff_step = (
cutoff_step_index if cutoff_step_index is not None else int(pipeline.num_timesteps * cutoff_step_ratio)
)

if step_index == cutoff_step:
pipeline.set_ip_adapter_scale(0.0)
return callback_kwargs
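
For reference, a usage sketch (not part of this PR) following the upstream diffusers callback API that these classes mirror; the checkpoint name and prompt are placeholders, and it assumes the ported SDXL pipeline exposes the same `callback_on_step_end` arguments:

```python
from mindone.diffusers import StableDiffusionXLPipeline
from mindone.diffusers.callbacks import MultiPipelineCallbacks, SDXLCFGCutoffCallback

# Turn classifier-free guidance off after 40% of the denoising steps.
callbacks = MultiPipelineCallbacks([SDXLCFGCutoffCallback(cutoff_step_ratio=0.4)])

pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
image = pipe(
    "a photo of an astronaut riding a horse",
    num_inference_steps=30,
    callback_on_step_end=callbacks,
    callback_on_step_end_tensor_inputs=callbacks.tensor_inputs,
)[0][0]  # mindone pipelines return tuples by default; [0][0] is the first image
```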