Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump Optimum to 1.22 + Adapt to the SD task refactoring in Optimum main #686

Merged
merged 8 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions docs/source/inference_tutorials/stable_diffusion.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ Here is an example of exporting stable diffusion components with `Optimum` CLI:

```bash
optimum-cli export neuron --model stabilityai/stable-diffusion-2-1-base \
--task stable-diffusion \
--batch_size 1 \
--height 512 `# height in pixels of generated image, eg. 512, 768` \
--width 512 `# width in pixels of generated image, eg. 512, 768` \
Expand Down Expand Up @@ -229,7 +228,6 @@ Here is an example of exporting SDXL components with `Optimum` CLI:

```bash
optimum-cli export neuron --model stabilityai/stable-diffusion-xl-base-1.0 \
--task stable-diffusion-xl \
--batch_size 1 \
--height 1024 `# height in pixels of generated image, eg. 768, 1024` \
--width 1024 `# width in pixels of generated image, eg. 768, 1024` \
Expand Down Expand Up @@ -481,7 +479,7 @@ Here we will compile the [`stabilityai/sdxl-turbo`](https://huggingface.co/stabi
### Compile SDXL Turbo

```bash
optimum-cli export neuron --model stabilityai/sdxl-turbo --task stable-diffusion-xl --batch_size 1 --height 512 --width 512 --auto_cast matmul --auto_cast_type bf16 sdxl_turbo_neuron/
optimum-cli export neuron --model stabilityai/sdxl-turbo --batch_size 1 --height 512 --width 512 --auto_cast matmul --auto_cast_type bf16 sdxl_turbo_neuron/
```

### Text-to-Image
Expand Down Expand Up @@ -562,7 +560,7 @@ We can either compile one or multiple ControlNet via the Optimum CLI or programa
* Export via the Optimum CLI

```bash
optimum-cli export neuron -m runwayml/stable-diffusion-v1-5 --task stable-diffusion --batch_size 1 --height 512 --width 512 --controlnet_ids lllyasviel/sd-controlnet-canny --num_images_per_prompt 1 sd_neuron_controlnet/
optimum-cli export neuron -m runwayml/stable-diffusion-v1-5 --batch_size 1 --height 512 --width 512 --controlnet_ids lllyasviel/sd-controlnet-canny --num_images_per_prompt 1 sd_neuron_controlnet/
```

* Export via Python API
Expand Down
7 changes: 0 additions & 7 deletions optimum/commands/export/neuron.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ def parse_args_neuron(parser: "ArgumentParser"):
f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}."
),
)
optional_group.add_argument(
"--library-name",
type=str,
choices=["transformers", "sentence_transformers"],
default=None,
help=("The library on the model. If not provided, will attempt to infer the local checkpoint's library."),
)
optional_group.add_argument(
"--subfolder",
type=str,
Expand Down
7 changes: 0 additions & 7 deletions optimum/commands/export/neuronx.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,6 @@ def parse_args_neuronx(parser: "ArgumentParser"):
f" {str(list(TasksManager._TRANSFORMERS_TASKS_TO_MODEL_LOADERS.keys()) + list(TasksManager._DIFFUSERS_TASKS_TO_MODEL_LOADERS.keys()))}."
),
)
optional_group.add_argument(
"--library-name",
type=str,
choices=["transformers", "diffusers", "sentence_transformers"],
default=None,
help=("The library of the model." " If not provided, will attempt to infer the local checkpoint's library."),
)
optional_group.add_argument(
"--subfolder",
type=str,
Expand Down
32 changes: 13 additions & 19 deletions optimum/exporters/neuron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def get_submodels_and_neuron_configs(
input_shapes: Dict[str, int],
task: str,
output: Path,
library_name: Optional[str] = None,
library_name: str,
subfolder: str = "",
dynamic_batch_size: bool = False,
model_name_or_path: Optional[Union[str, Path]] = None,
Expand All @@ -276,19 +276,17 @@ def get_submodels_and_neuron_configs(
lora_scales: Optional[Union[float, List[float]]] = None,
controlnet_ids: Optional[Union[str, List[str]]] = None,
):
is_stable_diffusion = "stable-diffusion" in task
is_encoder_decoder = (
getattr(model.config, "is_encoder_decoder", False) if isinstance(model.config, PretrainedConfig) else False
)

if is_stable_diffusion:
if library_name == "diffusers":
# TODO: Enable optional outputs for Stable Diffusion
if output_attentions:
raise ValueError(f"`output_attentions`is not supported by the {task} task yet.")
models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs_for_stable_diffusion(
model=model,
input_shapes=input_shapes,
task=task,
output=output,
dynamic_batch_size=dynamic_batch_size,
submodels=submodels,
Expand Down Expand Up @@ -349,7 +347,6 @@ def _normalize_lora_params(lora_model_ids, lora_weight_names, lora_adapter_names
def _get_submodels_and_neuron_configs_for_stable_diffusion(
model: Union["PreTrainedModel", "DiffusionPipeline"],
input_shapes: Dict[str, int],
task: str,
output: Path,
dynamic_batch_size: bool = False,
submodels: Optional[Dict[str, Union[Path, str]]] = None,
Expand Down Expand Up @@ -387,7 +384,6 @@ def _get_submodels_and_neuron_configs_for_stable_diffusion(
)
models_and_neuron_configs = get_stable_diffusion_models_for_export(
pipeline=model,
task=task,
text_encoder_input_shapes=input_shapes["text_encoder"],
unet_input_shapes=input_shapes["unet"],
vae_encoder_input_shapes=input_shapes["vae_encoder"],
Expand Down Expand Up @@ -470,6 +466,7 @@ def load_models_and_neuron_configs(
trust_remote_code: bool,
subfolder: str,
revision: str,
library_name: str,
force_download: bool,
local_files_only: bool,
token: Optional[Union[bool, str]],
Expand All @@ -481,13 +478,8 @@ def load_models_and_neuron_configs(
controlnet_ids: Optional[Union[str, List[str]]] = None,
output_attentions: bool = False,
output_hidden_states: bool = False,
library_name: Optional[str] = None,
**input_shapes,
):
library_name = TasksManager.infer_library_from_model(
model_name_or_path, subfolder=subfolder, library_name=library_name
)

model_kwargs = {
"task": task,
"model_name_or_path": model_name_or_path,
Expand Down Expand Up @@ -562,6 +554,10 @@ def main_export(
output.parent.mkdir(parents=True)

task = TasksManager.map_from_synonym(task)
if library_name is None:
library_name = TasksManager.infer_library_from_model(
model_name_or_path, revision=revision, cache_dir=cache_dir, token=token
)

models_and_neuron_configs, output_model_names = load_models_and_neuron_configs(
model_name_or_path=model_name_or_path,
Expand All @@ -573,13 +569,13 @@ def main_export(
trust_remote_code=trust_remote_code,
subfolder=subfolder,
revision=revision,
library_name=library_name,
force_download=force_download,
local_files_only=local_files_only,
token=token,
submodels=submodels,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
library_name=library_name,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
Expand All @@ -602,8 +598,7 @@ def main_export(

# Validate compiled model
if do_validation is True:
is_stable_diffusion = "stable-diffusion" in task
if is_stable_diffusion:
if library_name == "diffusers":
# Do not validate vae encoder due to the sampling randomness
neuron_outputs.pop("vae_encoder")
models_and_neuron_configs.pop("vae_encoder", None)
Expand Down Expand Up @@ -672,13 +667,12 @@ def main():
args = parser.parse_args()

task = infer_task(args.task, args.model)
is_stable_diffusion = "stable-diffusion" in task
is_sentence_transformers = args.library_name == "sentence_transformers"
library_name = TasksManager.infer_library_from_model(args.model, cache_dir=args.cache_dir)

if is_stable_diffusion:
if library_name == "diffusers":
input_shapes = normalize_stable_diffusion_input_shapes(args)
submodels = {"unet": args.unet}
elif is_sentence_transformers:
elif library_name == "sentence_transformers":
input_shapes = normalize_sentence_transformers_input_shapes(args)
submodels = None
else:
Expand Down Expand Up @@ -722,7 +716,7 @@ def main():
subfolder=args.subfolder,
do_validation=not args.disable_validation,
submodels=submodels,
library_name=args.library_name,
library_name=library_name,
lora_model_ids=getattr(args, "lora_model_ids", None),
lora_weight_names=getattr(args, "lora_weight_names", None),
lora_adapter_names=getattr(args, "lora_adapter_names", None),
Expand Down
2 changes: 1 addition & 1 deletion optimum/exporters/neuron/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ def add_stable_diffusion_compiler_args(config, compiler_args):
compiler_args.append("--enable-fast-loading-neuron-binaries")
# unet or controlnet
if "unet" in identifier or "controlnet" in identifier:
# SDXL unet doesn't support fast loading neuron binaries
# SDXL unet doesn't support fast loading neuron binaries (SDK 2.19.1)
if not getattr(config, "is_sdxl", False):
compiler_args.append("--enable-fast-loading-neuron-binaries")
compiler_args.append("--model-type=unet-inference")
Expand Down
8 changes: 6 additions & 2 deletions optimum/exporters/neuron/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,12 +383,16 @@ class LevitNeuronConfig(ViTNeuronConfig):
pass


@register_in_tasks_manager("mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation"])
@register_in_tasks_manager(
"mobilenet-v2", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"]
)
class MobileNetV2NeuronConfig(ViTNeuronConfig):
pass


@register_in_tasks_manager("mobilevit", *["feature-extraction", "image-classification", "semantic-segmentation"])
@register_in_tasks_manager(
"mobilevit", *["feature-extraction", "image-classification", "semantic-segmentation", "image-segmentation"]
)
class MobileViTNeuronConfig(ViTNeuronConfig):
pass

Expand Down
40 changes: 22 additions & 18 deletions optimum/exporters/neuron/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,15 @@
f"We found an older version of diffusers {_diffusers_version} but we require diffusers to be >= {DIFFUSERS_MINIMUM_VERSION}. "
"Please update diffusers by running `pip install --upgrade diffusers`"
)
from diffusers import ControlNetModel, UNet2DConditionModel
from diffusers import (
ControlNetModel,
ModelMixin,
StableDiffusionPipeline,
StableDiffusionXLImg2ImgPipeline,
StableDiffusionXLInpaintPipeline,
StableDiffusionXLPipeline,
UNet2DConditionModel,
)
from diffusers.models.attention_processor import Attention


Expand All @@ -61,9 +69,6 @@

from .base import NeuronDefaultConfig

if is_diffusers_available():
from diffusers import ModelMixin, StableDiffusionPipeline, StableDiffusionXLImg2ImgPipeline


def build_stable_diffusion_components_mandatory_shapes(
batch_size: Optional[int] = None,
Expand Down Expand Up @@ -107,8 +112,7 @@ def build_stable_diffusion_components_mandatory_shapes(


def get_stable_diffusion_models_for_export(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
task: str,
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
text_encoder_input_shapes: Dict[str, int],
unet_input_shapes: Dict[str, int],
vae_encoder_input_shapes: Dict[str, int],
Expand All @@ -129,10 +133,8 @@ def get_stable_diffusion_models_for_export(
performance benefit (CLIP text encoder, VAE encoder, VAE decoder, Unet).

Args:
pipeline ([`Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"]`]):
pipeline ([`Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"]`]):
The model to export.
task (`str`):
Task name, should be either "stable-diffusion" or "stable-diffusion-xl".
text_encoder_input_shapes (`Dict[str, int]`):
Static shapes used for compiling text encoder.
unet_input_shapes (`Dict[str, int]`):
Expand Down Expand Up @@ -165,7 +167,6 @@ def get_stable_diffusion_models_for_export(
"""
models_for_export = get_submodels_for_export_stable_diffusion(
pipeline=pipeline,
task=task,
lora_model_ids=lora_model_ids,
lora_weight_names=lora_weight_names,
lora_adapter_names=lora_adapter_names,
Expand Down Expand Up @@ -225,8 +226,10 @@ def get_stable_diffusion_models_for_export(
dynamic_batch_size=dynamic_batch_size,
**unet_input_shapes,
)
if task == "stable-diffusion-xl":
unet_neuron_config.is_sdxl = True
is_stable_diffusion_xl = isinstance(
pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline)
)
unet_neuron_config.is_sdxl = is_stable_diffusion_xl

unet_neuron_config.with_controlnet = True if controlnet_ids else False

Expand Down Expand Up @@ -295,7 +298,7 @@ def get_stable_diffusion_models_for_export(


def _load_lora_weights_to_pipeline(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
lora_model_ids: Optional[Union[str, List[str]]] = None,
weight_names: Optional[Union[str, List[str]]] = None,
adapter_names: Optional[Union[str, List[str]]] = None,
Expand Down Expand Up @@ -349,8 +352,7 @@ def load_controlnets(controlnet_ids: Optional[Union[str, List[str]]] = None):


def get_submodels_for_export_stable_diffusion(
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLImg2ImgPipeline"],
task: str,
pipeline: Union["StableDiffusionPipeline", "StableDiffusionXLPipeline"],
output_hidden_states: bool = False,
lora_model_ids: Optional[Union[str, List[str]]] = None,
lora_weight_names: Optional[Union[str, List[str]]] = None,
Expand All @@ -361,7 +363,9 @@ def get_submodels_for_export_stable_diffusion(
"""
Returns the components of a Stable Diffusion model.
"""
is_sdxl = "xl" in task
is_stable_diffusion_xl = isinstance(
pipeline, (StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline, StableDiffusionXLPipeline)
)

# Lora
pipeline = _load_lora_weights_to_pipeline(
Expand All @@ -380,7 +384,7 @@ def get_submodels_for_export_stable_diffusion(

# Text encoders
if pipeline.text_encoder is not None:
if is_sdxl or output_hidden_states:
if is_stable_diffusion_xl or output_hidden_states:
pipeline.text_encoder.config.output_hidden_states = True
models_for_export.append((DIFFUSION_MODEL_TEXT_ENCODER_NAME, copy.deepcopy(pipeline.text_encoder)))

Expand All @@ -399,7 +403,7 @@ def get_submodels_for_export_stable_diffusion(
# Replace original cross-attention module with custom cross-attention module for better performance
# For applying optimized attention score, we need to set env variable `NEURON_FUSE_SOFTMAX=1`
if os.environ.get("NEURON_FUSE_SOFTMAX") == "1":
if is_sdxl:
if is_stable_diffusion_xl:
logger.info("Applying optimized attention score computation for sdxl.")
Attention.get_attention_scores = get_attention_scores_sdxl
else:
Expand Down
1 change: 1 addition & 0 deletions optimum/neuron/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,6 +852,7 @@ def _export(
trust_remote_code=trust_remote_code,
subfolder=subfolder,
revision=revision,
library_name=cls.library_name,
force_download=force_download,
local_files_only=local_files_only,
token=token,
Expand Down
6 changes: 1 addition & 5 deletions optimum/neuron/modeling_traced.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ def _export(
config: "PretrainedConfig",
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
library_name: Optional[str] = None,
force_download: bool = False,
cache_dir: Optional[str] = None,
compiler_workdir: Optional[Union[str, Path]] = None,
Expand Down Expand Up @@ -275,7 +274,6 @@ def _export(
if task is None:
task = TasksManager.infer_task_from_model(cls.auto_model_class)
task = TasksManager.map_from_synonym(task)
library_name = TasksManager.infer_library_from_model(model_id, subfolder=subfolder, library_name=library_name)

# Get compilation arguments
if is_neuron_available() and dynamic_batch_size is True and "batch_size" in kwargs_shapes:
Expand Down Expand Up @@ -320,10 +318,9 @@ def _export(
model_name_or_path=model_id,
subfolder=subfolder,
revision=revision,
framework="pt",
library_name=library_name,
cache_dir=cache_dir,
token=token,
framework="pt",
local_files_only=local_files_only,
force_download=force_download,
trust_remote_code=trust_remote_code,
Expand Down Expand Up @@ -361,7 +358,6 @@ def _export(
local_files_only=local_files_only,
token=token,
do_validation=False,
library_name=library_name,
**kwargs_shapes,
)
config = AutoConfig.from_pretrained(save_dir_path)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
INSTALL_REQUIRES = [
"transformers == 4.43.2",
"accelerate == 0.29.2",
"optimum ~= 1.21.0",
"optimum ~= 1.22.0",
"huggingface_hub >= 0.20.1",
"numpy>=1.22.2, <=1.25.2",
"protobuf<4",
Expand Down
Loading
Loading