From 24f0893d3c81eb09b2e8c91793329d45adc27570 Mon Sep 17 00:00:00 2001 From: Kaz Nishimura Date: Mon, 11 Sep 2023 08:50:50 +0900 Subject: [PATCH] Enable optimize_by_onnxruntime call for float32 unet model (#17483) This makes it possible to call `optimize_by_onnxruntime` for the float32 unet model if `--use_external_data_format` is also used. ### Motivation and Context When using `optimize_pipeline.py` without `--float16`, `optimize_by_onnxruntime` was not called for the unet model. --- .../models/stable_diffusion/optimize_pipeline.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py index a8e3c6933233..22fee4bfeab2 100644 --- a/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py +++ b/onnxruntime/python/tools/transformers/models/stable_diffusion/optimize_pipeline.py @@ -150,18 +150,19 @@ def optimize_sd_pipeline( op_block_list=op_block_list + force_fp32_operators[name], ) - if enable_runtime_optimization and (float16 or (name not in ["unet"])): + if enable_runtime_optimization: # Use this step to see the final graph that executed by Onnx Runtime. - # Note that ORT cannot save model larger than 2GB so we exclude unet float32 model. - # This step is optional since it has no impact on performance except model loading time. with tempfile.TemporaryDirectory() as tmp_dir: # Save to a temporary file so that we can load it with Onnx Runtime. 
logger.info("Saving a temporary model to run OnnxRuntime graph optimizations...") tmp_model_path = Path(tmp_dir) / "model.onnx" - m.save_model_to_file(str(tmp_model_path)) - ort_optimized_model_path = tmp_model_path + m.save_model_to_file(str(tmp_model_path), use_external_data_format=use_external_data_format) + ort_optimized_model_path = Path(tmp_dir) / "optimized.onnx" optimize_by_onnxruntime( - str(tmp_model_path), use_gpu=True, optimized_model_path=str(ort_optimized_model_path) + str(tmp_model_path), + use_gpu=True, + optimized_model_path=str(ort_optimized_model_path), + save_as_external_data=use_external_data_format, ) model = onnx.load(str(ort_optimized_model_path), load_external_data=True) m = model_type_class_mapping[model_type](model)