-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Flux] Add advanced training script + support textual inversion infer…
…ence (#9434) * add ostris trainer to README & add cache latents of vae * add ostris trainer to README & add cache latents of vae * style * readme * add test for latent caching * add ostris noise scheduler https://github.com/ostris/ai-toolkit/blob/9ee1ef2a0a2a9a02b92d114a95f21312e5906e54/toolkit/samplers/custom_flowmatch_sampler.py#L95 * style * fix import * style * fix tests * style * --change upcasting of transformer? * update readme according to main * add pivotal tuning for CLIP * fix imports, encode_prompt call,add TextualInversionLoaderMixin to FluxPipeline for inference * TextualInversionLoaderMixin support for FluxPipeline for inference * move changes to advanced flux script, revert canonical * add latent caching to canonical script * revert changes to canonical script to keep it separate from #9160 * revert changes to canonical script to keep it separate from #9160 * style * remove redundant line and change code block placement to align with logic * add initializer_token arg * add transformer frac for range support from pure textual inversion to the orig pivotal tuning * support pure textual inversion - wip * adjustments to support pure textual inversion and transformer optimization in only part of the epochs * fix logic when using initializer token * fix pure_textual_inversion_condition * fix ti/pivotal loading of last validation run * remove embeddings loading for ti in final training run (to avoid adding huggingface hub dependency) * support pivotal for t5 * adapt pivotal for T5 encoder * adapt pivotal for T5 encoder and support in flux pipeline * t5 pivotal support + support fo pivotal for clip only or both * fix param chaining * fix param chaining * README first draft * readme * readme * readme * style * fix import * style * add fix from #9419 * add to readme, change function names * te lr changes * readme * change concept tokens logic * fix indices * change arg name * style * dummy test * revert dummy test * reorder pivoting * add warning in case the 
token abstraction is not the instance prompt * experimental - wip - specific block training * fix documentation and token abstraction processing * remove transformer block specification feature (for now) * style * fix copies * fix indexing issue when --initializer_concept has different amounts * add if TextualInversionLoaderMixin to all flux pipelines * style * fix import * fix imports * address review comments - remove necessary prints & comments, use pin_memory=True, use free_memory utils, unify warning and prints * style * logger info fix * make lora target modules configurable and change the default * make lora target modules configurable and change the default * style * make lora target modules configurable and change the default, add notes to readme * style * add tests * style * fix repo id * add updated requirements for advanced flux * fix indices of t5 pivotal tuning embeddings * fix path in test * remove `pin_memory` * fix filename of embedding * fix filename of embedding --------- Co-authored-by: Sayak Paul <spsayakpaul@gmail.com> Co-authored-by: YiYi Xu <yixu310@gmail.com>
- Loading branch information
1 parent
d9029f2
commit 9a7f824
Showing
10 changed files
with
3,155 additions
and
7 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
accelerate>=0.31.0 | ||
torchvision | ||
transformers>=4.41.2 | ||
ftfy | ||
tensorboard | ||
Jinja2 | ||
peft>=0.11.1 | ||
sentencepiece |
283 changes: 283 additions & 0 deletions
283
examples/advanced_diffusion_training/test_dreambooth_lora_flux_advanced.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,283 @@ | ||
# coding=utf-8 | ||
# Copyright 2024 HuggingFace Inc. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import logging | ||
import os | ||
import sys | ||
import tempfile | ||
|
||
import safetensors | ||
|
||
|
||
sys.path.append("..") | ||
from test_examples_utils import ExamplesTestsAccelerate, run_command # noqa: E402 | ||
|
||
|
||
# Configure the root logger at DEBUG level. This must run before any handler
# is attached below, because basicConfig() does nothing once the root logger
# already has handlers.
logging.basicConfig(level=logging.DEBUG)

# Attach an extra handler that writes to stdout to the root logger.
stream_handler = logging.StreamHandler(sys.stdout)
logger = logging.getLogger()
logger.addHandler(stream_handler)
|
||
|
||
class DreamBoothLoRAFluxAdvanced(ExamplesTestsAccelerate):
    """Smoke tests for ``train_dreambooth_lora_flux_advanced.py``.

    Every test launches the training script through ``run_command`` (prefixed
    with ``self._launch_args`` from the base class) against the
    ``hf-internal-testing/tiny-flux-pipe`` checkpoint, writes into a temporary
    directory, and then inspects the files the run left behind.
    """

    instance_data_dir = "docs/source/en/imgs"
    instance_prompt = "photo"
    pretrained_model_name_or_path = "hf-internal-testing/tiny-flux-pipe"
    script_path = "examples/advanced_diffusion_training/train_dreambooth_lora_flux_advanced.py"

    # ------------------------------------------------------------------ helpers

    def _lora_args(self, output_dir, *extra_flags):
        """Return the CLI arguments shared by the tests that save LoRA weights.

        Args:
            output_dir: Directory handed to ``--output_dir``.
            *extra_flags: Additional command-line tokens appended verbatim
                (e.g. ``"--cache_latents"``).

        Returns:
            A list of argument strings, starting with the script path.
        """
        # Built as an explicit list (not by splitting one big string) so that a
        # temporary directory containing whitespace stays a single argument.
        return [
            self.script_path,
            "--pretrained_model_name_or_path",
            self.pretrained_model_name_or_path,
            "--instance_data_dir",
            self.instance_data_dir,
            "--instance_prompt",
            self.instance_prompt,
            "--resolution",
            "64",
            "--train_batch_size",
            "1",
            "--gradient_accumulation_steps",
            "1",
            "--max_train_steps",
            "2",
            "--learning_rate",
            "5.0e-04",
            "--scale_lr",
            "--lr_scheduler",
            "constant",
            "--lr_warmup_steps",
            "0",
            "--output_dir",
            str(output_dir),
            *extra_flags,
        ]

    def _checkpointing_args(self, output_dir, max_train_steps, *extra_args):
        """Return the CLI arguments used by the checkpoint-retention tests.

        Args:
            output_dir: Directory handed to ``--output_dir``.
            max_train_steps: Value for ``--max_train_steps``.
            *extra_args: Additional ``--key=value`` tokens appended verbatim.

        Returns:
            A list of argument strings, starting with the script path.
        """
        return [
            self.script_path,
            f"--pretrained_model_name_or_path={self.pretrained_model_name_or_path}",
            f"--instance_data_dir={self.instance_data_dir}",
            f"--output_dir={output_dir}",
            f"--instance_prompt={self.instance_prompt}",
            "--resolution=64",
            "--train_batch_size=1",
            "--gradient_accumulation_steps=1",
            f"--max_train_steps={max_train_steps}",
            "--checkpointing_steps=2",
            *extra_args,
        ]

    @staticmethod
    def _checkpoint_entries(output_dir):
        """Return the names in ``output_dir`` that contain ``"checkpoint"``."""
        return {name for name in os.listdir(output_dir) if "checkpoint" in name}

    def _assert_all_keys(self, state_dict, predicate, expectation):
        """Fail, listing the offending keys, unless ``predicate`` holds for every key.

        Args:
            state_dict: Mapping whose keys are checked.
            predicate: Callable taking a key and returning a truthy value when
                the key is acceptable.
            expectation: Short phrase used in the failure message.
        """
        offending = [key for key in state_dict if not predicate(key)]
        self.assertEqual(offending, [], f"state dict keys were expected to {expectation}")

    def _load_saved_file(self, path, description):
        """Assert that ``path`` is an existing file and load it as a state dict."""
        # Imported explicitly: a plain ``import safetensors`` does not load the
        # ``torch`` submodule, so ``safetensors.torch`` is only reachable when
        # some other module happens to have imported it already.
        from safetensors.torch import load_file

        self.assertTrue(os.path.isfile(path), f"{description} was not saved at {path}")
        return load_file(path)

    def _load_lora_state_dict(self, output_dir):
        """Load the saved LoRA weights and check every key contains ``"lora"``."""
        # save_pretrained smoke test
        weights_path = os.path.join(output_dir, "pytorch_lora_weights.safetensors")
        lora_state_dict = self._load_saved_file(weights_path, "LoRA weights file")

        # make sure the state_dict has the correct naming in the parameters.
        self._assert_all_keys(lora_state_dict, lambda key: "lora" in key, "contain 'lora'")
        return lora_state_dict

    def _load_embeddings_state_dict(self, output_dir):
        """Load the embeddings file saved next to the LoRA weights.

        The file checked is ``<basename of output_dir>_emb.safetensors``.
        """
        embeddings_path = os.path.join(output_dir, f"{os.path.basename(output_dir)}_emb.safetensors")
        return self._load_saved_file(embeddings_path, "embeddings file")

    # -------------------------------------------------------------------- tests

    def test_dreambooth_lora_flux(self):
        """A plain run saves LoRA weights whose keys all start with ``transformer``."""
        with tempfile.TemporaryDirectory() as tmpdir:
            run_command(self._launch_args + self._lora_args(tmpdir))

            lora_state_dict = self._load_lora_state_dict(tmpdir)

            # when not training the text encoder, all the parameters in the state dict should start
            # with `"transformer"` in their names.
            self._assert_all_keys(
                lora_state_dict, lambda key: key.startswith("transformer"), "start with 'transformer'"
            )

    def test_dreambooth_lora_text_encoder_flux(self):
        """With ``--train_text_encoder`` keys start with ``transformer`` or ``text_encoder``."""
        with tempfile.TemporaryDirectory() as tmpdir:
            run_command(self._launch_args + self._lora_args(tmpdir, "--train_text_encoder"))

            lora_state_dict = self._load_lora_state_dict(tmpdir)

            self._assert_all_keys(
                lora_state_dict,
                lambda key: key.startswith(("transformer", "text_encoder")),
                "start with 'transformer' or 'text_encoder'",
            )

    def test_dreambooth_lora_pivotal_tuning_flux_clip(self):
        """With ``--train_text_encoder_ti`` an embeddings file with ``clip_l`` keys is saved too."""
        with tempfile.TemporaryDirectory() as tmpdir:
            run_command(self._launch_args + self._lora_args(tmpdir, "--train_text_encoder_ti"))

            lora_state_dict = self._load_lora_state_dict(tmpdir)

            # make sure embeddings were also saved, and that every key names the CLIP encoder.
            textual_inversion_state_dict = self._load_embeddings_state_dict(tmpdir)
            self._assert_all_keys(textual_inversion_state_dict, lambda key: "clip_l" in key, "contain 'clip_l'")

            # when performing pivotal tuning, all the parameters in the state dict should start
            # with `"transformer"` in their names.
            self._assert_all_keys(
                lora_state_dict, lambda key: key.startswith("transformer"), "start with 'transformer'"
            )

    def test_dreambooth_lora_pivotal_tuning_flux_clip_t5(self):
        """With ``--enable_t5_ti`` added, embedding keys contain ``clip_l`` or ``t5``."""
        with tempfile.TemporaryDirectory() as tmpdir:
            run_command(self._launch_args + self._lora_args(tmpdir, "--train_text_encoder_ti", "--enable_t5_ti"))

            lora_state_dict = self._load_lora_state_dict(tmpdir)

            # make sure embeddings were also saved, and that every key names one of the two encoders.
            textual_inversion_state_dict = self._load_embeddings_state_dict(tmpdir)
            self._assert_all_keys(
                textual_inversion_state_dict,
                lambda key: "clip_l" in key or "t5" in key,
                "contain 'clip_l' or 't5'",
            )

            # when performing pivotal tuning, all the parameters in the state dict should start
            # with `"transformer"` in their names.
            self._assert_all_keys(
                lora_state_dict, lambda key: key.startswith("transformer"), "start with 'transformer'"
            )

    def test_dreambooth_lora_latent_caching(self):
        """With ``--cache_latents`` the run still saves transformer-only LoRA weights."""
        with tempfile.TemporaryDirectory() as tmpdir:
            run_command(self._launch_args + self._lora_args(tmpdir, "--cache_latents"))

            lora_state_dict = self._load_lora_state_dict(tmpdir)

            # when not training the text encoder, all the parameters in the state dict should start
            # with `"transformer"` in their names.
            self._assert_all_keys(
                lora_state_dict, lambda key: key.startswith("transformer"), "start with 'transformer'"
            )

    def test_dreambooth_lora_flux_checkpointing_checkpoints_total_limit(self):
        """6 steps, checkpointing every 2 with a limit of 2, leave checkpoints 4 and 6."""
        with tempfile.TemporaryDirectory() as tmpdir:
            run_command(self._launch_args + self._checkpointing_args(tmpdir, 6, "--checkpoints_total_limit=2"))

            self.assertEqual(self._checkpoint_entries(tmpdir), {"checkpoint-4", "checkpoint-6"})

    def test_dreambooth_lora_flux_checkpointing_checkpoints_total_limit_removes_multiple_checkpoints(self):
        """Resuming from checkpoint-4 with a limit of 2 leaves only checkpoints 6 and 8."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # First run: no limit, so both checkpoints written during 4 steps are kept.
            run_command(self._launch_args + self._checkpointing_args(tmpdir, 4))

            self.assertEqual(self._checkpoint_entries(tmpdir), {"checkpoint-2", "checkpoint-4"})

            # Second run: resume in the same output directory, now with a limit.
            resume_run_args = self._checkpointing_args(
                tmpdir,
                8,
                "--resume_from_checkpoint=checkpoint-4",
                "--checkpoints_total_limit=2",
            )
            run_command(self._launch_args + resume_run_args)

            self.assertEqual(self._checkpoint_entries(tmpdir), {"checkpoint-6", "checkpoint-8"})
Oops, something went wrong.