-
Notifications
You must be signed in to change notification settings - Fork 71
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* lazy_inline to reduce compile time * lazy_inline to reduce compile time * leave large mem for communication, max_device_memory 55GB * fix train_steps bug * fix linting * add lazy_inline to vae encoder/decoder * rm lazy inline for vae due to perf drop * rm lazy_inline in vae * only require decord when backend selected * fix logging * fix logging * x1: rm duplicated norm * x-1: use ops.rms_norm, mint.layer_norm * x-2: rm hs list in vae encode * x-3: use self-impl repeat interleave * fix layernorm * record shape * balance bucket config for A+M * revert repeat interleave for safe * increase bs for 256 res * add shape step time analysis script * fix stop * rm pdb * acc by add Symbol * clear mem in the end of epoch * update doc * impr bucket analysis * add stage3 balanced bucket * fix lint * fix linting * Update README.md * add comments
- Loading branch information
1 parent
2ea7619
commit 7057ddb
Showing
16 changed files
with
373 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
examples/opensora_hpcai/configs/opensora-v1-2/train/train_stage2_ms.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
---
# OpenSora v1.2 stage-2 training config (MindSpore) — reconstructed from diff residue;
# all key/value pairs preserved, booleans normalized to canonical true/false.
# model
model_version: v1.2
pretrained_model_path: PATH_TO_YOUR_MODEL
model_max_length: 300
freeze_y_embedder: true

noise_scheduler: rflow
sample_method: logit-normal
use_timestep_transform: true

vae_type: OpenSoraVAE_V1_2
vae_checkpoint: models/OpenSora-VAE-v1.2/model.ckpt
vae_dtype: bf16
vae_micro_batch_size: 4
vae_micro_frame_size: 17  # keep it unchanged for the best results

enable_flash_attention: true
use_recompute: true

# data
num_parallel_workers: 2
num_workers_dataset: 2
prefetch_size: 2
max_rowsize: 256

# mindspore params, refer to https://www.mindspore.cn/docs/zh-CN/r2.3.1/api_python/mindspore/mindspore.set_context.html
max_device_memory: "59GB"
jit_level: "O1"
manual_pad: true

# precision
amp_level: "O2"
dtype: bf16
loss_scaler_type: static
init_loss_scale: 1

# training hyper-params
scheduler: "constant"
start_learning_rate: 1.e-4
end_learning_rate: 1.e-4
# warmup_steps: 1000

clip_grad: true
max_grad_norm: 1.0
use_ema: true
# ema_decay: 0.99  # default 0.9999 gives better result in our experiments

optim: "adamw_re"
# NOTE(review): plain "1e-15" (no decimal point) loads as a *string* under YAML 1.1
# resolvers such as PyYAML — config parser is assumed to coerce it; verify.
optim_eps: 1e-15
weight_decay: 0.

# epochs: 2
train_steps: 23000
ckpt_save_steps: 500

mask_ratios:
  random: 0.005
  interpolate: 0.002
  quarter_random: 0.007
  quarter_head: 0.002
  quarter_tail: 0.002
  quarter_head_tail: 0.002
  image_random: 0.0
  image_head: 0.22
  image_tail: 0.005
  image_head_tail: 0.005

bucket_config:
  # Structure: "resolution": { num_frames: [ keep_prob, batch_size ] }
  # Setting [ keep_prob, batch_size ] to [ 0.0, 0 ] forces longer videos into smaller resolution buckets
  "144p": { 1: [ 1.0, 475 ], 51: [ 1.0, 40 ], 102: [ [ 1.0, 0.33 ], 20 ], 204: [ [ 1.0, 0.1 ], 10 ], 408: [ [ 1.0, 0.1 ], 6 ] }
  "256": { 1: [ 0.4, 297 ], 51: [ 0.5, 24 ], 102: [ [ 0.5, 0.33 ], 12 ], 204: [ [ 0.5, 1.0 ], 6 ], 408: [ [ 0.5, 1.0 ], 2 ] }
  "240p": { 1: [ 0.3, 297 ], 51: [ 0.4, 16 ], 102: [ [ 0.4, 0.33 ], 8 ], 204: [ [ 0.4, 1.0 ], 4 ], 408: [ [ 0.4, 1.0 ], 2 ] }
  "360p": { 1: [ 0.5, 141 ], 51: [ 0.15, 6 ], 102: [ [ 0.3, 0.5 ], 3 ], 204: [ [ 0.3, 1.0 ], 2 ], 408: [ [ 0.5, 0.5 ], 1 ] }
  "512": { 1: [ 0.4, 141 ], 51: [ 0.15, 6 ], 102: [ [ 0.2, 0.4 ], 3 ], 204: [ [ 0.2, 1.0 ], 1 ], 408: [ [ 0.4, 0.5 ], 1 ] }
  "480p": { 1: [ 0.5, 89 ], 51: [ 0.2, 5 ], 102: [ 0.2, 2 ], 204: [ 0.1, 1 ] }
  "720p": { 1: [ 0.1, 36 ], 51: [ 0.03, 1 ] }
  "1024": { 1: [ 0.1, 36 ], 51: [ 0.02, 1 ] }
  "1080p": { 1: [ 0.01, 5 ] }
  "2048": { 1: [ 0.01, 5 ] }

# ---------- Validation ----------
validate: false
83 changes: 83 additions & 0 deletions
83
examples/opensora_hpcai/configs/opensora-v1-2/train/train_stage3_ms.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
---
# OpenSora v1.2 stage-3 training config (MindSpore) — reconstructed from diff residue;
# all key/value pairs preserved, booleans normalized to canonical true/false.
# model
model_version: v1.2
pretrained_model_path: PATH_TO_YOUR_MODEL
model_max_length: 300
freeze_y_embedder: true

noise_scheduler: rflow
sample_method: logit-normal
use_timestep_transform: true

vae_type: OpenSoraVAE_V1_2
vae_checkpoint: models/OpenSora-VAE-v1.2/model.ckpt
vae_dtype: bf16
vae_micro_batch_size: 4
vae_micro_frame_size: 17  # keep it unchanged for the best results

enable_flash_attention: true
use_recompute: true

# data
num_parallel_workers: 2
num_workers_dataset: 2
prefetch_size: 2
max_rowsize: 256

# precision
amp_level: "O2"
dtype: bf16
loss_scaler_type: static
init_loss_scale: 1

# mindspore params, refer to https://www.mindspore.cn/docs/zh-CN/r2.3.1/api_python/mindspore/mindspore.set_context.html
max_device_memory: "59GB"
jit_level: "O1"
manual_pad: true

# training hyper-params
scheduler: "constant"
start_learning_rate: 1.e-4
end_learning_rate: 1.e-4
warmup_steps: 1000

clip_grad: true
max_grad_norm: 1.0
use_ema: true
# ema_decay: 0.99

optim: "adamw_re"
# NOTE(review): plain "1e-15" (no decimal point) loads as a *string* under YAML 1.1
# resolvers such as PyYAML — config parser is assumed to coerce it; verify.
optim_eps: 1e-15
weight_decay: 0.

# epochs: 15
train_steps: 15000
ckpt_save_steps: 500

mask_ratios:
  random: 0.01
  interpolate: 0.002
  quarter_random: 0.002
  quarter_head: 0.002
  quarter_tail: 0.002
  quarter_head_tail: 0.002
  image_random: 0.0
  image_head: 0.22
  image_tail: 0.005
  image_head_tail: 0.005

bucket_config:
  # Structure: "resolution": { num_frames: [ keep_prob, batch_size ] }
  # Setting [ keep_prob, batch_size ] to [ 0.0, 0 ] forces longer videos into smaller resolution buckets
  "144p": { 1: [ 1.0, 475 ], 51: [ 1.0, 51 ], 102: [ 1.0, 27 ], 204: [ 1.0, 13 ], 408: [ 1.0, 6 ] }
  "256": { 1: [ 1.0, 297 ], 51: [ 0.5, 20 ], 102: [ 0.5, 10 ], 204: [ 0.5, 6 ], 408: [ [ 0.5, 0.5 ], 2 ] }
  "240p": { 1: [ 1.0, 297 ], 51: [ 0.5, 20 ], 102: [ 0.5, 10 ], 204: [ 0.5, 5 ], 408: [ [ 0.5, 0.4 ], 2 ] }
  "360p": { 1: [ 1.0, 141 ], 51: [ 0.5, 8 ], 102: [ 0.5, 4 ], 204: [ 0.5, 2 ], 408: [ [ 0.5, 0.3 ], 1 ] }
  "512": { 1: [ 1.0, 141 ], 51: [ 0.5, 8 ], 102: [ 0.5, 4 ], 204: [ 0.5, 2 ], 408: [ [ 0.5, 0.2 ], 1 ] }
  "480p": { 1: [ 1.0, 89 ], 51: [ 0.5, 5 ], 102: [ 0.5, 2 ], 204: [ [ 0.5, 0.5 ], 1 ], 408: [ 0.0, 0 ] }
  "720p": { 1: [ 0.3, 36 ], 51: [ 0.2, 2 ], 102: [ 0.1, 1 ], 204: [ 0.0, 0 ] }
  "1024": { 1: [ 0.3, 36 ], 51: [ 0.1, 2 ], 102: [ 0.1, 1 ], 204: [ 0.0, 0 ] }
  "1080p": { 1: [ 0.1, 5 ] }
  "2048": { 1: [ 0.05, 5 ] }

# ---------- Validation ----------
validate: false
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.