Fix OpenSora 1.2 training #643

Merged · 35 commits · Sep 27, 2024

Changes from 7 commits

Commits (35)
3359694
lazy_inline to reduce compile time
SamitHuang Aug 23, 2024
db6a4bf
lazy_inline to reduce compile time
SamitHuang Aug 23, 2024
6252f31
leave large mem for communication, max_device_memory 55GB
SamitHuang Aug 23, 2024
21fff2c
fix train_steps bug
SamitHuang Aug 28, 2024
d814975
Merge branch 'os1.2_stable_fix' of https://github.com/samithuang/mind…
SamitHuang Aug 28, 2024
5b5fe82
fix linting
SamitHuang Aug 28, 2024
22aed35
add lazy_inline to vae encoder/decoder
SamitHuang Aug 28, 2024
6c220c7
rm lazy inline for vae due to perf drop
SamitHuang Aug 31, 2024
60e7705
rm lazy_inline in vae
SamitHuang Sep 2, 2024
907dde4
Merge branch 'os1.2_stable_fix' of github.com:SamitHuang/mindone into…
SamitHuang Sep 2, 2024
b3a4ff9
only require decord when backend selected
SamitHuang Sep 2, 2024
8f918f6
fix logging
SamitHuang Sep 3, 2024
a52d80e
fix logging
SamitHuang Sep 3, 2024
a0d6632
x1: rm duplicated norm
SamitHuang Sep 3, 2024
1b5f3f1
x-1: use ops.rms_norm, mint.layer_norm
SamitHuang Sep 3, 2024
9205100
x-2: rm hs list in vae encode
SamitHuang Sep 3, 2024
c444d62
x-3: use self-impl repeat interleave
SamitHuang Sep 3, 2024
f59d166
fix layernorm
SamitHuang Sep 3, 2024
4594f9c
record shape
SamitHuang Sep 5, 2024
8855f4e
balance bucket config for A+M
SamitHuang Sep 5, 2024
560cd67
revert repeat interleave for safe
SamitHuang Sep 5, 2024
1f72aa5
increase bs for 256 res
SamitHuang Sep 5, 2024
b757917
add shape step time analysis script
SamitHuang Sep 5, 2024
147435e
fix stop
SamitHuang Sep 9, 2024
94cbbb4
rm pdb
SamitHuang Sep 9, 2024
277410c
acc by add Symbol
SamitHuang Sep 12, 2024
0a83a37
clear mem in the end of epoch
SamitHuang Sep 13, 2024
e706e2c
update doc
SamitHuang Sep 13, 2024
a9e7126
impr bucket analysis
SamitHuang Sep 13, 2024
f60148c
fix
SamitHuang Sep 13, 2024
86d6fef
add stage3 balanced bucket
SamitHuang Sep 17, 2024
f9c1c65
fix lint
SamitHuang Sep 19, 2024
2e9d7f1
fix linting
SamitHuang Sep 19, 2024
067bb3d
Update README.md
SamitHuang Sep 20, 2024
5c5e382
add comments
SamitHuang Sep 20, 2024
2 changes: 2 additions & 0 deletions examples/opensora_hpcai/opensora/models/stdit/stdit3.py
@@ -35,6 +35,8 @@


class STDiT3Block(nn.Cell):
+    # to reduce compilation time
+    @ms.lazy_inline(policy="front")
Collaborator: Please add it to the VAE encoder and decoder as well, to fix the dynamic-shape OOM issue.

    def __init__(
        self,
        hidden_size,
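For context, a minimal sketch of the technique follows (the Block cell and its layers are illustrative, not code from this PR): decorating a Cell's __init__ with ms.lazy_inline makes the graph compiler compile that cell once as a reusable subgraph instead of re-inlining its graph at every call site, which is what cuts compile time when the block is stacked many times.

import mindspore as ms
from mindspore import nn

class Block(nn.Cell):
    # Compiled once as a reusable subgraph rather than re-inlined at every
    # call site, shortening graph-mode compilation for repeated blocks.
    @ms.lazy_inline(policy="front")
    def __init__(self, hidden_size: int):
        super().__init__()
        self.dense = nn.Dense(hidden_size, hidden_size)
        self.act = nn.GELU()

    def construct(self, x):
        return self.act(self.dense(x))

As the commit history shows (6c220c7, 60e7705), the same decorator was later removed from the VAE encoder/decoder because it cost runtime performance there, so the compile-time gain has to be weighed against per-step speed.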
3 changes: 3 additions & 0 deletions examples/opensora_hpcai/opensora/models/vae/modules.py
@@ -2,6 +2,7 @@

import numpy as np

+import mindspore as ms
from mindspore import nn, ops

_logger = logging.getLogger(__name__)
@@ -177,6 +178,7 @@ def make_attn(in_channels, attn_type="vanilla"):

# used in vae
class Encoder(nn.Cell):
+    @ms.lazy_inline()
    def __init__(
        self,
        *,
@@ -299,6 +301,7 @@ def construct(self, x):

# used in vae
class Decoder(nn.Cell):
+    @ms.lazy_inline()
    def __init__(
        self,
        *,
2 changes: 2 additions & 0 deletions examples/opensora_hpcai/opensora/models/vae/vae_temporal.py
@@ -150,6 +150,7 @@ def get_activation_fn(activation):
class Encoder(nn.Cell):
"""Encoder Blocks."""

@ms.lazy_inline()
def __init__(
self,
in_out_channels=4,
@@ -260,6 +261,7 @@ def construct(self, x):
class Decoder(nn.Cell):
"""Decoder Blocks."""

@ms.lazy_inline()
def __init__(
self,
in_out_channels=4,
@@ -18,7 +18,7 @@ python scripts/train.py \
--pretrained_model_path="models/OpenSora-STDiT-v3/opensora_stdit_v3.ckpt" \
--mode=0 \
--jit_level O1 \
-    --max_device_memory 59GB \
+    --max_device_memory 55GB \
Collaborator: What does this affect?

--config configs/opensora-v1-2/train/train_stage2.yaml \
--csv_path datasets/mixkit-100videos/video_caption_train.csv \
--video_folder datasets/mixkit-100videos/mixkit \
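Regarding the question above: per the commit message "leave large mem for communication, max_device_memory 55GB", lowering the cap leaves more device memory for communication rather than the framework's own allocation pool. A minimal sketch of what the flag ends up controlling, assuming it is forwarded to ms.set_context (the configure_device helper below is hypothetical, not the PR's code):

import mindspore as ms

def configure_device(max_device_memory: str = "55GB") -> None:
    # Cap how much device memory MindSpore's allocator may claim; memory
    # above the cap remains available to e.g. collective-communication ops.
    ms.set_context(max_device_memory=max_device_memory)

configure_device("55GB")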
14 changes: 8 additions & 6 deletions examples/opensora_hpcai/scripts/train.py
Collaborator: Set drop_remainder here to False as well, to keep things consistent?

@@ -562,17 +562,19 @@ def main(args):
    )

    # compute total steps and data epochs (in unit of data sink size)
+    if args.dataset_sink_mode and args.sink_size != -1:
+        steps_per_sink = args.sink_size
+    else:
+        steps_per_sink = dataset_size
+
    if args.train_steps == -1:
        assert args.epochs != -1
        total_train_steps = args.epochs * dataset_size
+        sink_epochs = math.ceil(total_train_steps / steps_per_sink)
    else:
        total_train_steps = args.train_steps
-
-    if args.dataset_sink_mode and args.sink_size != -1:
-        steps_per_sink = args.sink_size
-    else:
-        steps_per_sink = dataset_size
-    sink_epochs = math.ceil(total_train_steps / steps_per_sink)
+        # assume one step needs one whole epoch of data, to ensure enough batches are loaded for training
+        sink_epochs = total_train_steps

    if args.ckpt_save_steps == -1:
        ckpt_save_interval = args.ckpt_save_interval
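The reordered logic computes steps_per_sink before either branch and, when --train_steps is given, sets sink_epochs = total_train_steps so that every step can draw on up to a full epoch of data and the iterator is not exhausted before train_steps is reached. A standalone sketch of the resulting schedule (the function name and signature are ours, for illustration):

import math

def compute_sink_schedule(train_steps, epochs, dataset_size,
                          dataset_sink_mode, sink_size):
    # Steps per data-sink cycle: the explicit sink_size if one is set,
    # otherwise one full pass over the dataset.
    if dataset_sink_mode and sink_size != -1:
        steps_per_sink = sink_size
    else:
        steps_per_sink = dataset_size

    if train_steps == -1:
        # Epoch-driven run: derive total steps, round up to whole sink cycles.
        assert epochs != -1
        total_train_steps = epochs * dataset_size
        sink_epochs = math.ceil(total_train_steps / steps_per_sink)
    else:
        # Step-driven run: assume each step may need a whole epoch of data,
        # so enough batches are always loaded (the bug this PR fixes).
        total_train_steps = train_steps
        sink_epochs = total_train_steps
    return steps_per_sink, total_train_steps, sink_epochs

# e.g. compute_sink_schedule(-1, 2, 100, True, 50) -> (50, 200, 4)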