initial release

VITA-Group · Dec 29, 2023 · 9b259ff · 9b259ff
1 parent 547f788
commit 9b259ff
Show file tree

Hide file tree

Showing 55 changed files with 9,097 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,23 @@
+*.pyc
+.vscode
+output
+build
+diff_rasterization/diff_rast.egg-info
+diff_rasterization/dist
+tensorboard_3d
+screenshots
+data/
+data
+argument/
+scripts/
+*.mp4
+checkpoint-500/
+*.bin
+__MACOSX/
+**/__MACOSX/
+*.ply
+olddata
+*.out
+*.zip
+output_flow/
+exp_data/
diff --git a/README.md b/README.md
@@ -1 +1,75 @@
-# 4DGen
+# 4DGen: Grounded 4D Content Generation with Spatial-temporal Consistency
+
+[[Project Page]](https://vita-group.github.io/4DGen/) | [[Video]](https://www.youtube.com/watch?v=-bXyBKdpQ1o)
+
+## Setup
+
+Please follow the [3D-GS](https://github.com/graphdeco-inria/gaussian-splatting) instructions to install the related packages.
+
+```bash
+conda env create -f environment.yml
+conda activate 4DGen
+pip install -r requirements.txt
+
+# 3D Gaussian Splatting modules, skip if you already installed them
+# a modified gaussian splatting (+ depth, alpha rendering)
+git clone --recursive https://github.com/ashawkey/diff-gaussian-rasterization
+pip install ./diff-gaussian-rasterization
+pip install ./simple-knn
+
+# install kaolin for chamfer distance (optional)
+# https://kaolin.readthedocs.io/en/latest/notes/installation.html
+# CHANGE the torch and CUDA toolkit version if yours are different
+pip install kaolin -f https://nvidia-kaolin.s3.us-east-2.amazonaws.com/torch-1.12.1_cu116.html
+```
+
+## Data Preparation
+
+We release our collected data in Google Drive.
+
+Each test case contains two folders: `{name}_pose0` and `{name}_sync`. `pose0` refers to the monocular video sequence. `sync` refers to the pseudo labels generated by SyncDreamer.
+
+We recommend using [Practical-RIFE](https://github.com/hzwer/Practical-RIFE) if you need to introduce more frames in your video sequence.
+
+To preprocess the images into RGBA format, one can use `preprocess.py` or `preprocess_sync.py`
+
+```bash
+# for monocular image sequence
+python preprocess.py --path xxx
+# for images generated by syncdreamer
+python preprocess_sync.py --path xxx
+```
+
+## Training
+
+```bash
+python train.py --configs arguments/i2v.py -e rose
+```
+
+## Rendering
+
+```bash
+python render.py --skip_train --configs arguments/ours/i2v_xdj.py --skip_test --model_path "./output/xxxx/"
+```
+
+## Acknowledgement
+
+This work is built on many amazing research works and open-source projects, thanks a lot to all the authors for sharing!
+
+- https://github.com/dreamgaussian/dreamgaussian
+- https://github.com/hustvl/4DGaussians
+- https://github.com/graphdeco-inria/gaussian-splatting
+- https://github.com/graphdeco-inria/diff-gaussian-rasterization
+- https://github.com/threestudio-project/threestudio
+
+## Citation
+If you find this repository/work helpful in your research, please consider citing the paper and starring the repo ⭐.
+
+```
+@article{yin20234dgen,
+  title={4DGen: Grounded 4D Content Generation with Spatial-temporal Consistency},
+  author={},
+  journal={arXiv preprint},
+  year={2023}
+}
+```
diff --git a/arguments/__init__.py b/arguments/__init__.py
@@ -0,0 +1,172 @@
+#
+# Copyright (C) 2023, Inria
+# GRAPHDECO research group, https://team.inria.fr/graphdeco
+# All rights reserved.
+#
+# This software is free for non-commercial, research and evaluation use 
+# under the terms of the LICENSE.md file.
+#
+# For inquiries contact  george.drettakis@inria.fr
+#
+
+from argparse import ArgumentParser, Namespace
+import sys
+import os
+
+class GroupParams:
+    """Empty attribute container; `ParamGroup.extract` fills it via `setattr`."""
+
+class ParamGroup:
+    """Base class that mirrors a subclass's instance attributes as CLI options.
+
+    A subclass assigns its defaults as attributes on ``self`` and then calls
+    ``super().__init__``; every attribute present at that moment is registered
+    on ``parser`` as ``--<name>``. Attributes whose name starts with ``_`` are
+    registered without the underscore and additionally get a one-letter short
+    flag taken from the first character of the stripped name.
+    """
+
+    def __init__(self, parser: ArgumentParser, name: str, fill_none=False):
+        """Register one argparse option per instance attribute.
+
+        parser:    parser that receives a new argument group.
+        name:      title of that argument group.
+        fill_none: when true, every option defaults to ``None`` instead of
+                   the attribute's value.
+        """
+        arg_group = parser.add_argument_group(name)
+        for attr_name, attr_value in vars(self).items():
+            has_short_flag = attr_name.startswith("_")
+            option = attr_name[1:] if has_short_flag else attr_name
+
+            flags = ["--" + option]
+            if has_short_flag:
+                flags.append("-" + option[0:1])
+
+            # The option type comes from the attribute's value, captured
+            # before that value is possibly replaced by None below.
+            value_type = type(attr_value)
+            default = None if fill_none else attr_value
+
+            if value_type == bool:
+                arg_group.add_argument(*flags, default=default, action="store_true")
+            else:
+                arg_group.add_argument(*flags, default=default, type=value_type)
+
+    def extract(self, args):
+        """Return a ``GroupParams`` holding the entries of ``args`` owned by this group.
+
+        An entry of ``vars(args)`` is copied when its name, or its name with
+        a leading underscore, is an attribute of ``self``.
+        """
+        own_attrs = vars(self)
+        picked = GroupParams()
+        for arg_name, arg_value in vars(args).items():
+            if arg_name in own_attrs or ("_" + arg_name) in own_attrs:
+                setattr(picked, arg_name, arg_value)
+        return picked
+
+class ModelParams(ParamGroup):
+    """Defaults for the "Loading Parameters" argument group.
+
+    Underscore-prefixed attributes are exposed by ``ParamGroup`` without the
+    underscore and with a one-letter short flag (e.g. ``--model_path`` / ``-m``).
+    """
+
+    def __init__(self, parser, sentinel=False):
+        """Declare the defaults; ``sentinel`` is forwarded as ``fill_none``."""
+        self.frame_num = 8
+        self.sh_degree = 0  # NOTE: we don't need sh
+        self._source_path = ""
+        self._model_path = ""
+        self._images = "images"
+        self._resolution = -1
+        # Bool defaults are registered with action="store_true", so a default
+        # of True cannot be switched off from the command line.
+        self._white_background = True
+        self.data_device = "cuda"
+        self.eval = True
+        self.render_process = False
+        self.name = "panda"
+        self.rife = False
+        self.imagedream = False
+        self.static = False
+        super().__init__(parser, "Loading Parameters", sentinel)
+
+    def extract(self, args):
+        """Extract this group's values and make ``source_path`` absolute."""
+        params = super().extract(args)
+        params.source_path = os.path.abspath(params.source_path)
+        return params
+
+class PipelineParams(ParamGroup):
+    """Defaults for the "Pipeline Parameters" argument group.
+
+    All three options are booleans, so ``ParamGroup`` registers them as
+    ``store_true`` switches that are off unless passed.
+    """
+
+    def __init__(self, parser):
+        self.convert_SHs_python = False
+        self.compute_cov3D_python = False
+        self.debug = False
+        super().__init__(parser, "Pipeline Parameters")
+
+class ModelHiddenParams(ParamGroup):
+    """Defaults for the "ModelHiddenParams" argument group."""
+
+    def __init__(self, parser):
+        self.net_width = 64
+        self.timebase_pe = 4
+        self.defor_depth = 1
+        self.posebase_pe = 10
+        self.scale_rotation_pe = 2
+        self.opacity_pe = 2
+        self.timenet_width = 64
+        self.timenet_output = 32
+        self.bounds = 1.6
+        self.plane_tv_weight = 0.0001
+        self.time_smoothness_weight = 0.01
+        self.l1_time_planes = 0.0001
+        self.grid_merge = "mul"
+        # NOTE(review): ParamGroup registers the next two options with
+        # type=dict / type=list; presumably they are meant to be overridden
+        # from a config file rather than the command line -- confirm.
+        self.kplanes_config = {
+            "grid_dimensions": 2,
+            "input_coordinate_dim": 4,
+            "output_coordinate_dim": 32,
+            "resolution": [64, 64, 64, 25],
+        }
+        self.multires = [1, 2, 4, 8]
+        # Bool defaults become store_true switches; the two that default to
+        # True cannot be turned off from the command line.
+        self.no_grid = False
+        self.no_ds = False
+        self.no_dr = False
+        self.no_do = True
+        self.no_dc = True
+
+        super().__init__(parser, "ModelHiddenParams")
+
+class OptimizationParams(ParamGroup):
+    """Defaults for the "Optimization Parameters" argument group.
+
+    NOTE(review): ParamGroup derives each option's argparse ``type`` from the
+    default's Python type, so options whose default is an ``int`` (for
+    example ``lambda_dssim`` or ``fine_rand_rate``) reject fractional values
+    on the command line -- confirm this is intended.
+    """
+
+    def __init__(self, parser):
+        self.dataloader = False
+        self.iterations = 30_000
+        self.coarse_iterations = 3000
+        self.static_iterations = 700
+        self.position_lr_init = 0.00016
+        self.position_lr_final = 0.0000016
+        self.position_lr_delay_mult = 0.01
+        self.position_lr_max_steps = 20_000
+        self.deformation_lr_init = 0.00016
+        self.deformation_lr_final = 0.000016
+        self.deformation_lr_delay_mult = 0.01
+        self.grid_lr_init = 0.0016
+        self.grid_lr_final = 0.00016
+
+        self.feature_lr = 0.0025
+        self.opacity_lr = 0.05
+        self.scaling_lr = 0.005
+        self.rotation_lr = 0.001
+        self.percent_dense = 0.01
+        self.lambda_dssim = 0
+        self.lambda_pts = 0
+        self.lambda_zero123 = 0.5
+        self.lambda_lpips = 0
+        self.fine_rand_rate = 1
+        self.weight_constraint_init = 1
+        self.weight_constraint_after = 0.2
+        self.weight_decay_iteration = 5000
+        self.opacity_reset_interval = 3000
+        self.densification_interval = 100
+        self.densify_from_iter = 500
+        self.densify_until_iter = 15_000
+        self.densify_grad_threshold_coarse = 0.0002
+        self.densify_grad_threshold_fine_init = 0.0002
+        self.densify_grad_threshold_after = 0.0002
+        self.pruning_from_iter = 500
+        self.pruning_interval = 100
+        self.pruning_interval_fine = 100
+        self.opacity_threshold_coarse = 0.005
+        self.opacity_threshold_fine_init = 0.005
+        self.opacity_threshold_fine_after = 0.005
+
+        super().__init__(parser, "Optimization Parameters")
+
+def get_combined_args(parser : ArgumentParser):
+    """Merge command-line arguments with the ``cfg_args`` file in the model directory.
+
+    ``sys.argv[1:]`` is parsed with ``parser``. If ``<model_path>/cfg_args``
+    can be read, its text is evaluated as a ``Namespace(...)`` expression and
+    used as the base configuration; otherwise an empty ``Namespace`` is the
+    base. Every command-line value that is not ``None`` overrides the base.
+
+    Returns a new ``Namespace`` holding the merged values.
+    """
+    cmdline_argv = sys.argv[1:]
+    args_cmdline = parser.parse_args(cmdline_argv)
+
+    cfgfile_string = "Namespace()"
+    try:
+        cfgfilepath = os.path.join(args_cmdline.model_path, "cfg_args")
+        print("Looking for config file in", cfgfilepath)
+        with open(cfgfilepath) as cfg_file:
+            print("Config file found: {}".format(cfgfilepath))
+            cfgfile_string = cfg_file.read()
+    except (TypeError, FileNotFoundError):
+        # TypeError: model_path is None, so os.path.join cannot build a path.
+        # FileNotFoundError: the directory holds no cfg_args file. Both cases
+        # fall back to the empty Namespace instead of aborting.
+        print("Config file not found at")
+
+    # SECURITY: eval() executes whatever cfg_args contains; only point
+    # --model_path at directories you trust.
+    args_cfgfile = eval(cfgfile_string)
+
+    merged_dict = vars(args_cfgfile).copy()
+    for key, value in vars(args_cmdline).items():
+        if value is not None:
+            merged_dict[key] = value
+    return Namespace(**merged_dict)
diff --git a/arguments/i2v.py b/arguments/i2v.py
@@ -0,0 +1,51 @@
+# Overrides for the optimization defaults declared in arguments/__init__.py.
+OptimizationParams = {
+    "static_iterations": 1099,
+    "coarse_iterations": 1000,
+    "iterations": 3000,  # don't set it to 0 !!!
+    "position_lr_max_steps": 3000,
+    "position_lr_delay_mult": 1,  # 1,
+    "pruning_interval": 100,
+    "pruning_interval_fine": 100000,
+    "percent_dense": 0.01,
+    "densify_grad_threshold_fine_init": 0.5,
+    "densify_grad_threshold_coarse": 0.01,
+    "densify_grad_threshold_after": 0.1,
+    "densification_interval": 100,
+    "opacity_reset_interval": 100,  # not used
+    "lambda_lpips": 2,
+    "lambda_dssim": 2,
+    "lambda_pts": 0,
+    "lambda_zero123": 0.5,  # default 0.5
+    "fine_rand_rate": 0.8,
+}
+
+# Overrides for the loading defaults declared in arguments/__init__.py.
+ModelParams = {
+    "frame_num": 16,
+    "name": "rose",
+    "rife": False,
+}
+
+# Overrides for the hidden-model defaults declared in arguments/__init__.py.
+ModelHiddenParams = {
+    "grid_merge": "cat",
+    # "grid_merge": "mul",
+    "multires": [1, 2, 4, 8],
+    "defor_depth": 2,
+    "net_width": 256,
+    "plane_tv_weight": 0,
+    "time_smoothness_weight": 0,
+    "l1_time_planes": 0,
+    # NOTE(review): arguments/__init__.py declares weight_decay_iteration on
+    # OptimizationParams, not ModelHiddenParams -- confirm this key takes
+    # effect from this group.
+    "weight_decay_iteration": 0,
+    "bounds": 2,
+    "no_ds": True,
+    # "no_dr": True,
+    "no_do": True,
+    "no_dc": True,
+    "kplanes_config": {
+        "grid_dimensions": 2,
+        "input_coordinate_dim": 4,
+        "output_coordinate_dim": 32,
+        # "resolution": [32, 32, 32, 32],
+        "resolution": [64, 64, 64, 64],
+        # "resolution": [64, 64, 64, 150],
+    },
+}