From b78627e7cf6ac844f3c93d899d5d93202a1f98ef Mon Sep 17 00:00:00 2001
From: Tianyu Liu <lty@fb.com>
Date: Tue, 17 Dec 2024 16:15:04 -0800
Subject: [PATCH] [BE] restructure tests and assets folders

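[ghstack-poisoned]

Summary of changes:
* Rename the `test/` folder to `tests/`; unit tests now live under
  `tests/unit_tests/` and test assets under `tests/assets/`.
* Rename `test_runner.py` to `tests/integration_tests.py`.
* Rename `test/datasets/test_checkpoint.py` to
  `tests/unit_tests/test_dataset_checkpointing.py` (class `TestCheckpoint`
  becomes `TestDatasetCheckpointing`; an unused `tokenizer_type` local is
  removed) and `test/test_fused_rms_norm.py` to
  `tests/unit_tests/test_fused_rms_norm_dtensor.py`.
* Move `docs/license_header.txt` and `version.txt` into `assets/`, and
  delete `assets/images/readme.md`.
* Update the CI workflows, `.pre-commit-config.yaml`, `pyproject.toml`,
  `torchtitan/datasets/hf_datasets.py`, and `train_configs/debug_model.toml`
  to reference the new paths.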
---
 .github/workflows/integration_test_4gpu.yaml               | 2 +-
 .github/workflows/integration_test_8gpu.yaml               | 2 +-
 .github/workflows/unit_test_cpu.yaml                       | 2 +-
 .pre-commit-config.yaml                                    | 2 +-
 assets/images/readme.md                                    | 1 -
 {docs => assets}/license_header.txt                        | 0
 version.txt => assets/version.txt                          | 0
 pyproject.toml                                             | 2 +-
 {test => tests}/__init__.py                                | 0
 {test => tests}/assets/c4_test/data.json                   | 0
 {test => tests}/assets/custom_schedule.csv                 | 0
 {test => tests}/assets/test_tiktoken.model                 | 0
 test_runner.py => tests/integration_tests.py               | 2 +-
 {test/datasets => tests/unit_tests}/__init__.py            | 0
 {test => tests/unit_tests}/multimodal_model/__init__.py    | 0
 .../unit_tests}/multimodal_model/test_multimodal_model.py  | 5 ++++-
 {test => tests/unit_tests}/multimodal_model/test_utils.py  | 0
 .../unit_tests/test_dataset_checkpointing.py               | 7 +++----
 .../unit_tests/test_fused_rms_norm_dtensor.py              | 0
 {test => tests/unit_tests}/test_job_config.py              | 0
 torchtitan/datasets/hf_datasets.py                         | 2 +-
 train_configs/debug_model.toml                             | 2 +-
 22 files changed, 15 insertions(+), 14 deletions(-)
 delete mode 100644 assets/images/readme.md
 rename {docs => assets}/license_header.txt (100%)
 rename version.txt => assets/version.txt (100%)
 rename {test => tests}/__init__.py (100%)
 rename {test => tests}/assets/c4_test/data.json (100%)
 rename {test => tests}/assets/custom_schedule.csv (100%)
 rename {test => tests}/assets/test_tiktoken.model (100%)
 rename test_runner.py => tests/integration_tests.py (99%)
 rename {test/datasets => tests/unit_tests}/__init__.py (100%)
 rename {test => tests/unit_tests}/multimodal_model/__init__.py (100%)
 rename {test => tests/unit_tests}/multimodal_model/test_multimodal_model.py (97%)
 rename {test => tests/unit_tests}/multimodal_model/test_utils.py (100%)
 rename test/datasets/test_checkpoint.py => tests/unit_tests/test_dataset_checkpointing.py (89%)
 rename test/test_fused_rms_norm.py => tests/unit_tests/test_fused_rms_norm_dtensor.py (100%)
 rename {test => tests/unit_tests}/test_job_config.py (100%)

diff --git a/.github/workflows/integration_test_4gpu.yaml b/.github/workflows/integration_test_4gpu.yaml
index 6c506887..61f3e20b 100644
--- a/.github/workflows/integration_test_4gpu.yaml
+++ b/.github/workflows/integration_test_4gpu.yaml
@@ -43,4 +43,4 @@ jobs:
         python -m pip install -e .
 
         mkdir artifacts-to-be-uploaded
-        python ./test_runner.py artifacts-to-be-uploaded --ngpu 4
+        python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu 4
diff --git a/.github/workflows/integration_test_8gpu.yaml b/.github/workflows/integration_test_8gpu.yaml
index 0b8f2a1f..dd657e42 100644
--- a/.github/workflows/integration_test_8gpu.yaml
+++ b/.github/workflows/integration_test_8gpu.yaml
@@ -38,4 +38,4 @@ jobs:
 
         python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124
         mkdir artifacts-to-be-uploaded
-        python ./test_runner.py artifacts-to-be-uploaded --ngpu 8
+        python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu 8
diff --git a/.github/workflows/unit_test_cpu.yaml b/.github/workflows/unit_test_cpu.yaml
index 329f8619..4d513311 100644
--- a/.github/workflows/unit_test_cpu.yaml
+++ b/.github/workflows/unit_test_cpu.yaml
@@ -25,4 +25,4 @@ jobs:
         pip config --user set global.progress_bar off
 
         pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
-        pytest test --cov=. --cov-report=xml --durations=20 -vv
+        pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 318f7ef2..bea1d715 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -24,7 +24,7 @@ repos:
         files: \.py$
         args:
         - --license-filepath
-        - docs/license_header.txt
+        - assets/license_header.txt
 
 -   repo: https://github.com/pycqa/flake8
     rev: 34cbf8ef3950f43d09b85e2e45c15ae5717dc37b
diff --git a/assets/images/readme.md b/assets/images/readme.md
deleted file mode 100644
index 1868b2bb..00000000
--- a/assets/images/readme.md
+++ /dev/null
@@ -1 +0,0 @@
-images folder for main repo
diff --git a/docs/license_header.txt b/assets/license_header.txt
similarity index 100%
rename from docs/license_header.txt
rename to assets/license_header.txt
diff --git a/version.txt b/assets/version.txt
similarity index 100%
rename from version.txt
rename to assets/version.txt
diff --git a/pyproject.toml b/pyproject.toml
index 16079266..8ac6e546 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,7 @@ dev = [
 ]
 
 [tool.setuptools.dynamic]
-version = {file = "version.txt"}
+version = {file = "assets/version.txt"}
 
 
 # ---- Explicit project build information ---- #
diff --git a/test/__init__.py b/tests/__init__.py
similarity index 100%
rename from test/__init__.py
rename to tests/__init__.py
diff --git a/test/assets/c4_test/data.json b/tests/assets/c4_test/data.json
similarity index 100%
rename from test/assets/c4_test/data.json
rename to tests/assets/c4_test/data.json
diff --git a/test/assets/custom_schedule.csv b/tests/assets/custom_schedule.csv
similarity index 100%
rename from test/assets/custom_schedule.csv
rename to tests/assets/custom_schedule.csv
diff --git a/test/assets/test_tiktoken.model b/tests/assets/test_tiktoken.model
similarity index 100%
rename from test/assets/test_tiktoken.model
rename to tests/assets/test_tiktoken.model
diff --git a/test_runner.py b/tests/integration_tests.py
similarity index 99%
rename from test_runner.py
rename to tests/integration_tests.py
index e392a1af..a81e2a68 100755
--- a/test_runner.py
+++ b/tests/integration_tests.py
@@ -254,7 +254,7 @@ def build_test_list():
                 [
                     "--experimental.pipeline_parallel_degree 2",
                     "--experimental.pipeline_parallel_schedule PipelineScheduleMulti",
-                    "--experimental.pipeline_parallel_schedule_csv ./test/assets/custom_schedule.csv",
+                    "--experimental.pipeline_parallel_schedule_csv ./tests/assets/custom_schedule.csv",
                     "--experimental.pipeline_parallel_microbatches 8",
                 ],
             ],
diff --git a/test/datasets/__init__.py b/tests/unit_tests/__init__.py
similarity index 100%
rename from test/datasets/__init__.py
rename to tests/unit_tests/__init__.py
diff --git a/test/multimodal_model/__init__.py b/tests/unit_tests/multimodal_model/__init__.py
similarity index 100%
rename from test/multimodal_model/__init__.py
rename to tests/unit_tests/multimodal_model/__init__.py
diff --git a/test/multimodal_model/test_multimodal_model.py b/tests/unit_tests/multimodal_model/test_multimodal_model.py
similarity index 97%
rename from test/multimodal_model/test_multimodal_model.py
rename to tests/unit_tests/multimodal_model/test_multimodal_model.py
index 5414e439..acd18723 100644
--- a/test/multimodal_model/test_multimodal_model.py
+++ b/tests/unit_tests/multimodal_model/test_multimodal_model.py
@@ -12,7 +12,10 @@
     VisionEncoder,
 )
 
-from test.multimodal_model.test_utils import fixed_init_model, fixed_init_tensor
+from tests.unit_tests.multimodal_model.test_utils import (
+    fixed_init_model,
+    fixed_init_tensor,
+)
 
 
 @pytest.fixture
diff --git a/test/multimodal_model/test_utils.py b/tests/unit_tests/multimodal_model/test_utils.py
similarity index 100%
rename from test/multimodal_model/test_utils.py
rename to tests/unit_tests/multimodal_model/test_utils.py
diff --git a/test/datasets/test_checkpoint.py b/tests/unit_tests/test_dataset_checkpointing.py
similarity index 89%
rename from test/datasets/test_checkpoint.py
rename to tests/unit_tests/test_dataset_checkpointing.py
index b1950b99..0e614112 100644
--- a/test/datasets/test_checkpoint.py
+++ b/tests/unit_tests/test_dataset_checkpointing.py
@@ -9,10 +9,10 @@
 from torchtitan.datasets.tokenizer import build_tokenizer
 
 
-class TestCheckpoint:
+class TestDatasetCheckpointing:
     def test_c4_resumption(self):
         dataset_name = "c4_test"
-        dataset_path = "./test/assets/c4_test"
+        dataset_path = "./tests/assets/c4_test"
         batch_size = 1
         seq_len = 1024
         world_size = 4
@@ -41,8 +41,7 @@ def test_c4_resumption(self):
     def _build_dataloader(
         self, dataset_name, dataset_path, batch_size, seq_len, world_size, rank
     ):
-        tokenizer_type = "tiktoken"
-        tokenizer = build_tokenizer("tiktoken", "./test/assets/test_tiktoken.model")
+        tokenizer = build_tokenizer("tiktoken", "./tests/assets/test_tiktoken.model")
         return build_hf_data_loader(
             dataset_name=dataset_name,
             dataset_path=dataset_path,
diff --git a/test/test_fused_rms_norm.py b/tests/unit_tests/test_fused_rms_norm_dtensor.py
similarity index 100%
rename from test/test_fused_rms_norm.py
rename to tests/unit_tests/test_fused_rms_norm_dtensor.py
diff --git a/test/test_job_config.py b/tests/unit_tests/test_job_config.py
similarity index 100%
rename from test/test_job_config.py
rename to tests/unit_tests/test_job_config.py
diff --git a/torchtitan/datasets/hf_datasets.py b/torchtitan/datasets/hf_datasets.py
index 745cf40f..f22223e2 100644
--- a/torchtitan/datasets/hf_datasets.py
+++ b/torchtitan/datasets/hf_datasets.py
@@ -45,7 +45,7 @@ class DatasetConfig:
         text_processor=_process_c4_text,
     ),
     "c4_test": DatasetConfig(
-        path="test/assets/c4_test",
+        path="tests/assets/c4_test",
         loader=lambda path: load_dataset(path, split="train"),
         text_processor=_process_c4_text,
     ),
diff --git a/train_configs/debug_model.toml b/train_configs/debug_model.toml
index ca8bf031..07fcd338 100644
--- a/train_configs/debug_model.toml
+++ b/train_configs/debug_model.toml
@@ -24,7 +24,7 @@ name = "llama3"
 flavor = "debugmodel"
 norm_type = "rmsnorm"  # layernorm / np_layernorm / rmsnorm / fused_rmsnorm
 # test tokenizer.model, for debug purpose only
-tokenizer_path = "./test/assets/test_tiktoken.model"
+tokenizer_path = "./tests/assets/test_tiktoken.model"
 
 [optimizer]
 name = "AdamW"