From b78627e7cf6ac844f3c93d899d5d93202a1f98ef Mon Sep 17 00:00:00 2001 From: Tianyu Liu Date: Tue, 17 Dec 2024 16:15:04 -0800 Subject: [PATCH] [BE] restructure tests and assets folders [ghstack-poisoned] --- .github/workflows/integration_test_4gpu.yaml | 2 +- .github/workflows/integration_test_8gpu.yaml | 2 +- .github/workflows/unit_test_cpu.yaml | 2 +- .pre-commit-config.yaml | 2 +- assets/images/readme.md | 1 - {docs => assets}/license_header.txt | 0 version.txt => assets/version.txt | 0 pyproject.toml | 2 +- {test => tests}/__init__.py | 0 {test => tests}/assets/c4_test/data.json | 0 {test => tests}/assets/custom_schedule.csv | 0 {test => tests}/assets/test_tiktoken.model | 0 test_runner.py => tests/integration_tests.py | 2 +- {test/datasets => tests/unit_tests}/__init__.py | 0 {test => tests/unit_tests}/multimodal_model/__init__.py | 0 .../unit_tests}/multimodal_model/test_multimodal_model.py | 5 ++++- {test => tests/unit_tests}/multimodal_model/test_utils.py | 0 .../unit_tests/test_dataset_checkpointing.py | 7 +++---- .../unit_tests/test_fused_rms_norm_dtensor.py | 0 {test => tests/unit_tests}/test_job_config.py | 0 torchtitan/datasets/hf_datasets.py | 2 +- train_configs/debug_model.toml | 2 +- 22 files changed, 15 insertions(+), 14 deletions(-) delete mode 100644 assets/images/readme.md rename {docs => assets}/license_header.txt (100%) rename version.txt => assets/version.txt (100%) rename {test => tests}/__init__.py (100%) rename {test => tests}/assets/c4_test/data.json (100%) rename {test => tests}/assets/custom_schedule.csv (100%) rename {test => tests}/assets/test_tiktoken.model (100%) rename test_runner.py => tests/integration_tests.py (99%) rename {test/datasets => tests/unit_tests}/__init__.py (100%) rename {test => tests/unit_tests}/multimodal_model/__init__.py (100%) rename {test => tests/unit_tests}/multimodal_model/test_multimodal_model.py (97%) rename {test => tests/unit_tests}/multimodal_model/test_utils.py (100%) rename test/datasets/test_checkpoint.py => tests/unit_tests/test_dataset_checkpointing.py (89%) rename test/test_fused_rms_norm.py => tests/unit_tests/test_fused_rms_norm_dtensor.py (100%) rename {test => tests/unit_tests}/test_job_config.py (100%) diff --git a/.github/workflows/integration_test_4gpu.yaml b/.github/workflows/integration_test_4gpu.yaml index 6c506887..61f3e20b 100644 --- a/.github/workflows/integration_test_4gpu.yaml +++ b/.github/workflows/integration_test_4gpu.yaml @@ -43,4 +43,4 @@ jobs: python -m pip install -e . mkdir artifacts-to-be-uploaded - python ./test_runner.py artifacts-to-be-uploaded --ngpu 4 + python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu 4 diff --git a/.github/workflows/integration_test_8gpu.yaml b/.github/workflows/integration_test_8gpu.yaml index 0b8f2a1f..dd657e42 100644 --- a/.github/workflows/integration_test_8gpu.yaml +++ b/.github/workflows/integration_test_8gpu.yaml @@ -38,4 +38,4 @@ jobs: python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124 mkdir artifacts-to-be-uploaded - python ./test_runner.py artifacts-to-be-uploaded --ngpu 8 + python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu 8 diff --git a/.github/workflows/unit_test_cpu.yaml b/.github/workflows/unit_test_cpu.yaml index 329f8619..4d513311 100644 --- a/.github/workflows/unit_test_cpu.yaml +++ b/.github/workflows/unit_test_cpu.yaml @@ -25,4 +25,4 @@ jobs: pip config --user set global.progress_bar off pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu - pytest test --cov=. --cov-report=xml --durations=20 -vv + pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 318f7ef2..bea1d715 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: files: \.py$ args: - --license-filepath - - docs/license_header.txt + - assets/license_header.txt - repo: https://github.com/pycqa/flake8 rev: 34cbf8ef3950f43d09b85e2e45c15ae5717dc37b diff --git a/assets/images/readme.md b/assets/images/readme.md deleted file mode 100644 index 1868b2bb..00000000 --- a/assets/images/readme.md +++ /dev/null @@ -1 +0,0 @@ -images folder for main repo diff --git a/docs/license_header.txt b/assets/license_header.txt similarity index 100% rename from docs/license_header.txt rename to assets/license_header.txt diff --git a/version.txt b/assets/version.txt similarity index 100% rename from version.txt rename to assets/version.txt diff --git a/pyproject.toml b/pyproject.toml index 16079266..8ac6e546 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dev = [ ] [tool.setuptools.dynamic] -version = {file = "version.txt"} +version = {file = "assets/version.txt"} # ---- Explicit project build information ---- # diff --git a/test/__init__.py b/tests/__init__.py similarity index 100% rename from test/__init__.py rename to tests/__init__.py diff --git a/test/assets/c4_test/data.json b/tests/assets/c4_test/data.json similarity index 100% rename from test/assets/c4_test/data.json rename to tests/assets/c4_test/data.json diff --git a/test/assets/custom_schedule.csv b/tests/assets/custom_schedule.csv similarity index 100% rename from test/assets/custom_schedule.csv rename to tests/assets/custom_schedule.csv diff --git a/test/assets/test_tiktoken.model b/tests/assets/test_tiktoken.model similarity index 100% rename from test/assets/test_tiktoken.model rename to tests/assets/test_tiktoken.model diff --git a/test_runner.py b/tests/integration_tests.py similarity index 99% rename from test_runner.py rename to tests/integration_tests.py index e392a1af..a81e2a68 100755 --- a/test_runner.py +++ b/tests/integration_tests.py @@ -254,7 +254,7 @@ def build_test_list(): [ "--experimental.pipeline_parallel_degree 2", "--experimental.pipeline_parallel_schedule PipelineScheduleMulti", - "--experimental.pipeline_parallel_schedule_csv ./test/assets/custom_schedule.csv", + "--experimental.pipeline_parallel_schedule_csv ./tests/assets/custom_schedule.csv", "--experimental.pipeline_parallel_microbatches 8", ], ], diff --git a/test/datasets/__init__.py b/tests/unit_tests/__init__.py similarity index 100% rename from test/datasets/__init__.py rename to tests/unit_tests/__init__.py diff --git a/test/multimodal_model/__init__.py b/tests/unit_tests/multimodal_model/__init__.py similarity index 100% rename from test/multimodal_model/__init__.py rename to tests/unit_tests/multimodal_model/__init__.py diff --git a/test/multimodal_model/test_multimodal_model.py b/tests/unit_tests/multimodal_model/test_multimodal_model.py similarity index 97% rename from test/multimodal_model/test_multimodal_model.py rename to tests/unit_tests/multimodal_model/test_multimodal_model.py index 5414e439..acd18723 100644 --- a/test/multimodal_model/test_multimodal_model.py +++ b/tests/unit_tests/multimodal_model/test_multimodal_model.py @@ -12,7 +12,10 @@ VisionEncoder, ) -from test.multimodal_model.test_utils import fixed_init_model, fixed_init_tensor +from tests.unit_tests.multimodal_model.test_utils import ( + fixed_init_model, + fixed_init_tensor, +) @pytest.fixture diff --git a/test/multimodal_model/test_utils.py b/tests/unit_tests/multimodal_model/test_utils.py similarity index 100% rename from test/multimodal_model/test_utils.py rename to tests/unit_tests/multimodal_model/test_utils.py diff --git a/test/datasets/test_checkpoint.py b/tests/unit_tests/test_dataset_checkpointing.py similarity index 89% rename from test/datasets/test_checkpoint.py rename to tests/unit_tests/test_dataset_checkpointing.py index b1950b99..0e614112 100644 --- a/test/datasets/test_checkpoint.py +++ b/tests/unit_tests/test_dataset_checkpointing.py @@ -9,10 +9,10 @@ from torchtitan.datasets.tokenizer import build_tokenizer -class TestCheckpoint: +class TestDatasetCheckpointing: def test_c4_resumption(self): dataset_name = "c4_test" - dataset_path = "./test/assets/c4_test" + dataset_path = "./tests/assets/c4_test" batch_size = 1 seq_len = 1024 world_size = 4 @@ -41,8 +41,7 @@ def test_c4_resumption(self): def _build_dataloader( self, dataset_name, dataset_path, batch_size, seq_len, world_size, rank ): - tokenizer_type = "tiktoken" - tokenizer = build_tokenizer("tiktoken", "./test/assets/test_tiktoken.model") + tokenizer = build_tokenizer("tiktoken", "./tests/assets/test_tiktoken.model") return build_hf_data_loader( dataset_name=dataset_name, dataset_path=dataset_path, diff --git a/test/test_fused_rms_norm.py b/tests/unit_tests/test_fused_rms_norm_dtensor.py similarity index 100% rename from test/test_fused_rms_norm.py rename to tests/unit_tests/test_fused_rms_norm_dtensor.py diff --git a/test/test_job_config.py b/tests/unit_tests/test_job_config.py similarity index 100% rename from test/test_job_config.py rename to tests/unit_tests/test_job_config.py diff --git a/torchtitan/datasets/hf_datasets.py b/torchtitan/datasets/hf_datasets.py index 745cf40f..f22223e2 100644 --- a/torchtitan/datasets/hf_datasets.py +++ b/torchtitan/datasets/hf_datasets.py @@ -45,7 +45,7 @@ class DatasetConfig: text_processor=_process_c4_text, ), "c4_test": DatasetConfig( - path="test/assets/c4_test", + path="tests/assets/c4_test", loader=lambda path: load_dataset(path, split="train"), text_processor=_process_c4_text, ), diff --git a/train_configs/debug_model.toml b/train_configs/debug_model.toml index ca8bf031..07fcd338 100644 --- a/train_configs/debug_model.toml +++ b/train_configs/debug_model.toml @@ -24,7 +24,7 @@ name = "llama3" flavor = "debugmodel" norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm / fused_rmsnorm # test tokenizer.model, for debug purpose only -tokenizer_path = "./test/assets/test_tiktoken.model" +tokenizer_path = "./tests/assets/test_tiktoken.model" [optimizer] name = "AdamW"