pytorch · tianyu-l · Dec 17, 2024 · Dec 17, 2024 · Dec 17, 2024 · XilunWu
diff --git a/.github/workflows/integration_test_4gpu.yaml b/.github/workflows/integration_test_4gpu.yaml
diff --git a/.github/workflows/integration_test_8gpu.yaml b/.github/workflows/integration_test_8gpu.yaml
@@ -5,8 +5,8 @@ on:
     branches: [ main ]
   pull_request:
   schedule:
-    # Runs nightly
-    - cron: '0 0 * * *'
+    # Runs every 6 hours
+    - cron: '0 */6 * * *'
 concurrency:
   group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
   cancel-in-progress: true
@@ -21,7 +21,7 @@ jobs:
     with:
       runner: linux.g5.48xlarge.nvidia.gpu
       gpu-arch-type: cuda
-      gpu-arch-version: "12.1"
+      gpu-arch-version: "12.4"
       # This image is faster to clone than the default, but it lacks CC needed by triton
       # (1m25s vs 2m37s).
       docker-image: torchtitan-ubuntu-20.04-clang12
@@ -37,5 +37,9 @@ jobs:
         pip config --user set global.progress_bar off
 
         python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu124
+
+        # Install torchtitan in editable mode so the files in ./scripts can be tested
+        python -m pip install -e .
+
         mkdir artifacts-to-be-uploaded
         python ./tests/integration_tests.py artifacts-to-be-uploaded --ngpu 8
diff --git a/README.md b/README.md
@@ -1,4 +1,3 @@
-[![4 GPU Integration Test](https://github.com/pytorch/torchtitan/actions/workflows/integration_test_4gpu.yaml/badge.svg?branch=main)](https://github.com/pytorch/torchtitan/actions/workflows/integration_test_4gpu.yaml?query=branch%3Amain)
 [![8 GPU Integration Test](https://github.com/pytorch/torchtitan/actions/workflows/integration_test_8gpu.yaml/badge.svg?branch=main)](https://github.com/pytorch/torchtitan/actions/workflows/integration_test_8gpu.yaml?query=branch%3Amain)
 
 # torchtitan

diff --git a/tests/integration_tests.py b/tests/integration_tests.py
@@ -471,10 +471,6 @@ def run_tests(args):
                                     f"Skipping test {test_flavor.test_name} that requires {test_flavor.ngpu} gpus,"
                                     f" because --ngpu arg is {args.ngpu}"
                                 )
-                            elif args.ngpu == 8 and test_flavor.ngpu != 8:
-                                logger.info(
-                                    f"Skipping non-8gpu test {test_flavor.test_name} on 8-gpu runner"
-                                )
                             else:
                                 run_test(test_flavor, full_path, args.output_dir)
 
@@ -488,7 +484,7 @@ def main():
         default="all",
         help="test to run, acceptable values: `test_name` in `build_test_list` (default: all)",
     )
-    parser.add_argument("--ngpu", default=4, type=int)
+    parser.add_argument("--ngpu", default=8, type=int)
     args = parser.parse_args()
 
     if not os.path.exists(args.output_dir):