Add the A100 bisection workflow (#2054)

Summary: We are adding the generic A100 bisection workflow for bisecting any userbenchmark. The workflow requires 4 arguments: 1. The start pytorch commit hash on the main branch 2. The treatment pytorch commit hash on the main branch 3. The userbenchmark name 4. The userbenchmark arguments to trigger the regression Pull Request resolved: #2054 Test Plan: Automatic bisection of the `test_bench` userbenchmark on the accuracy issue of llama_v2_7b_16h: Start commit hash (2023-11-15): `a5a404865c01f86881f6b3ab0cd9a562d0b420de` End commit hash (2023-11-16): `690c805c8b539501aad5fbf18914ac92afb65d5a` Userbenchmark name: `test_bench` Userbenchmark arguments: `llama_v2_7b_16h -d cuda -t eval --accuracy` Known root cause commit: `12b2dd16b050e6495910fc564517fbb51dde1f20` Reviewed By: aaronenyeshi Differential Revision: D51551127 Pulled By: xuzhao9 fbshipit-source-id: 45385f95a0f30d9eb93efccb32aaa8cf32297d4c
pytorch · Nov 24, 2023 · 8e5e3a0 · 8e5e3a0
1 parent 7a8b39c
commit 8e5e3a0
Show file tree

Hide file tree

Showing 5 changed files with 146 additions and 14 deletions.
diff --git a/.github/workflows/userbenchmark-a100-bisection.yml b/.github/workflows/userbenchmark-a100-bisection.yml
@@ -0,0 +1,101 @@
+# Manually dispatched workflow that bisects a userbenchmark between two PyTorch
+# commits on the self-hosted A100 runner.
+name: TorchBench A100 bisection
+on:
+  # Runs only when triggered by hand; all four inputs are mandatory.
+  workflow_dispatch:
+    inputs:
+      # NOTE(review): start_commit and end_commit are declared here, but no step
+      # of this workflow references them — confirm how the commit range is
+      # expected to reach bisection.py.
+      start_commit:
+        description: "Start PyTorch commit hash"
+        required: true
+      end_commit:
+        description: "End PyTorch commit hash"
+        required: true
+      # Name and command-line arguments forwarded to run_benchmark.py.
+      userbenchmark:
+        description: "Userbenchmark name"
+        required: true
+      userbenchmark_args:
+        description: "Userbenchmark arguments"
+        required: true
+
+jobs:
+  bisection:
+    # Deployment environment; presumably where the AWS secrets used below are
+    # defined — confirm in the repository settings.
+    environment: docker-s3-upload
+    env:
+      BASE_CONDA_ENV: "torchbench"
+      CONDA_ENV: "bisection-ci-a100"
+      PLATFORM_NAME: "gcp_a100"
+      SETUP_SCRIPT: "/workspace/setup_instance.sh"
+      # Per-userbenchmark scratch directory, used relative to the benchmark
+      # checkout. The name has to come from the dispatch inputs: `github.env`
+      # is not a property of the github context, so referencing it expands to
+      # an empty string and yields ".userbenchmark//bisection".
+      BISECT_WORKDIR: ".userbenchmark/${{ github.event.inputs.userbenchmark }}/bisection"
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+    # Only run when the repository owner is pytorch (i.e. not on forks).
+    if: ${{ github.repository_owner == 'pytorch' }}
+    runs-on: [self-hosted, a100-runner]
+    timeout-minutes: 2880 # 48 hours
+    steps:
+      # This repository is checked out under ./benchmark.
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          path: benchmark
+      # PyTorch, torchvision and torchaudio sources are cloned with full history
+      # (fetch-depth: 0) under ./srcs; the Bisection step passes that directory
+      # to bisection.py as --torch-repos-path.
+      - name: Checkout pytorch
+        uses: actions/checkout@v3
+        with:
+          repository: pytorch/pytorch
+          path: srcs/pytorch
+          fetch-depth: 0
+      - name: Checkout torchvision
+        uses: actions/checkout@v3
+        with:
+          repository: pytorch/vision
+          path: srcs/vision
+          fetch-depth: 0
+      - name: Checkout torchaudio
+        uses: actions/checkout@v3
+        with:
+          repository: pytorch/audio
+          path: srcs/audio
+          fetch-depth: 0
+      # Turn on persistence mode (-pm 1) and set the application clocks
+      # (-ac <memory,graphics>), then print the GPU state to the job log.
+      - name: Tune Nvidia GPU
+        run: |
+          sudo nvidia-smi -pm 1
+          sudo nvidia-smi -ac 1215,1410
+          nvidia-smi
+      # Refreshes the apt package index (through both apt-get and apt); no
+      # packages are installed by this step.
+      - name: Install Deps
+        run: |
+          sudo apt-get -y update && sudo apt -y update
+      # Sources the instance setup script with CONDA_ENV overridden to the base
+      # env for that command, then asks python_utils.py to create the env named
+      # by the job-level CONDA_ENV.
+      # NOTE(review): what the setup script does with CONDA_ENV is not visible
+      # here — presumably it activates that env; confirm.
+      - name: Setup conda env
+        run: |
+          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
+          cd benchmark
+          python ./utils/python_utils.py --create-conda-env "${CONDA_ENV}"
+      # Installs the build and benchmark dependencies plus a nightly torch, runs
+      # the userbenchmark twice with --dryrun to produce control/treatment metric
+      # files, and feeds both to regression_detector.py, whose output is the
+      # config consumed by the Bisection step.
+      # NOTE(review): the dispatch inputs are expanded directly into the shell
+      # script below, so whoever dispatches the workflow controls that text.
+      - name: Setup bisection environment
+        run: |
+          . "${SETUP_SCRIPT}"; cd benchmark
+          python utils/cuda_utils.py --install-torch-build-deps
+          python utils/cuda_utils.py --install-torchbench-deps
+          mkdir -p "${BISECT_WORKDIR}"
+          python utils/cuda_utils.py --install-torch-nightly
+          # The two --output names must match the --control/--treatment paths
+          # given to regression_detector.py below.
+          python run_benchmark.py ${{ github.event.inputs.userbenchmark }} ${{ github.event.inputs.userbenchmark_args }} --dryrun \
+                 --output "${BISECT_WORKDIR}/metrics-control.json"
+          python run_benchmark.py ${{ github.event.inputs.userbenchmark }} ${{ github.event.inputs.userbenchmark_args }} --dryrun \
+                 --output "${BISECT_WORKDIR}/metrics-treatment.json"
+          python regression_detector.py \
+                 --control "${BISECT_WORKDIR}/metrics-control.json" --treatment "${BISECT_WORKDIR}/metrics-treatment.json" \
+                 --output "${BISECT_WORKDIR}/regression-gh${GITHUB_RUN_ID}.yaml"
+          # Remove the torch packages again; presumably bisection.py installs
+          # its own builds for each commit it tests — confirm.
+          pip uninstall -y torch torchvision torchaudio torch_tensorrt
+      # Runs bisection.py against the cloned sources in ../srcs using the config
+      # written by the previous step, then copies the whole work directory to
+      # ./bisection-result for the upload step.
+      # NOTE(review): the copy is the last command of the script, so it is
+      # skipped when bisection.py fails and the always() upload step then has
+      # nothing to upload — confirm whether partial results should be kept.
+      - name: Bisection
+        run: |
+          . "${SETUP_SCRIPT}"; cd benchmark
+          python bisection.py --work-dir "${BISECT_WORKDIR}" --torch-repos-path "${PWD}/../srcs" \
+                --torchbench-repo-path "${PWD}" --config "${BISECT_WORKDIR}/regression-gh${GITHUB_RUN_ID}.yaml" \
+                --output "${BISECT_WORKDIR}/bisect-output-gh${GITHUB_RUN_ID}.json"
+          cp -r "${BISECT_WORKDIR}" ../bisection-result
+      # Publishes the bisection-result/ directory as the "Bisection result"
+      # artifact; always() makes the step run even when an earlier step failed.
+      - name: Upload artifact
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: Bisection result
+          path: bisection-result/
+      # Deletes the bisection conda env whether or not the job succeeded.
+      - name: Clean up Conda env
+        if: always()
+        run: |
+          . "${SETUP_SCRIPT}"
+          # Deactivate twice; presumably to leave both the env activated by the
+          # setup script and the one beneath it — confirm.
+          conda deactivate && conda deactivate
+          # -y answers the confirmation prompt, which nobody can answer in a
+          # non-interactive CI job.
+          conda remove -y -n "${CONDA_ENV}" --all
diff --git a/regression_detector.py b/regression_detector.py
@@ -104,9 +104,11 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult,
     troubled_tests = ""
     for test, stats in regressions_dict["details"].items():
         delta = stats["delta"]
-        if delta != 0:
+        if not isinstance(delta, str):
             sign = "+" if delta > 0 else ""
             troubled_tests += f"- {test}: {sign}{delta:.5%}\n"
+        else:
+            troubled_tests += f"- {test}: {delta}\n"
 
     control_only_tests = ""
     for test, stat in regressions_dict["control_only_metrics"].items():

diff --git a/userbenchmark/test_bench/regression_detector.py b/userbenchmark/test_bench/regression_detector.py
@@ -0,0 +1,32 @@
+from ..utils import TorchBenchABTestResult, TorchBenchABTestMetric
+from . import BM_NAME
+
+DEFAULT_REGRESSION_DELTA_THRESHOLD = 0.07
+
+def run(control, treatment) -> TorchBenchABTestResult:
+    """Compare two test_bench result dicts and collect the metrics that changed.
+
+    Args:
+        control: Parsed result of the control run. Must provide ``environ``
+            (containing ``pytorch_git_version``) and ``metrics``.
+        treatment: Parsed result of the treatment run, same layout.
+
+    Returns:
+        A ``TorchBenchABTestResult`` whose ``details`` maps each flagged metric
+        name to a ``TorchBenchABTestMetric``. A metric is flagged when:
+
+        * either value is a string and the control is ``"skip_by_dryrun"`` or
+          the two values differ (delta is ``"<control> -> <treatment>"``);
+        * both values are numeric and the relative change exceeds
+          ``DEFAULT_REGRESSION_DELTA_THRESHOLD`` (delta is that ratio);
+        * the numeric control is zero and the treatment is not, where a
+          relative change is undefined (delta is ``"<control> -> <treatment>"``).
+
+        Metrics present on only one side are reported in
+        ``control_only_metrics`` / ``treatment_only_metrics``.
+
+    Side effects:
+        Adds a ``git_commit_hash`` key to both ``environ`` dicts in place.
+    """
+    control_env = control["environ"]
+    control_env["git_commit_hash"] = control_env["pytorch_git_version"]
+    treatment_env = treatment["environ"]
+    treatment_env["git_commit_hash"] = treatment_env["pytorch_git_version"]
+    control_metrics = control["metrics"]
+    treatment_metrics = treatment["metrics"]
+
+    details = {}
+    control_only_metrics = {}
+    for metric_name, control_metric in control_metrics.items():
+        if metric_name not in treatment_metrics:
+            # Report the missing metric instead of failing with a KeyError.
+            control_only_metrics[metric_name] = control_metric
+            continue
+        treatment_metric = treatment_metrics[metric_name]
+        if isinstance(control_metric, str) or isinstance(treatment_metric, str):
+            # Non-numeric metrics: dry-run placeholders are always reported,
+            # everything else only when the value changed.
+            if control_metric != "skip_by_dryrun" and control_metric == treatment_metric:
+                continue
+            delta = f"{control_metric} -> {treatment_metric}"
+        elif control_metric == 0:
+            # A relative change from zero is undefined; avoid dividing by zero.
+            if treatment_metric == 0:
+                continue
+            delta = f"{control_metric} -> {treatment_metric}"
+        else:
+            delta = (treatment_metric - control_metric) / control_metric
+            if abs(delta) <= DEFAULT_REGRESSION_DELTA_THRESHOLD:
+                continue
+        details[metric_name] = TorchBenchABTestMetric(
+            control=control_metric, treatment=treatment_metric, delta=delta
+        )
+
+    treatment_only_metrics = {
+        metric_name: treatment_metric
+        for metric_name, treatment_metric in treatment_metrics.items()
+        if metric_name not in control_metrics
+    }
+    return TorchBenchABTestResult(
+        name=BM_NAME,
+        control_env=control_env,
+        treatment_env=treatment_env,
+        details=details,
+        control_only_metrics=control_only_metrics,
+        treatment_only_metrics=treatment_only_metrics,
+        bisection="pytorch",
+    )
+
diff --git a/userbenchmark/test_bench/run.py b/userbenchmark/test_bench/run.py
@@ -155,10 +155,9 @@ def run(args: List[str]):
                 results[f"{config_str}, metric={metric}"] = metrics_dict[metric]
     except KeyboardInterrupt:
         print("User keyboard interrupted!")
-    if not args.dryrun:
-        result = get_output_json(BM_NAME, results)
-        if args.device == 'cuda':
-            import torch
-            result["environ"]["device"] = torch.cuda.get_device_name()
-        with open(args.output, 'w') as f:
-            json.dump(result, f, indent=4)
+    result = get_output_json(BM_NAME, results)
+    if args.device == 'cuda':
+        import torch
+        result["environ"]["device"] = torch.cuda.get_device_name()
+    with open(args.output, 'w') as f:
+        json.dump(result, f, indent=4)
diff --git a/userbenchmark/utils.py b/userbenchmark/utils.py
@@ -4,10 +4,9 @@
 from datetime import datetime, timedelta
 import time
 import json
-import yaml
 from pathlib import Path
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Callable
+from typing import Any, Dict, List, Optional, Callable, Union
 
 REPO_PATH = Path(os.path.abspath(__file__)).parent.parent
 USERBENCHMARK_OUTPUT_PREFIX = ".userbenchmark"
@@ -38,10 +37,9 @@ def __exit__(self, exc_type, exc_value, traceback):
 
 @dataclass
 class TorchBenchABTestMetric:
-    control: float
-    treatment: float
-    delta: float
-
+    # Each field holds a number for numeric metrics or a string for non-numeric
+    # ones; userbenchmark/test_bench/regression_detector.py stores delta as
+    # "<control> -> <treatment>" when either value is a string.
+    control: Union[float, str]
+    treatment: Union[float, str]
+    delta: Union[float, str]
 
 @dataclass
 class TorchBenchABTestResult: