From 8e5e3a0ca924587cac5d93b2d38ebad44498ee63 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 23 Nov 2023 16:11:30 -0800 Subject: [PATCH] Add the A100 bisection workflow (#2054) Summary: We are adding the generic A100 bisection workflow for bisecting any userbenchmark. The workflow requires 4 arguments: 1. The start pytorch commit hash on the main branch 2. The treatment pytorch commit hash on the main branch 3. The userbenchmark name 4. The userbenchmark arguments to trigger the regression Pull Request resolved: https://github.com/pytorch/benchmark/pull/2054 Test Plan: Automatic bisection of the `test_bench` userbenchmark on the accuracy issue of llama_v2_7b_16h: Start commit hash (2023-11-15): `a5a404865c01f86881f6b3ab0cd9a562d0b420de` End commit hash (2023-11-16): `690c805c8b539501aad5fbf18914ac92afb65d5a` Userbenchmark name: `test_bench` Userbenchmark arguments: `llama_v2_7b_16h -d cuda -t eval --accuracy` Known root cause commit: `12b2dd16b050e6495910fc564517fbb51dde1f20` Reviewed By: aaronenyeshi Differential Revision: D51551127 Pulled By: xuzhao9 fbshipit-source-id: 45385f95a0f30d9eb93efccb32aaa8cf32297d4c --- .../userbenchmark-a100-bisection.yml | 101 ++++++++++++++++++ regression_detector.py | 4 +- .../test_bench/regression_detector.py | 32 ++++++ userbenchmark/test_bench/run.py | 13 ++- userbenchmark/utils.py | 10 +- 5 files changed, 146 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/userbenchmark-a100-bisection.yml diff --git a/.github/workflows/userbenchmark-a100-bisection.yml b/.github/workflows/userbenchmark-a100-bisection.yml new file mode 100644 index 0000000000..dbcd5435e6 --- /dev/null +++ b/.github/workflows/userbenchmark-a100-bisection.yml @@ -0,0 +1,101 @@ +name: TorchBench A100 bisection +on: + workflow_dispatch: + inputs: + start_commit: + description: "Start PyTorch commit hash" + required: true + end_commit: + description: "End PyTorch commit hash" + required: true + userbenchmark: + description: "Userbenchmark name" 
+ required: true + userbenchmark_args: + description: "Userbenchmark arguments" + required: true + +jobs: + bisection: + environment: docker-s3-upload + env: + BASE_CONDA_ENV: "torchbench" + CONDA_ENV: "bisection-ci-a100" + PLATFORM_NAME: "gcp_a100" + SETUP_SCRIPT: "/workspace/setup_instance.sh" + BISECT_WORKDIR: ".userbenchmark/${{ github.event.inputs.userbenchmark }}/bisection" + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + if: ${{ github.repository_owner == 'pytorch' }} + runs-on: [self-hosted, a100-runner] + timeout-minutes: 2880 # 48 hours + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + path: benchmark + - name: Checkout pytorch + uses: actions/checkout@v3 + with: + repository: pytorch/pytorch + path: srcs/pytorch + fetch-depth: 0 + - name: Checkout torchvision + uses: actions/checkout@v3 + with: + repository: pytorch/vision + path: srcs/vision + fetch-depth: 0 + - name: Checkout torchaudio + uses: actions/checkout@v3 + with: + repository: pytorch/audio + path: srcs/audio + fetch-depth: 0 + - name: Tune Nvidia GPU + run: | + sudo nvidia-smi -pm 1 + sudo nvidia-smi -ac 1215,1410 + nvidia-smi + - name: Install Deps + run: | + sudo apt-get -y update && sudo apt -y update + - name: Setup conda env + run: | + CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}" + cd benchmark + python ./utils/python_utils.py --create-conda-env "${CONDA_ENV}" + - name: Setup bisection environment + run: | + . 
"${SETUP_SCRIPT}"; cd benchmark + python utils/cuda_utils.py --install-torch-build-deps + python utils/cuda_utils.py --install-torchbench-deps + mkdir -p "${BISECT_WORKDIR}" + python utils/cuda_utils.py --install-torch-nightly + python run_benchmark.py ${{ github.event.inputs.userbenchmark }} ${{ github.event.inputs.userbenchmark_args }} --dryrun \ + --output "${BISECT_WORKDIR}/metrics-control.json" + python run_benchmark.py ${{ github.event.inputs.userbenchmark }} ${{ github.event.inputs.userbenchmark_args }} --dryrun \ + --output "${BISECT_WORKDIR}/metrics-treatment.json" + python regression_detector.py \ + --control "${BISECT_WORKDIR}/metrics-control.json" --treatment "${BISECT_WORKDIR}/metrics-treatment.json" \ + --output "${BISECT_WORKDIR}/regression-gh${GITHUB_RUN_ID}.yaml" + pip uninstall -y torch torchvision torchaudio torch_tensorrt + - name: Bisection + run: | + . "${SETUP_SCRIPT}"; cd benchmark + python bisection.py --work-dir "${BISECT_WORKDIR}" --torch-repos-path "${PWD}/../srcs" \ + --torchbench-repo-path "${PWD}" --config "${BISECT_WORKDIR}/regression-gh${GITHUB_RUN_ID}.yaml" \ + --output "${BISECT_WORKDIR}/bisect-output-gh${GITHUB_RUN_ID}.json" + cp -r "${BISECT_WORKDIR}" ../bisection-result + - name: Upload artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: Bisection result + path: bisection-result/ + - name: Clean up Conda env + if: always() + run: | + . 
"${SETUP_SCRIPT}" + conda deactivate && conda deactivate + conda remove -n "${CONDA_ENV}" --all diff --git a/regression_detector.py b/regression_detector.py index ca6110b1bc..7a0cbe535a 100644 --- a/regression_detector.py +++ b/regression_detector.py @@ -104,9 +104,11 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult, troubled_tests = "" for test, stats in regressions_dict["details"].items(): delta = stats["delta"] - if delta != 0: + if not isinstance(delta, str): sign = "+" if delta > 0 else "" troubled_tests += f"- {test}: {sign}{delta:.5%}\n" + else: + troubled_tests += f"- {test}: {delta}\n" control_only_tests = "" for test, stat in regressions_dict["control_only_metrics"].items(): diff --git a/userbenchmark/test_bench/regression_detector.py b/userbenchmark/test_bench/regression_detector.py index e69de29bb2..0f413b0e98 100644 --- a/userbenchmark/test_bench/regression_detector.py +++ b/userbenchmark/test_bench/regression_detector.py @@ -0,0 +1,32 @@ +from ..utils import TorchBenchABTestResult, TorchBenchABTestMetric +from . 
import BM_NAME + +DEFAULT_REGRESSION_DELTA_THRESHOLD = 0.07 + +def run(control, treatment) -> TorchBenchABTestResult: + control_env = control["environ"] + control_env["git_commit_hash"] = control["environ"]["pytorch_git_version"] + control_metrics = control["metrics"] + treatment_env = treatment["environ"] + treatment_env["git_commit_hash"] = treatment["environ"]["pytorch_git_version"] + treatment_metrics = treatment["metrics"] + details = {} + for metric_names in control_metrics.keys(): + control_metric = control_metrics[metric_names] + treatment_metric = treatment_metrics[metric_names] + if (isinstance(control_metric, str) or isinstance(treatment_metric, str)): + if control_metric == "skip_by_dryrun" or not control_metric == treatment_metric: + delta = f"{control_metric} -> {treatment_metric}" + details[metric_names] = TorchBenchABTestMetric(control=control_metric, treatment=treatment_metric, delta=delta) + else: + delta = (treatment_metric - control_metric) / control_metric + if abs(delta) > DEFAULT_REGRESSION_DELTA_THRESHOLD: + details[metric_names] = TorchBenchABTestMetric(control=control_metric, treatment=treatment_metric, delta=delta) + return TorchBenchABTestResult(name=BM_NAME, + control_env=control_env, \ + treatment_env=treatment_env, \ + details=details, \ + control_only_metrics={}, \ + treatment_only_metrics={}, \ + bisection="pytorch") + diff --git a/userbenchmark/test_bench/run.py b/userbenchmark/test_bench/run.py index 02d8229148..cd81195075 100644 --- a/userbenchmark/test_bench/run.py +++ b/userbenchmark/test_bench/run.py @@ -155,10 +155,9 @@ def run(args: List[str]): results[f"{config_str}, metric={metric}"] = metrics_dict[metric] except KeyboardInterrupt: print("User keyboard interrupted!") - if not args.dryrun: - result = get_output_json(BM_NAME, results) - if args.device == 'cuda': - import torch - result["environ"]["device"] = torch.cuda.get_device_name() - with open(args.output, 'w') as f: - json.dump(result, f, indent=4) + result = 
get_output_json(BM_NAME, results) + if args.device == 'cuda': + import torch + result["environ"]["device"] = torch.cuda.get_device_name() + with open(args.output, 'w') as f: + json.dump(result, f, indent=4) diff --git a/userbenchmark/utils.py b/userbenchmark/utils.py index 25b3cf3664..c0e41167ba 100644 --- a/userbenchmark/utils.py +++ b/userbenchmark/utils.py @@ -4,10 +4,9 @@ from datetime import datetime, timedelta import time import json -import yaml from pathlib import Path from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Callable +from typing import Any, Dict, List, Optional, Callable, Union REPO_PATH = Path(os.path.abspath(__file__)).parent.parent USERBENCHMARK_OUTPUT_PREFIX = ".userbenchmark" @@ -38,10 +37,9 @@ def __exit__(self, exc_type, exc_value, traceback): @dataclass class TorchBenchABTestMetric: - control: float - treatment: float - delta: float - + control: Union[float, str] + treatment: Union[float, str] + delta: Union[float, str] @dataclass class TorchBenchABTestResult: