diff --git a/.github/workflows/userbenchmark-a100-bisection.yml b/.github/workflows/userbenchmark-a100-bisection.yml
new file mode 100644
index 0000000000..dbcd5435e6
--- /dev/null
+++ b/.github/workflows/userbenchmark-a100-bisection.yml
@@ -0,0 +1,101 @@
+name: TorchBench A100 bisection
+on:
+  workflow_dispatch:
+    inputs:
+      start_commit:
+        description: "Start PyTorch commit hash"
+        required: true
+      end_commit:
+        description: "End PyTorch commit hash"
+        required: true
+      userbenchmark:
+        description: "Userbenchmark name"
+        required: true
+      userbenchmark_args:
+        description: "Userbenchmark arguments"
+        required: true
+
+jobs:
+  bisection:
+    environment: docker-s3-upload
+    env:
+      BASE_CONDA_ENV: "torchbench"
+      CONDA_ENV: "bisection-ci-a100"
+      PLATFORM_NAME: "gcp_a100"
+      SETUP_SCRIPT: "/workspace/setup_instance.sh"
+      BISECT_WORKDIR: ".userbenchmark/${{ github.event.inputs.userbenchmark }}/bisection"
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+    if: ${{ github.repository_owner == 'pytorch' }}
+    runs-on: [self-hosted, a100-runner]
+    timeout-minutes: 2880 # 48 hours
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          path: benchmark
+      - name: Checkout pytorch
+        uses: actions/checkout@v3
+        with:
+          repository: pytorch/pytorch
+          path: srcs/pytorch
+          fetch-depth: 0
+      - name: Checkout torchvision
+        uses: actions/checkout@v3
+        with:
+          repository: pytorch/vision
+          path: srcs/vision
+          fetch-depth: 0
+      - name: Checkout torchaudio
+        uses: actions/checkout@v3
+        with:
+          repository: pytorch/audio
+          path: srcs/audio
+          fetch-depth: 0
+      - name: Tune Nvidia GPU
+        run: |
+          sudo nvidia-smi -pm 1
+          sudo nvidia-smi -ac 1215,1410
+          nvidia-smi
+      - name: Install Deps
+        run: |
+          sudo apt-get -y update && sudo apt -y update
+      - name: Setup conda env
+        run: |
+          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
+          cd benchmark
+          python ./utils/python_utils.py --create-conda-env "${CONDA_ENV}"
+      - name: Setup bisection environment
+        run: |
+          . "${SETUP_SCRIPT}"; cd benchmark
+          python utils/cuda_utils.py --install-torch-build-deps
+          python utils/cuda_utils.py --install-torchbench-deps
+          mkdir -p "${BISECT_WORKDIR}"
+          python utils/cuda_utils.py --install-torch-nightly
+          python run_benchmark.py ${{ github.event.inputs.userbenchmark }} ${{ github.event.inputs.userbenchmark_args }} --dryrun \
+            --output "${BISECT_WORKDIR}/metrics-control.json"
+          python run_benchmark.py ${{ github.event.inputs.userbenchmark }} ${{ github.event.inputs.userbenchmark_args }} --dryrun \
+            --output "${BISECT_WORKDIR}/metrics-treatment.json"
+          python regression_detector.py \
+            --control "${BISECT_WORKDIR}/metrics-control.json" --treatment "${BISECT_WORKDIR}/metrics-treatment.json" \
+            --output "${BISECT_WORKDIR}/regression-gh${GITHUB_RUN_ID}.yaml"
+          pip uninstall -y torch torchvision torchaudio torch_tensorrt
+      - name: Bisection
+        run: |
+          . "${SETUP_SCRIPT}"; cd benchmark
+          python bisection.py --work-dir "${BISECT_WORKDIR}" --torch-repos-path "${PWD}/../srcs" \
+            --torchbench-repo-path "${PWD}" --config "${BISECT_WORKDIR}/regression-gh${GITHUB_RUN_ID}.yaml" \
+            --output "${BISECT_WORKDIR}/bisect-output-gh${GITHUB_RUN_ID}.json"
+          cp -r "${BISECT_WORKDIR}" ../bisection-result
+      - name: Upload artifact
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: Bisection result
+          path: bisection-result/
+      - name: Clean up Conda env
+        if: always()
+        run: |
+          . "${SETUP_SCRIPT}"
+          conda deactivate && conda deactivate
+          conda remove -n "${CONDA_ENV}" --all
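For reference, a run of this workflow can be scripted against GitHub's workflow_dispatch REST endpoint. A minimal sketch, assuming a token with workflow scope; the ref and every input value below are hypothetical placeholders:

# Sketch: dispatch the bisection workflow via the GitHub REST API.
# Token, ref, and all input values are hypothetical placeholders.
import requests

resp = requests.post(
    "https://api.github.com/repos/pytorch/benchmark/actions/workflows/"
    "userbenchmark-a100-bisection.yml/dispatches",
    headers={
        "Authorization": "Bearer <token>",
        "Accept": "application/vnd.github+json",
    },
    json={
        "ref": "main",
        "inputs": {
            "start_commit": "<known-good-pytorch-sha>",
            "end_commit": "<known-bad-pytorch-sha>",
            "userbenchmark": "test_bench",
            "userbenchmark_args": "<args passed through to run_benchmark.py>",
        },
    },
)
resp.raise_for_status()  # GitHub returns 204 No Content on success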
"${SETUP_SCRIPT}" + conda deactivate && conda deactivate + conda remove -n "${CONDA_ENV}" --all diff --git a/regression_detector.py b/regression_detector.py index ca6110b1bc..7a0cbe535a 100644 --- a/regression_detector.py +++ b/regression_detector.py @@ -104,9 +104,11 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult, troubled_tests = "" for test, stats in regressions_dict["details"].items(): delta = stats["delta"] - if delta != 0: + if not isinstance(delta, str): sign = "+" if delta > 0 else "" troubled_tests += f"- {test}: {sign}{delta:.5%}\n" + else: + troubled_tests += f"- {test}: {delta}\n" control_only_tests = "" for test, stat in regressions_dict["control_only_metrics"].items(): diff --git a/userbenchmark/test_bench/regression_detector.py b/userbenchmark/test_bench/regression_detector.py index e69de29bb2..0f413b0e98 100644 --- a/userbenchmark/test_bench/regression_detector.py +++ b/userbenchmark/test_bench/regression_detector.py @@ -0,0 +1,32 @@ +from ..utils import TorchBenchABTestResult, TorchBenchABTestMetric +from . import BM_NAME + +DEFAULT_REGRESSION_DELTA_THRESHOLD = 0.07 + +def run(control, treatment) -> TorchBenchABTestResult: + control_env = control["environ"] + control_env["git_commit_hash"] = control["environ"]["pytorch_git_version"] + control_metrics = control["metrics"] + treatment_env = treatment["environ"] + treatment_env["git_commit_hash"] = treatment["environ"]["pytorch_git_version"] + treatment_metrics = treatment["metrics"] + details = {} + for metric_names in control_metrics.keys(): + control_metric = control_metrics[metric_names] + treatment_metric = treatment_metrics[metric_names] + if (isinstance(control_metric, str) or isinstance(treatment_metric, str)): + if control_metric == "skip_by_dryrun" or not control_metric == treatment_metric: + delta = f"{control_metric} -> {treatment_metric}" + details[metric_names] = TorchBenchABTestMetric(control=control_metric, treatment=treatment_metric, delta=delta) + else: + delta = (treatment_metric - control_metric) / control_metric + if abs(delta) > DEFAULT_REGRESSION_DELTA_THRESHOLD: + details[metric_names] = TorchBenchABTestMetric(control=control_metric, treatment=treatment_metric, delta=delta) + return TorchBenchABTestResult(name=BM_NAME, + control_env=control_env, \ + treatment_env=treatment_env, \ + details=details, \ + control_only_metrics={}, \ + treatment_only_metrics={}, \ + bisection="pytorch") + diff --git a/userbenchmark/test_bench/run.py b/userbenchmark/test_bench/run.py index 02d8229148..cd81195075 100644 --- a/userbenchmark/test_bench/run.py +++ b/userbenchmark/test_bench/run.py @@ -155,10 +155,9 @@ def run(args: List[str]): results[f"{config_str}, metric={metric}"] = metrics_dict[metric] except KeyboardInterrupt: print("User keyboard interrupted!") - if not args.dryrun: - result = get_output_json(BM_NAME, results) - if args.device == 'cuda': - import torch - result["environ"]["device"] = torch.cuda.get_device_name() - with open(args.output, 'w') as f: - json.dump(result, f, indent=4) + result = get_output_json(BM_NAME, results) + if args.device == 'cuda': + import torch + result["environ"]["device"] = torch.cuda.get_device_name() + with open(args.output, 'w') as f: + json.dump(result, f, indent=4) diff --git a/userbenchmark/utils.py b/userbenchmark/utils.py index 25b3cf3664..c0e41167ba 100644 --- a/userbenchmark/utils.py +++ b/userbenchmark/utils.py @@ -4,10 +4,9 @@ from datetime import datetime, timedelta import time import json -import yaml from pathlib import Path from 
diff --git a/userbenchmark/utils.py b/userbenchmark/utils.py
index 25b3cf3664..c0e41167ba 100644
--- a/userbenchmark/utils.py
+++ b/userbenchmark/utils.py
@@ -4,10 +4,9 @@
 from datetime import datetime, timedelta
 import time
 import json
-import yaml
 from pathlib import Path
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Callable
+from typing import Any, Dict, List, Optional, Callable, Union
 
 REPO_PATH = Path(os.path.abspath(__file__)).parent.parent
 USERBENCHMARK_OUTPUT_PREFIX = ".userbenchmark"
@@ -38,10 +37,9 @@ def __exit__(self, exc_type, exc_value, traceback):
 
 @dataclass
 class TorchBenchABTestMetric:
-    control: float
-    treatment: float
-    delta: float
-
+    control: Union[float, str]
+    treatment: Union[float, str]
+    delta: Union[float, str]
 @dataclass
 class TorchBenchABTestResult:
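A short sketch of the two shapes the widened Union[float, str] fields allow; the values are hypothetical:

# Sketch: TorchBenchABTestMetric after the Union[float, str] change.
from userbenchmark.utils import TorchBenchABTestMetric

# Numeric comparison: delta is the relative change as a float.
numeric = TorchBenchABTestMetric(control=10.0, treatment=11.0, delta=0.1)

# Non-numeric comparison (e.g. a dryrun skip): all three fields carry strings,
# and regression_detector.py prints the delta verbatim instead of as a percent.
skipped = TorchBenchABTestMetric(
    control="skip_by_dryrun",
    treatment="skip_by_dryrun",
    delta="skip_by_dryrun -> skip_by_dryrun",
)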