Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve bisection routine by creating GitHub issue only when the bisection is successful #2050

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/v3-bisection.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ jobs:
--torchbench-repo-path "${PWD}" --config "${BISECT_WORKDIR}/regression-${REGRESSION_DATE}.yaml" \
--output "${BISECT_WORKDIR}/bisect-output-gh${GITHUB_RUN_ID}.json"
cp -r "${BISECT_WORKDIR}" ../bisection-result
- name: Create the github issue
continue-on-error: true
if: env.TORCHBENCH_BISECTION_COMMIT_FOUND
uses: peter-evans/create-issue-from-file@v4
with:
title: V3 Performance Signal Detected by TorchBench Userbenchmark "torch-nightly" on ${{ env.TORCHBENCH_BISECTION_COMMIT_FOUND }}
content-filepath: ./benchmark/gh-issue.md
labels: |
torchbench-perf-report
- name: Upload artifact
if: always()
uses: actions/upload-artifact@v3
Expand Down
19 changes: 9 additions & 10 deletions .github/workflows/v3-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,6 @@ jobs:
done
rm -r ../benchmark-output || true
cp -r ./.userbenchmark/torch-nightly ../benchmark-output
- name: Create the github issue
continue-on-error: true
if: env.TORCHBENCH_REGRESSION_DETECTED
uses: peter-evans/create-issue-from-file@v4
with:
title: V3 Performance Signal Detected by TorchBench Userbenchmark "torch-nightly" on ${{ env.TORCHBENCH_REGRESSION_DETECTED }}
content-filepath: ./benchmark/gh-issue.md
labels: |
torchbench-perf-report
- name: Copy artifact and upload to scribe and Amazon S3
run: |
. "${SETUP_SCRIPT}"
Expand All @@ -89,14 +80,22 @@ jobs:
python ./scripts/userbenchmark/upload_scribe.py --userbenchmark_json "${LATEST_RESULT}" --userbenchmark_platform "${PLATFORM_NAME}"
# Upload the result json to Amazon S3
python ./scripts/userbenchmark/upload_s3.py --upload-file "${LATEST_RESULT}" --userbenchmark_platform "${PLATFORM_NAME}"
- name: Copy regression results to Amazon S3
- name: Copy regression results to Amazon S3 and kick off bisection
if: env.TORCHBENCH_REGRESSION_DETECTED
run: |
. "${SETUP_SCRIPT}"
pushd benchmark
LATEST_REGRESSION_RESULT=$(find ../benchmark-output/ -name "regression-*.yaml" | sort -r | head -1)
# Upload the regression json to Amazon S3
python ./scripts/userbenchmark/upload_s3.py --upload-file "${LATEST_REGRESSION_RESULT}" --userbenchmark_platform "${PLATFORM_NAME}"
# Look up the bisection workflow ID in the list returned by
# https://api.github.com/repos/pytorch/benchmark/actions/workflows
# and dispatch that workflow.
curl -u xuzhao9:${{ secrets.TORCHBENCH_ACCESS_TOKEN }} \
-X POST \
-H "Accept: application/vnd.github.v3+json" \
https://api.github.com/repos/pytorch/benchmark/actions/workflows/57994037/dispatches \
-d '{"ref": "main", "inputs": {"regression_date": "${{ env.TORCHBENCH_REGRESSION_DETECTED }}" } }'
- name: Upload result to GH Actions Artifact
uses: actions/upload-artifact@v3
with:
Expand Down
30 changes: 23 additions & 7 deletions bisection.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
TorchRepo,
)
from utils.cuda_utils import DEFAULT_CUDA_VERSION, prepare_cuda_env

from utils.github import process_bisection_into_gh_issue
IS_FBCODE = False
except (ImportError, ModuleNotFoundError):
# Meta-Internal imports
Expand Down Expand Up @@ -173,21 +173,26 @@ def __str__(self):

class BisectionTargetRepo:
repo: TorchRepo
# Start and end git hash
start: str
end: str
# Start and end version
start_version: str
end_version: str
non_target_repos: List[TorchRepo]
# generated in prep()
bisection_env: os._Environ
commits: List[Commit]
# Map from commit SHA to its index in commits
commit_dict: Dict[str, int]

def __init__(
self, repo: TorchRepo, start: str, end: str, non_target_repos: List[TorchRepo]
):
def __init__(self, repo: TorchRepo, start: str, end: str,
start_version: str, end_version: str,
non_target_repos: List[TorchRepo]):
self.repo = repo
self.start = start
self.end = end
self.start_version = start_version
self.end_version = end_version
self.non_target_repos = non_target_repos
self.commits = []
self.commit_dict = dict()
Expand Down Expand Up @@ -488,7 +493,9 @@ def output(self):
json_obj = dict()
json_obj["target_repo"] = self.target_repo.repo.name
json_obj["start"] = self.target_repo.start
json_obj["start_version"] = self.target_repo.start_version
json_obj["end"] = self.target_repo.end
json_obj["end_version"] = self.target_repo.end_version
json_obj["result"] = []
for res in self.result:
r = dict()
Expand All @@ -501,7 +508,7 @@ def output(self):
json_obj["result"].append(r)
with open(self.output_json, "w") as outfile:
json.dump(json_obj, outfile, indent=2)
print(f"Bisection successful. Result saved to {self.output_json}:")
print(f"Bisection successful. Result saved to {self.output_json}.")
print(json_obj)


Expand Down Expand Up @@ -544,6 +551,11 @@ def main() -> None:
default="torchbench",
help="Repositories to skip update.",
)
parser.add_argument(
"--gh-issue-path",
default="gh-issue.md",
help="Output path to print the issue body"
)
# by default, debug mode is disabled
parser.add_argument(
"--debug",
Expand Down Expand Up @@ -610,6 +622,8 @@ def main() -> None:
target_repo=target_repo,
start=start_hash,
end=end_hash,
start_version=bisect_config.control_env.get("pytorch_version", "N/A"),
end_version=bisect_config.treatment_env.get("pytorch_version", "N/A"),
bisect_config=bisect_config,
output_json=args.output,
debug=args.debug,
Expand All @@ -625,7 +639,9 @@ def main() -> None:
)
bisection.run()
bisection.output()

# Format the output into a github issue if the bisector finds the root cause commit
if bisection.result:
process_bisection_into_gh_issue(bisection.output_json, args.gh_issue_path)

if __name__ == "__main__":
main() # pragma: no cover
38 changes: 29 additions & 9 deletions regression_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
import importlib
from dataclasses import asdict
import os
import re
import yaml
from pathlib import Path
import time
from datetime import datetime
from typing import Any, List, Dict, Optional
from typing import Any, List, Dict, Tuple, Optional
from userbenchmark.utils import PLATFORMS, USERBENCHMARK_OUTPUT_PREFIX, REPO_PATH, \
TorchBenchABTestResult, get_date_from_metrics, \
get_ub_name, get_latest_files_in_s3_from_last_n_days, get_date_from_metrics_s3_key
Expand All @@ -19,17 +20,19 @@
GITHUB_ISSUE_TEMPLATE = """
TorchBench CI has detected a performance signal or runtime regression.

Base PyTorch commit: {start}
Control PyTorch commit: {control_commit}
Control PyTorch version: {control_version}

Affected PyTorch commit: {end}
Treatment PyTorch commit: {treatment_commit}
Treatment PyTorch version: {treatment_version}

Affected Tests:
{test_details}

Tests that were no longer run on affected commit:
Tests that were no longer run on treatment commit:
{control_only_tests}

Tests that were newly added on affected commit:
Tests that were newly added on treatment commit:
{treatment_only_tests}

Runtime regressions found?
Expand Down Expand Up @@ -103,6 +106,15 @@ def process_regressions_into_yaml(regression_result: TorchBenchABTestResult, out


def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult, owner: str, output_path: str, errors_path: str) -> None:
def _parse_date_from_pytorch_version(pytorch_version: str) -> Optional[str]:
    """Extract the nightly build date from a PyTorch version string.

    Example: "2.2.0.dev20231116+cu118" -> "2023-11-16".

    Args:
        pytorch_version: A PyTorch version string, e.g. a nightly version
            that embeds "dev" followed by an eight-digit YYYYMMDD date.

    Returns:
        The date formatted as "YYYY-MM-DD", or None when the version string
        has no "devYYYYMMDD" component or the digits are not a valid date.
    """
    # Capture exactly the eight digits after "dev"; the local version
    # suffix (e.g. "+cu118") is optional and therefore not part of the match.
    match = re.search(r"dev(\d{8})", pytorch_version)
    if match is None:
        return None
    try:
        build_date = datetime.strptime(match.group(1), "%Y%m%d")
    except ValueError:
        # Eight digits that do not form a real calendar date.
        return None
    return build_date.strftime("%Y-%m-%d")

regressions_dict = asdict(regression_result)
troubled_tests = ""
for test, stats in regressions_dict["details"].items():
Expand All @@ -122,7 +134,9 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult,
treatment_only_tests += f"- {test}: {stat}\n"

control_commit = regressions_dict["control_env"]["pytorch_git_version"]
control_version = regressions_dict["control_env"]["pytorch_version"]
treatment_commit = regressions_dict["treatment_env"]["pytorch_git_version"]
treatment_version = regressions_dict["treatment_env"]["pytorch_version"]

runtime_regressions_msg = "No runtime errors were found in the " + \
"new benchmarks run--you are all good there!"
Expand All @@ -138,7 +152,11 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult,

if "GITHUB_ENV" in os.environ:
fname = os.environ["GITHUB_ENV"]
content = f"TORCHBENCH_REGRESSION_DETECTED='{treatment_commit}'\n"
treatment_date = _parse_date_from_pytorch_version(treatment_version)
# If the date cannot be parsed from the PyTorch version, fall back to today's date
if not treatment_date:
treatment_date = datetime.today().strftime("%Y-%m-%d")
content = f"TORCHBENCH_REGRESSION_DETECTED='{treatment_date}'\n"
with open(fname, 'a') as fo:
fo.write(content)

Expand All @@ -149,8 +167,10 @@ def process_regressions_into_gh_issue(regression_result: TorchBenchABTestResult,
github_run_url = f"https://github.com/pytorch/benchmark/actions/runs/{github_run_id}"

issue_config: Dict[str, str] = {
"start": control_commit,
"end": treatment_commit,
"control_commit": control_commit,
"treatment_commit": treatment_commit,
"control_version": control_version,
"treatment_version": treatment_version,
"test_details": troubled_tests,
"control_only_tests": control_only_tests,
"treatment_only_tests": treatment_only_tests,
Expand All @@ -174,7 +194,7 @@ def get_best_start_date(latest_metrics_jsons: List[str], end_date: datetime) ->
return None


def get_metrics_by_date(latest_metrics_jsons: List[str], pick_date: datetime):
def get_metrics_by_date(latest_metrics_jsons: List[str], pick_date: datetime) -> Tuple[Any, str]:
pick_metrics_json_key: Optional[str] = None
for metrics_json_key in latest_metrics_jsons:
metric_datetime = get_date_from_metrics_s3_key(metrics_json_key)
Expand Down
61 changes: 61 additions & 0 deletions utils/github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import json
import os

from typing import Dict

# Markdown body of the GitHub issue filed after a successful bisection.
# Every placeholder is filled in by process_bisection_into_gh_issue().
GITHUB_ISSUE_TEMPLATE = """
TorchBench CI has detected a performance signal or runtime regression, and bisected its result.

Control PyTorch commit: {control_commit}
Control PyTorch version: {control_version}

Treatment PyTorch commit: {treatment_commit}
Treatment PyTorch version: {treatment_version}

Bisection result:

```
{result}
```

GitHub workflow that triggered this issue: {github_run_url}

cc {owner}
"""

# GitHub handle that is cc'ed on every generated issue.
DEFAULT_GH_ISSUE_OWNER = "@xuzhao9"


def process_bisection_into_gh_issue(bisection_output_json: str, output_path: str) -> None:
    """Render a bisection result JSON file into a GitHub issue body.

    Reads the JSON file at ``bisection_output_json``, formats it with
    GITHUB_ISSUE_TEMPLATE and writes the issue body to ``output_path``.
    When the GITHUB_ENV environment variable is set, it also appends
    TORCHBENCH_BISECTION_COMMIT_FOUND to that file so that the workflow
    step guarded by ``env.TORCHBENCH_BISECTION_COMMIT_FOUND`` runs.

    Args:
        bisection_output_json: Path to the bisection output JSON. It must
            contain the "start" and "end" keys; "start_version" and
            "end_version" fall back to "N/A" when absent.
        output_path: Path of the markdown file to write the issue body to.

    Raises:
        OSError: If the input cannot be read or an output cannot be written.
        KeyError: If the JSON lacks the "start" or "end" key.
    """
    with open(bisection_output_json, "r") as fp:
        bisection = json.load(fp)

    # json.dumps returns a string; json.dump would require a file object.
    result = json.dumps(bisection, indent=4)
    control_commit = bisection["start"]
    control_version = bisection.get("start_version", "N/A")
    treatment_commit = bisection["end"]
    treatment_version = bisection.get("end_version", "N/A")

    if "GITHUB_ENV" in os.environ:
        # The variable name must match the one the bisection workflow checks
        # in its "Create the github issue" step.
        content = f"TORCHBENCH_BISECTION_COMMIT_FOUND='{treatment_commit}'\n"
        with open(os.environ["GITHUB_ENV"], "a") as fo:
            fo.write(content)

    github_run_id = os.environ.get("GITHUB_RUN_ID", None)
    github_run_url = "No URL found, please look for the failing action in " + \
        "https://github.com/pytorch/benchmark/actions"
    if github_run_id is not None:
        github_run_url = f"https://github.com/pytorch/benchmark/actions/runs/{github_run_id}"

    issue_config: Dict[str, str] = {
        "control_commit": control_commit,
        "treatment_commit": treatment_commit,
        "control_version": control_version,
        "treatment_version": treatment_version,
        "result": result,
        "github_run_url": github_run_url,
        "owner": DEFAULT_GH_ISSUE_OWNER,
    }

    issue_body = GITHUB_ISSUE_TEMPLATE.format(**issue_config)
    with open(output_path, "w") as f:
        f.write(issue_body)