-
Notifications
You must be signed in to change notification settings - Fork 278
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added torchao nightly workflow (#2273)
Summary: X-link: pytorch/pytorch#128152 Add torchao benchmark workflow, upload the artifacts to GHA. Pull Request resolved: #2273 Test Plan: ``` python run_benchmark.py torchao --ci ``` Reviewed By: jerryzh168 Differential Revision: D58140479 Pulled By: xuzhao9 fbshipit-source-id: b274edb417f880df9b149bd5afc1acbe95737433
- Loading branch information
1 parent
f7b4bcc
commit cfae89c
Showing
11 changed files
with
441 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Runs the torchao userbenchmark on the A100 runner, publishes the results as
# a GitHub Actions artifact, and uploads the result CSV files to Amazon S3.
name: Torchao nightly workflow (A100)
on:
  workflow_dispatch:

jobs:
  run-benchmark:
    environment: docker-s3-upload
    env:
      BASE_CONDA_ENV: "torchbench"
      CONDA_ENV: "torchao-nightly"
      PLATFORM_NAME: "gcp_a100"
      SETUP_SCRIPT: "/workspace/setup_instance.sh"
      TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      IS_GHA: 1
      BUILD_ENVIRONMENT: benchmark-nightly
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: [a100-runner]
    timeout-minutes: 1440 # 24 hours
    steps:
      - name: Checkout TorchBench
        uses: actions/checkout@v3
        with:
          path: benchmark
      - name: Tune Nvidia GPU
        run: |
          sudo nvidia-smi -pm 1
          sudo nvidia-smi -ac 1215,1410
          nvidia-smi
          sudo ldconfig
      - name: Clone and setup conda env
        run: |
          # Source the setup script against the base env, then clone it into
          # the job-specific env that the later steps use.
          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
      - name: Run the torchao userbenchmark
        env:
          # This workflow is started by workflow_dispatch, whose event payload
          # has no workflow_run object, so github.event.workflow_run.* would be
          # empty. Use the id/attempt of the current run instead.
          WORKFLOW_RUN_ID: ${{ github.run_id }}
          WORKFLOW_RUN_ATTEMPT: ${{ github.run_attempt }}
        run: |
          . "${SETUP_SCRIPT}"
          set -x
          # remove old results if exists
          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
          pushd benchmark
          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
          # Install torchao
          echo "Installing torchao"
          pip uninstall -y torchao
          python install.py --userbenchmark torchao
          echo "Running the torchao userbenchmark"
          python run_benchmark.py torchao --ci --dashboard
      - name: Copy the benchmark logs to benchmark-output
        if: always()
        run: |
          pushd benchmark
          cp -r ./.userbenchmark/torchao ../benchmark-output
      - name: Upload result to GH Actions Artifact
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: Torchao nightly result
          path: benchmark-output/
      - name: Copy artifact and upload to scribe and Amazon S3
        env:
          # Same reasoning as in the benchmark step: the S3 key is built from
          # these values, so they must identify the current run.
          WORKFLOW_RUN_ID: ${{ github.run_id }}
          WORKFLOW_RUN_ATTEMPT: ${{ github.run_attempt }}
        run: |
          . "${SETUP_SCRIPT}"
          pushd benchmark
          # Upload the result json to Amazon S3
          python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark torchao \
            --upload-path ../benchmark-output --match-filename "^torchao_.*\.csv"
      - name: Clean up Conda env
        if: always()
        run: |
          . "${SETUP_SCRIPT}"
          conda deactivate && conda deactivate
          conda remove -n "${CONDA_ENV}" --all
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import argparse | ||
import sys | ||
import os | ||
import re | ||
from pathlib import Path | ||
from datetime import datetime | ||
|
||
REPO_ROOT = Path(__file__).parent.parent.parent.resolve() | ||
|
||
class add_path:
    """Context manager that temporarily prepends ``path`` to ``sys.path``.

    On entry the path is inserted at index 0; on exit its first occurrence
    is removed again. Nothing is bound by ``with ... as``.
    """

    def __init__(self, path):
        self.path = path

    def __enter__(self):
        sys.path.insert(0, self.path)

    def __exit__(self, exc_type, exc_value, traceback):
        # The body of the ``with`` may already have removed the entry; in
        # that case there is nothing left to undo.
        if self.path in sys.path:
            sys.path.remove(self.path)
|
||
|
||
with add_path(str(REPO_ROOT)): | ||
from utils.s3_utils import ( | ||
S3Client, | ||
USERBENCHMARK_S3_BUCKET, | ||
) | ||
|
||
|
||
def upload_s3(s3_object: str,
              ub_name: str,
              workflow_run_id: str,
              workflow_run_attempt: str,
              file_path: Path,
              dryrun: bool):
    """Upload one file under a per-run prefix using ``S3Client``.

    The client is created for ``USERBENCHMARK_S3_BUCKET`` and ``s3_object``,
    and the file is uploaded with the prefix
    ``<ub_name>/<workflow_run_id>/<workflow_run_attempt>``.

    The prefix is always printed. When ``dryrun`` is true the upload call
    itself is skipped (the client is still constructed).
    """
    client = S3Client(USERBENCHMARK_S3_BUCKET, s3_object)
    prefix = f"{ub_name}/{workflow_run_id}/{workflow_run_attempt}"
    print(f"Uploading to prefix: {prefix}")
    if dryrun:
        return
    client.upload_file(prefix=prefix, file_path=file_path)
|
||
|
||
def _get_files_to_upload(file_path: str, match_filename: str): | ||
filename_regex = re.compile(match_filename) | ||
return [ file_name for file_name in os.listdir(file_path) if filename_regex.match(file_name) ] | ||
|
||
if __name__ == "__main__":
    # Command-line entry point: upload every file in --upload-path whose name
    # matches --match-filename, keyed by the current workflow run.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--s3-prefix",
        required=True,
        help="S3 path prefix",
    )
    parser.add_argument(
        "--userbenchmark",
        required=True,
        help="Name of the userbenchmark.",
    )
    parser.add_argument(
        "--upload-path",
        required=True,
        help="Local directory contains files to upload.",
    )
    parser.add_argument(
        "--match-filename",
        required=True,
        help="Filename regex matched to upload.",
    )
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="Dryrun the upload",
    )
    args = parser.parse_args()

    files_to_upload = _get_files_to_upload(args.upload_path, args.match_filename)
    # Fall back to "0" when the variable is missing OR set to an empty string.
    # A CI system can export an unresolved expression as "", which would
    # otherwise produce an S3 prefix with empty path segments ("name//").
    workflow_run_id = os.environ.get("WORKFLOW_RUN_ID") or "0"
    workflow_run_attempt = os.environ.get("WORKFLOW_RUN_ATTEMPT") or "0"

    if not files_to_upload:
        # Make a silent no-op visible in the job log.
        print(f"No files in {args.upload_path} match {args.match_filename}")

    upload_dir = Path(args.upload_path)
    for file_name in files_to_upload:
        upload_s3(s3_object=args.s3_prefix,
                  ub_name=args.userbenchmark,
                  workflow_run_id=workflow_run_id,
                  workflow_run_attempt=workflow_run_attempt,
                  file_path=upload_dir.joinpath(file_name),
                  dryrun=args.dryrun)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
91 changes: 91 additions & 0 deletions
91
torchbenchmark/util/framework/huggingface/list_extended_configs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import torch | ||
import os | ||
from torchbenchmark import REPO_PATH | ||
|
||
from typing import List | ||
|
||
DYNAMOBENCH_PATH = REPO_PATH.joinpath("userbenchmark", "dynamo", "dynamobench") | ||
|
||
# These models contain the models present in huggingface_models_list. It is a
# combination of models supported by HF Fx parser and some manually supplied
# models. For these models, we already know the largest batch size that can fit
# on A100 GPUs - 40 GB.
BATCH_SIZE_KNOWN_MODELS = dict()

# Get the list of models and their batch sizes.
# When torch.version exposes git_version (OSS), read the list from the
# dynamobench directory of this repo; otherwise resolve it through parutil.
if hasattr(torch.version, "git_version"):
    MODELS_FILENAME = os.path.join(DYNAMOBENCH_PATH, "huggingface_models_list.txt")
else:
    from libfb.py import parutil
    MODELS_FILENAME = parutil.get_file_path("caffe2/benchmarks/dynamo/huggingface_models_list.txt")
assert os.path.exists(MODELS_FILENAME), f"Model list not found: {MODELS_FILENAME}"
with open(MODELS_FILENAME, "r") as fh:
    # Each non-empty line has the form "<model_name>,<batch_size>".
    for line in fh:
        line = line.rstrip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing the tuple unpack below.
            continue
        model_name, batch_size = line.split(",")
        batch_size = int(batch_size)
        BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size
assert len(BATCH_SIZE_KNOWN_MODELS), f"No models loaded from {MODELS_FILENAME}"
|
||
def is_extended_huggingface_models(model_name: str) -> bool:
    """Return True when ``model_name`` is a key of ``BATCH_SIZE_KNOWN_MODELS``."""
    known_models = BATCH_SIZE_KNOWN_MODELS
    return model_name in known_models
|
||
def list_extended_huggingface_models() -> List[str]:
    """Return the model names in ``BATCH_SIZE_KNOWN_MODELS`` as a new list."""
    return list(BATCH_SIZE_KNOWN_MODELS)
|
||
# Per-model integer divisors, keyed by model name.
# NOTE(review): presumably the known batch size is divided by this value
# before running the model -- the consumer is not in this file, confirm there.
# Entries that are commented out are kept for reference.
# TODO - Fails even after fake tensors
BATCH_SIZE_DIVISORS = {
    "AlbertForMaskedLM": 2,
    "AlbertForQuestionAnswering": 2,
    "AllenaiLongformerBase": 2,
    "BartForCausalLM": 2,
    "BartForConditionalGeneration": 2,
    "BertForMaskedLM": 2,
    "BertForQuestionAnswering": 2,
    "BlenderbotForCausalLM": 8,
    # "BlenderbotForConditionalGeneration" : 16,
    "BlenderbotSmallForCausalLM": 4,
    "BlenderbotSmallForConditionalGeneration": 2,
    "CamemBert": 2,
    "DebertaForMaskedLM": 4,
    "DebertaForQuestionAnswering": 2,
    "DebertaV2ForMaskedLM": 4,
    "DebertaV2ForQuestionAnswering": 8,
    "DistilBertForMaskedLM": 2,
    "DistilBertForQuestionAnswering": 2,
    "DistillGPT2": 2,
    "ElectraForCausalLM": 2,
    "ElectraForQuestionAnswering": 2,
    "GPT2ForSequenceClassification": 2,
    # "GPTJForCausalLM" : 2,
    # "GPTJForQuestionAnswering" : 2,
    # "GPTNeoForCausalLM" : 32,
    # "GPTNeoForSequenceClassification" : 2,
    "GoogleFnet": 2,
    "LayoutLMForMaskedLM": 2,
    "LayoutLMForSequenceClassification": 2,
    "M2M100ForConditionalGeneration": 4,
    "MBartForCausalLM": 2,
    "MBartForConditionalGeneration": 2,
    "MT5ForConditionalGeneration": 2,
    "MegatronBertForCausalLM": 4,
    "MegatronBertForQuestionAnswering": 2,
    "MobileBertForMaskedLM": 2,
    "MobileBertForQuestionAnswering": 2,
    "OPTForCausalLM": 2,
    "PLBartForCausalLM": 2,
    "PLBartForConditionalGeneration": 2,
    "PegasusForCausalLM": 4,
    "PegasusForConditionalGeneration": 2,
    "RobertaForCausalLM": 2,
    "RobertaForQuestionAnswering": 2,
    "Speech2Text2ForCausalLM": 4,
    "T5ForConditionalGeneration": 2,
    "T5Small": 2,
    "TrOCRForCausalLM": 2,
    "XGLMForCausalLM": 4,
    "XLNetLMHeadModel": 2,
    "YituTechConvBert": 2,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.