Added torchao nightly workflow (#2273)
Summary:
X-link: pytorch/pytorch#128152

Add the torchao nightly benchmark workflow and upload the result artifacts to GitHub Actions (GHA).

Pull Request resolved: #2273

Test Plan:
```
python run_benchmark.py torchao --ci
```

Reviewed By: jerryzh168

Differential Revision: D58140479

Pulled By: xuzhao9

fbshipit-source-id: b274edb417f880df9b149bd5afc1acbe95737433
xuzhao9 authored and facebook-github-bot committed Jun 6, 2024
1 parent f7b4bcc commit cfae89c
Showing 11 changed files with 441 additions and 110 deletions.
80 changes: 80 additions & 0 deletions .github/workflows/torchao.yml
@@ -0,0 +1,80 @@
name: Torchao nightly workflow (A100)
on:
  workflow_dispatch:

jobs:
  run-benchmark:
    environment: docker-s3-upload
    env:
      BASE_CONDA_ENV: "torchbench"
      CONDA_ENV: "torchao-nightly"
      PLATFORM_NAME: "gcp_a100"
      SETUP_SCRIPT: "/workspace/setup_instance.sh"
      TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      IS_GHA: 1
      BUILD_ENVIRONMENT: benchmark-nightly
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: [a100-runner]
    timeout-minutes: 1440 # 24 hours
    steps:
      - name: Checkout TorchBench
        uses: actions/checkout@v3
        with:
          path: benchmark
      - name: Tune Nvidia GPU
        run: |
          sudo nvidia-smi -pm 1
          sudo nvidia-smi -ac 1215,1410
          nvidia-smi
          sudo ldconfig
      - name: Clone and setup conda env
        run: |
          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
      - name: Run the torchao userbenchmark
        env:
          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
        run: |
          . "${SETUP_SCRIPT}"
          set -x
          # remove old results if they exist
          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
          pushd benchmark
          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
          # Install torchao
          echo "Installing torchao"
          pip uninstall -y torchao
          python install.py --userbenchmark torchao
          echo "Running the torchao userbenchmark"
          python run_benchmark.py torchao --ci --dashboard
      - name: Copy the benchmark logs to benchmark-output
        if: always()
        run: |
          pushd benchmark
          cp -r ./.userbenchmark/torchao ../benchmark-output
      - name: Upload result to GH Actions Artifact
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: Torchao nightly result
          path: benchmark-output/
      - name: Copy artifact and upload to scribe and Amazon S3
        env:
          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
        run: |
          . "${SETUP_SCRIPT}"
          pushd benchmark
          # Upload the result json to Amazon S3
          python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark torchao \
            --upload-path ../benchmark-output --match-filename "^torchao_.*\.csv"
      - name: Clean up Conda env
        if: always()
        run: |
          . "${SETUP_SCRIPT}"
          conda deactivate && conda deactivate
          conda remove -n "${CONDA_ENV}" --all
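
Note that despite the "nightly" name, the file as committed defines only a manual `workflow_dispatch` trigger, so a run has to be started from the Actions UI, the `gh` CLI (`gh workflow run torchao.yml`), or the REST API. A minimal sketch of the API call, assuming a token with permission to dispatch workflows (illustrative, not part of this commit):

```python
# Trigger the workflow_dispatch-only workflow above via the GitHub REST API.
# Assumes GITHUB_TOKEN holds a token allowed to dispatch workflows.
import os

import requests

resp = requests.post(
    "https://api.github.com/repos/pytorch/benchmark/actions/workflows/torchao.yml/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    },
    json={"ref": "main"},  # branch to run the workflow against
)
resp.raise_for_status()  # GitHub returns 204 No Content on success
```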
91 changes: 91 additions & 0 deletions scripts/userbenchmark/upload_s3_csv.py
@@ -0,0 +1,91 @@
import argparse
import sys
import os
import re
from pathlib import Path
from datetime import datetime

REPO_ROOT = Path(__file__).parent.parent.parent.resolve()

class add_path:
    """Context manager that temporarily prepends a path to sys.path."""

    def __init__(self, path):
        self.path = path

    def __enter__(self):
        sys.path.insert(0, self.path)

    def __exit__(self, exc_type, exc_value, traceback):
        try:
            sys.path.remove(self.path)
        except ValueError:
            pass


with add_path(str(REPO_ROOT)):
    from utils.s3_utils import (
        S3Client,
        USERBENCHMARK_S3_BUCKET,
    )


def upload_s3(
    s3_object: str,
    ub_name: str,
    workflow_run_id: str,
    workflow_run_attempt: str,
    file_path: Path,
    dryrun: bool,
):
    """S3 path:
    s3://ossci-metrics/<s3_object>/<ub_name>/<workflow_run_id>/<workflow_run_attempt>/file_name
    """
    s3client = S3Client(USERBENCHMARK_S3_BUCKET, s3_object)
    prefix = f"{ub_name}/{workflow_run_id}/{workflow_run_attempt}"
    print(f"Uploading to prefix: {prefix}")
    if not dryrun:
        s3client.upload_file(prefix=prefix, file_path=file_path)


def _get_files_to_upload(file_path: str, match_filename: str):
    filename_regex = re.compile(match_filename)
    return [file_name for file_name in os.listdir(file_path) if filename_regex.match(file_name)]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--s3-prefix",
        required=True,
        help="S3 path prefix",
    )
    parser.add_argument(
        "--userbenchmark",
        required=True,
        help="Name of the userbenchmark.",
    )
    parser.add_argument(
        "--upload-path",
        required=True,
        help="Local directory containing the files to upload.",
    )
    parser.add_argument(
        "--match-filename",
        required=True,
        help="Regex that filenames must match to be uploaded.",
    )
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="Dry-run the upload",
    )
    args = parser.parse_args()

    files_to_upload = _get_files_to_upload(args.upload_path, args.match_filename)
    workflow_run_id = os.environ.get("WORKFLOW_RUN_ID", 0)
    workflow_run_attempt = os.environ.get("WORKFLOW_RUN_ATTEMPT", 0)

    for file in files_to_upload:
        file_path = Path(args.upload_path).joinpath(file)
        upload_s3(
            s3_object=args.s3_prefix,
            ub_name=args.userbenchmark,
            workflow_run_id=workflow_run_id,
            workflow_run_attempt=workflow_run_attempt,
            file_path=file_path,
            dryrun=args.dryrun,
        )
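
As a usage illustration (not part of this commit), the S3 upload step in the workflow above amounts to the following dry-run invocation from a torchbench checkout root; `WORKFLOW_RUN_ID` and `WORKFLOW_RUN_ATTEMPT` fall back to `0` when unset, so this also works outside of GitHub Actions:

```python
# Hypothetical dry run of the uploader; --dryrun prints the target S3 prefix
# without touching the bucket. Paths mirror the workflow step above.
import subprocess

subprocess.run(
    [
        "python", "scripts/userbenchmark/upload_s3_csv.py",
        "--s3-prefix", "torchbench-csv",
        "--userbenchmark", "torchao",
        "--upload-path", "./benchmark-output",
        "--match-filename", r"^torchao_.*\.csv",
        "--dryrun",
    ],
    check=True,
)
```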
91 changes: 0 additions & 91 deletions torchbenchmark/util/framework/huggingface/extended_configs.py
@@ -1,44 +1,8 @@
# Extended huggingface model configs from Dynamobench
import importlib
import logging
import os
from typing import List

import torch
from torchbenchmark import REPO_PATH

DYNAMOBENCH_PATH = REPO_PATH.joinpath("userbenchmark", "dynamo", "dynamobench")
# These models contain the models present in huggingface_models_list. It is a
# combination of models supported by HF Fx parser and some manually supplied
# models. For these models, we already know the largest batch size that can fit
# on A100 GPUs - 40 GB.
BATCH_SIZE_KNOWN_MODELS = dict()

# Get the list of models and their batch sizes
# Only load the extended models in OSS
if hasattr(torch.version, "git_version"):
    MODELS_FILENAME = os.path.join(DYNAMOBENCH_PATH, "huggingface_models_list.txt")
else:
    from libfb.py import parutil

    MODELS_FILENAME = parutil.get_file_path(
        "caffe2/benchmarks/dynamo/huggingface_models_list.txt"
    )
assert os.path.exists(MODELS_FILENAME)
with open(MODELS_FILENAME, "r") as fh:
    lines = fh.readlines()
    lines = [line.rstrip() for line in lines]
    for line in lines:
        model_name, batch_size = line.split(",")
        batch_size = int(batch_size)
        BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size
assert len(BATCH_SIZE_KNOWN_MODELS)


def is_extended_huggingface_models(model_name: str) -> bool:
    return model_name in BATCH_SIZE_KNOWN_MODELS


def list_extended_huggingface_models() -> List[str]:
    return list(BATCH_SIZE_KNOWN_MODELS.keys())


imports = [
"AlbertForPreTraining",
@@ -161,61 +125,6 @@ def list_extended_huggingface_models() -> List[str]:
"tinynet_a",
}

# TODO - Fails even after fake tensors
BATCH_SIZE_DIVISORS = {
"AlbertForMaskedLM": 2,
"AlbertForQuestionAnswering": 2,
"AllenaiLongformerBase": 2,
"BartForCausalLM": 2,
"BartForConditionalGeneration": 2,
"BertForMaskedLM": 2,
"BertForQuestionAnswering": 2,
"BlenderbotForCausalLM": 8,
# "BlenderbotForConditionalGeneration" : 16,
"BlenderbotSmallForCausalLM": 4,
"BlenderbotSmallForConditionalGeneration": 2,
"CamemBert": 2,
"DebertaForMaskedLM": 4,
"DebertaForQuestionAnswering": 2,
"DebertaV2ForMaskedLM": 4,
"DebertaV2ForQuestionAnswering": 8,
"DistilBertForMaskedLM": 2,
"DistilBertForQuestionAnswering": 2,
"DistillGPT2": 2,
"ElectraForCausalLM": 2,
"ElectraForQuestionAnswering": 2,
"GPT2ForSequenceClassification": 2,
# "GPTJForCausalLM" : 2,
# "GPTJForQuestionAnswering" : 2,
# "GPTNeoForCausalLM" : 32,
# "GPTNeoForSequenceClassification" : 2,
"GoogleFnet": 2,
"LayoutLMForMaskedLM": 2,
"LayoutLMForSequenceClassification": 2,
"M2M100ForConditionalGeneration": 4,
"MBartForCausalLM": 2,
"MBartForConditionalGeneration": 2,
"MT5ForConditionalGeneration": 2,
"MegatronBertForCausalLM": 4,
"MegatronBertForQuestionAnswering": 2,
"MobileBertForMaskedLM": 2,
"MobileBertForQuestionAnswering": 2,
"OPTForCausalLM": 2,
"PLBartForCausalLM": 2,
"PLBartForConditionalGeneration": 2,
"PegasusForCausalLM": 4,
"PegasusForConditionalGeneration": 2,
"RobertaForCausalLM": 2,
"RobertaForQuestionAnswering": 2,
"Speech2Text2ForCausalLM": 4,
"T5ForConditionalGeneration": 2,
"T5Small": 2,
"TrOCRForCausalLM": 2,
"XGLMForCausalLM": 4,
"XLNetLMHeadModel": 2,
"YituTechConvBert": 2,
}

try:
    EXTRA_MODELS = {
        "AllenaiLongformerBase": (
91 changes: 91 additions & 0 deletions torchbenchmark/util/framework/huggingface/list_extended_configs.py
@@ -0,0 +1,91 @@
import torch
import os
from torchbenchmark import REPO_PATH

from typing import List

DYNAMOBENCH_PATH = REPO_PATH.joinpath("userbenchmark", "dynamo", "dynamobench")

# These models contain the models present in huggingface_models_list. It is a
# combination of models supported by HF Fx parser and some manually supplied
# models. For these models, we already know the largest batch size that can fit
# on A100 GPUs - 40 GB.
BATCH_SIZE_KNOWN_MODELS = dict()

# Get the list of models and their batch sizes
# Only load the extended models in OSS
if hasattr(torch.version, "git_version"):
    MODELS_FILENAME = os.path.join(DYNAMOBENCH_PATH, "huggingface_models_list.txt")
else:
    from libfb.py import parutil

    MODELS_FILENAME = parutil.get_file_path(
        "caffe2/benchmarks/dynamo/huggingface_models_list.txt"
    )
assert os.path.exists(MODELS_FILENAME)
with open(MODELS_FILENAME, "r") as fh:
    lines = fh.readlines()
    lines = [line.rstrip() for line in lines]
    for line in lines:
        model_name, batch_size = line.split(",")
        batch_size = int(batch_size)
        BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size
assert len(BATCH_SIZE_KNOWN_MODELS)


def is_extended_huggingface_models(model_name: str) -> bool:
    return model_name in BATCH_SIZE_KNOWN_MODELS


def list_extended_huggingface_models() -> List[str]:
    return list(BATCH_SIZE_KNOWN_MODELS.keys())

# TODO - Fails even after fake tensors
BATCH_SIZE_DIVISORS = {
"AlbertForMaskedLM": 2,
"AlbertForQuestionAnswering": 2,
"AllenaiLongformerBase": 2,
"BartForCausalLM": 2,
"BartForConditionalGeneration": 2,
"BertForMaskedLM": 2,
"BertForQuestionAnswering": 2,
"BlenderbotForCausalLM": 8,
# "BlenderbotForConditionalGeneration" : 16,
"BlenderbotSmallForCausalLM": 4,
"BlenderbotSmallForConditionalGeneration": 2,
"CamemBert": 2,
"DebertaForMaskedLM": 4,
"DebertaForQuestionAnswering": 2,
"DebertaV2ForMaskedLM": 4,
"DebertaV2ForQuestionAnswering": 8,
"DistilBertForMaskedLM": 2,
"DistilBertForQuestionAnswering": 2,
"DistillGPT2": 2,
"ElectraForCausalLM": 2,
"ElectraForQuestionAnswering": 2,
"GPT2ForSequenceClassification": 2,
# "GPTJForCausalLM" : 2,
# "GPTJForQuestionAnswering" : 2,
# "GPTNeoForCausalLM" : 32,
# "GPTNeoForSequenceClassification" : 2,
"GoogleFnet": 2,
"LayoutLMForMaskedLM": 2,
"LayoutLMForSequenceClassification": 2,
"M2M100ForConditionalGeneration": 4,
"MBartForCausalLM": 2,
"MBartForConditionalGeneration": 2,
"MT5ForConditionalGeneration": 2,
"MegatronBertForCausalLM": 4,
"MegatronBertForQuestionAnswering": 2,
"MobileBertForMaskedLM": 2,
"MobileBertForQuestionAnswering": 2,
"OPTForCausalLM": 2,
"PLBartForCausalLM": 2,
"PLBartForConditionalGeneration": 2,
"PegasusForCausalLM": 4,
"PegasusForConditionalGeneration": 2,
"RobertaForCausalLM": 2,
"RobertaForQuestionAnswering": 2,
"Speech2Text2ForCausalLM": 4,
"T5ForConditionalGeneration": 2,
"T5Small": 2,
"TrOCRForCausalLM": 2,
"XGLMForCausalLM": 4,
"XLNetLMHeadModel": 2,
"YituTechConvBert": 2,
}
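
For reference, the helper below sketches how these two tables are typically consumed together: start from the largest known-good A100 batch size and shrink it by the model's divisor. The `effective_batch_size` name is hypothetical, and the import assumes a torchbench checkout where `huggingface_models_list.txt` is present, since the module reads it at import time:

```python
# Illustrative only; not part of this commit.
from torchbenchmark.util.framework.huggingface.list_extended_configs import (
    BATCH_SIZE_DIVISORS,
    BATCH_SIZE_KNOWN_MODELS,
)


def effective_batch_size(model_name: str) -> int:
    # A divisor entry marks a model whose recorded batch size is known to be
    # too large in practice; clamp to 1 so the divisor never zeroes it out.
    batch_size = BATCH_SIZE_KNOWN_MODELS[model_name]
    return max(batch_size // BATCH_SIZE_DIVISORS.get(model_name, 1), 1)


print(effective_batch_size("BertForMaskedLM"))
```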
2 changes: 1 addition & 1 deletion torchbenchmark/util/framework/huggingface/model_factory.py
@@ -12,7 +12,7 @@
 from transformers import GenerationConfig
 
 from .basic_configs import is_basic_huggingface_models
-from .extended_configs import (
+from .list_extended_configs import (
     BATCH_SIZE_DIVISORS,
     BATCH_SIZE_KNOWN_MODELS,
     is_extended_huggingface_models,
7 changes: 5 additions & 2 deletions userbenchmark/dynamo/dynamobench/common.py
@@ -3974,9 +3974,12 @@ def run(runner, args, original_dir=None):
         assert "cuda" in args.devices, "Quantization requires CUDA device."
         assert args.bfloat16, "Quantization requires dtype bfloat16."
         try:
-            from .torchao_backend import setup_baseline, torchao_optimize_ctx
-        except ImportError:
             from torchao_backend import setup_baseline, torchao_optimize_ctx
+        except ImportError:
+            from userbenchmark.dynamo.dynamobench.torchao_backend import (
+                setup_baseline,
+                torchao_optimize_ctx,
+            )
 
         setup_baseline()
         baseline_ctx = functools.partial(
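
The net effect of this hunk appears to be that the relative import (which only works when common.py is imported as part of a package) is dropped in favor of a plain top-level import, with a fully qualified `userbenchmark.dynamo.dynamobench.torchao_backend` fallback that resolves when the benchmark is driven from the torchbench repository root, e.g. via `run_benchmark.py torchao`.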