Added torchao nightly workflow (#2273)
Summary:
X-link: pytorch/pytorch#128152

Add the torchao nightly benchmark workflow and upload the result artifacts to GitHub Actions (GHA).

Pull Request resolved: #2273

Test Plan:
```
python run_benchmark.py torchao --ci
```

Reviewed By: jerryzh168

Differential Revision: D58140479

Pulled By: xuzhao9

fbshipit-source-id: b274edb417f880df9b149bd5afc1acbe95737433
xuzhao9 authored and facebook-github-bot committed Jun 6, 2024
1 parent f7b4bcc commit cfae89c
Showing 11 changed files with 441 additions and 110 deletions.
80 changes: 80 additions & 0 deletions .github/workflows/torchao.yml
@@ -0,0 +1,80 @@
name: Torchao nightly workflow (A100)
on:
  workflow_dispatch:

jobs:
  run-benchmark:
    environment: docker-s3-upload
    env:
      BASE_CONDA_ENV: "torchbench"
      CONDA_ENV: "torchao-nightly"
      PLATFORM_NAME: "gcp_a100"
      SETUP_SCRIPT: "/workspace/setup_instance.sh"
      TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      IS_GHA: 1
      BUILD_ENVIRONMENT: benchmark-nightly
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: [a100-runner]
    timeout-minutes: 1440 # 24 hours
    steps:
      - name: Checkout TorchBench
        uses: actions/checkout@v3
        with:
          path: benchmark
      - name: Tune Nvidia GPU
        run: |
          sudo nvidia-smi -pm 1
          sudo nvidia-smi -ac 1215,1410
          nvidia-smi
          sudo ldconfig
      - name: Clone and setup conda env
        run: |
          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
      - name: Run the torchao userbenchmark
        env:
          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
        run: |
          . "${SETUP_SCRIPT}"
          set -x
          # remove old results if they exist
          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
          pushd benchmark
          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
          # Install torchao
          echo "Installing torchao"
          pip uninstall -y torchao
          python install.py --userbenchmark torchao
          echo "Running the torchao userbenchmark"
          python run_benchmark.py torchao --ci --dashboard
      - name: Copy the benchmark logs to benchmark-output
        if: always()
        run: |
          pushd benchmark
          cp -r ./.userbenchmark/torchao ../benchmark-output
      - name: Upload result to GH Actions Artifact
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: Torchao nightly result
          path: benchmark-output/
      - name: Copy artifact and upload to scribe and Amazon S3
        env:
          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
          WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
        run: |
          . "${SETUP_SCRIPT}"
          pushd benchmark
          # Upload the result json to Amazon S3
          python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark torchao \
            --upload-path ../benchmark-output --match-filename "^torchao_.*\.csv"
      - name: Clean up Conda env
        if: always()
        run: |
          . "${SETUP_SCRIPT}"
          conda deactivate && conda deactivate
          conda remove -n "${CONDA_ENV}" --all
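
Note that despite the "nightly" name, the file as committed defines only a manual `workflow_dispatch` trigger, so a run has to be started from the Actions UI, the `gh` CLI (`gh workflow run torchao.yml`), or the REST API. A minimal sketch of the API call, assuming a token with permission to dispatch workflows (illustrative, not part of this commit):

```python
# Trigger the workflow_dispatch-only workflow above via the GitHub REST API.
# Assumes GITHUB_TOKEN holds a token allowed to dispatch workflows.
import os

import requests

resp = requests.post(
    "https://api.github.com/repos/pytorch/benchmark/actions/workflows/torchao.yml/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    },
    json={"ref": "main"},  # branch to run the workflow against
)
resp.raise_for_status()  # GitHub returns 204 No Content on success
```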
91 changes: 91 additions & 0 deletions scripts/userbenchmark/upload_s3_csv.py
@@ -0,0 +1,91 @@
import argparse
import sys
import os
import re
from pathlib import Path
from datetime import datetime

REPO_ROOT = Path(__file__).parent.parent.parent.resolve()

class add_path:
    """Context manager that temporarily prepends a path to sys.path."""

    def __init__(self, path):
        self.path = path

    def __enter__(self):
        sys.path.insert(0, self.path)

    def __exit__(self, exc_type, exc_value, traceback):
        try:
            sys.path.remove(self.path)
        except ValueError:
            pass


with add_path(str(REPO_ROOT)):
    from utils.s3_utils import (
        S3Client,
        USERBENCHMARK_S3_BUCKET,
    )


def upload_s3(
    s3_object: str,
    ub_name: str,
    workflow_run_id: str,
    workflow_run_attempt: str,
    file_path: Path,
    dryrun: bool,
):
    """S3 path:
    s3://ossci-metrics/<s3_object>/<ub_name>/<workflow_run_id>/<workflow_run_attempt>/file_name
    """
    s3client = S3Client(USERBENCHMARK_S3_BUCKET, s3_object)
    prefix = f"{ub_name}/{workflow_run_id}/{workflow_run_attempt}"
    print(f"Uploading to prefix: {prefix}")
    if not dryrun:
        s3client.upload_file(prefix=prefix, file_path=file_path)


def _get_files_to_upload(file_path: str, match_filename: str):
    filename_regex = re.compile(match_filename)
    return [file_name for file_name in os.listdir(file_path) if filename_regex.match(file_name)]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--s3-prefix",
        required=True,
        help="S3 path prefix",
    )
    parser.add_argument(
        "--userbenchmark",
        required=True,
        help="Name of the userbenchmark.",
    )
    parser.add_argument(
        "--upload-path",
        required=True,
        help="Local directory containing the files to upload.",
    )
    parser.add_argument(
        "--match-filename",
        required=True,
        help="Regex that filenames must match to be uploaded.",
    )
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="Dry-run the upload",
    )
    args = parser.parse_args()

    files_to_upload = _get_files_to_upload(args.upload_path, args.match_filename)
    workflow_run_id = os.environ.get("WORKFLOW_RUN_ID", 0)
    workflow_run_attempt = os.environ.get("WORKFLOW_RUN_ATTEMPT", 0)

    for file in files_to_upload:
        file_path = Path(args.upload_path).joinpath(file)
        upload_s3(
            s3_object=args.s3_prefix,
            ub_name=args.userbenchmark,
            workflow_run_id=workflow_run_id,
            workflow_run_attempt=workflow_run_attempt,
            file_path=file_path,
            dryrun=args.dryrun,
        )
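
As a usage illustration (not part of this commit), the S3 upload step in the workflow above amounts to the following dry-run invocation from a torchbench checkout root; `WORKFLOW_RUN_ID` and `WORKFLOW_RUN_ATTEMPT` fall back to `0` when unset, so this also works outside of GitHub Actions:

```python
# Hypothetical dry run of the uploader; --dryrun prints the target S3 prefix
# without touching the bucket. Paths mirror the workflow step above.
import subprocess

subprocess.run(
    [
        "python", "scripts/userbenchmark/upload_s3_csv.py",
        "--s3-prefix", "torchbench-csv",
        "--userbenchmark", "torchao",
        "--upload-path", "./benchmark-output",
        "--match-filename", r"^torchao_.*\.csv",
        "--dryrun",
    ],
    check=True,
)
```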
91 changes: 0 additions & 91 deletions torchbenchmark/util/framework/huggingface/extended_configs.py
@@ -1,44 +1,8 @@
# Extended huggingface model configs from Dynamobench
import importlib
import logging
import os
from typing import List

import torch
from torchbenchmark import REPO_PATH

DYNAMOBENCH_PATH = REPO_PATH.joinpath("userbenchmark", "dynamo", "dynamobench")
# These models contain the models present in huggingface_models_list. It is a
# combination of models supported by HF Fx parser and some manually supplied
# models. For these models, we already know the largest batch size that can fit
# on A100 GPUs - 40 GB.
BATCH_SIZE_KNOWN_MODELS = dict()

# Get the list of models and their batch sizes
# Only load the extended models in OSS
if hasattr(torch.version, "git_version"):
    MODELS_FILENAME = os.path.join(DYNAMOBENCH_PATH, "huggingface_models_list.txt")
else:
    from libfb.py import parutil

    MODELS_FILENAME = parutil.get_file_path(
        "caffe2/benchmarks/dynamo/huggingface_models_list.txt"
    )
assert os.path.exists(MODELS_FILENAME)
with open(MODELS_FILENAME, "r") as fh:
    lines = fh.readlines()
    lines = [line.rstrip() for line in lines]
    for line in lines:
        model_name, batch_size = line.split(",")
        batch_size = int(batch_size)
        BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size
assert len(BATCH_SIZE_KNOWN_MODELS)


def is_extended_huggingface_models(model_name: str) -> bool:
    return model_name in BATCH_SIZE_KNOWN_MODELS


def list_extended_huggingface_models() -> List[str]:
    return list(BATCH_SIZE_KNOWN_MODELS.keys())


imports = [
"AlbertForPreTraining",
@@ -161,61 +125,6 @@ def list_extended_huggingface_models() -> List[str]:
"tinynet_a",
}

# TODO - Fails even after fake tensors
BATCH_SIZE_DIVISORS = {
"AlbertForMaskedLM": 2,
"AlbertForQuestionAnswering": 2,
"AllenaiLongformerBase": 2,
"BartForCausalLM": 2,
"BartForConditionalGeneration": 2,
"BertForMaskedLM": 2,
"BertForQuestionAnswering": 2,
"BlenderbotForCausalLM": 8,
# "BlenderbotForConditionalGeneration" : 16,
"BlenderbotSmallForCausalLM": 4,
"BlenderbotSmallForConditionalGeneration": 2,
"CamemBert": 2,
"DebertaForMaskedLM": 4,
"DebertaForQuestionAnswering": 2,
"DebertaV2ForMaskedLM": 4,
"DebertaV2ForQuestionAnswering": 8,
"DistilBertForMaskedLM": 2,
"DistilBertForQuestionAnswering": 2,
"DistillGPT2": 2,
"ElectraForCausalLM": 2,
"ElectraForQuestionAnswering": 2,
"GPT2ForSequenceClassification": 2,
# "GPTJForCausalLM" : 2,
# "GPTJForQuestionAnswering" : 2,
# "GPTNeoForCausalLM" : 32,
# "GPTNeoForSequenceClassification" : 2,
"GoogleFnet": 2,
"LayoutLMForMaskedLM": 2,
"LayoutLMForSequenceClassification": 2,
"M2M100ForConditionalGeneration": 4,
"MBartForCausalLM": 2,
"MBartForConditionalGeneration": 2,
"MT5ForConditionalGeneration": 2,
"MegatronBertForCausalLM": 4,
"MegatronBertForQuestionAnswering": 2,
"MobileBertForMaskedLM": 2,
"MobileBertForQuestionAnswering": 2,
"OPTForCausalLM": 2,
"PLBartForCausalLM": 2,
"PLBartForConditionalGeneration": 2,
"PegasusForCausalLM": 4,
"PegasusForConditionalGeneration": 2,
"RobertaForCausalLM": 2,
"RobertaForQuestionAnswering": 2,
"Speech2Text2ForCausalLM": 4,
"T5ForConditionalGeneration": 2,
"T5Small": 2,
"TrOCRForCausalLM": 2,
"XGLMForCausalLM": 4,
"XLNetLMHeadModel": 2,
"YituTechConvBert": 2,
}

try:
    EXTRA_MODELS = {
        "AllenaiLongformerBase": (
91 changes: 91 additions & 0 deletions torchbenchmark/util/framework/huggingface/list_extended_configs.py
@@ -0,0 +1,91 @@
import torch
import os
from torchbenchmark import REPO_PATH

from typing import List

DYNAMOBENCH_PATH = REPO_PATH.joinpath("userbenchmark", "dynamo", "dynamobench")

# These models contain the models present in huggingface_models_list. It is a
# combination of models supported by HF Fx parser and some manually supplied
# models. For these models, we already know the largest batch size that can fit
# on A100 GPUs - 40 GB.
BATCH_SIZE_KNOWN_MODELS = dict()

# Get the list of models and their batch sizes
# Only load the extended models in OSS
if hasattr(torch.version, "git_version"):
    MODELS_FILENAME = os.path.join(DYNAMOBENCH_PATH, "huggingface_models_list.txt")
else:
    from libfb.py import parutil

    MODELS_FILENAME = parutil.get_file_path(
        "caffe2/benchmarks/dynamo/huggingface_models_list.txt"
    )
assert os.path.exists(MODELS_FILENAME)
with open(MODELS_FILENAME, "r") as fh:
    lines = fh.readlines()
    lines = [line.rstrip() for line in lines]
    for line in lines:
        model_name, batch_size = line.split(",")
        batch_size = int(batch_size)
        BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size
assert len(BATCH_SIZE_KNOWN_MODELS)


def is_extended_huggingface_models(model_name: str) -> bool:
    return model_name in BATCH_SIZE_KNOWN_MODELS


def list_extended_huggingface_models() -> List[str]:
    return list(BATCH_SIZE_KNOWN_MODELS.keys())

# TODO - Fails even after fake tensors
BATCH_SIZE_DIVISORS = {
"AlbertForMaskedLM": 2,
"AlbertForQuestionAnswering": 2,
"AllenaiLongformerBase": 2,
"BartForCausalLM": 2,
"BartForConditionalGeneration": 2,
"BertForMaskedLM": 2,
"BertForQuestionAnswering": 2,
"BlenderbotForCausalLM": 8,
# "BlenderbotForConditionalGeneration" : 16,
"BlenderbotSmallForCausalLM": 4,
"BlenderbotSmallForConditionalGeneration": 2,
"CamemBert": 2,
"DebertaForMaskedLM": 4,
"DebertaForQuestionAnswering": 2,
"DebertaV2ForMaskedLM": 4,
"DebertaV2ForQuestionAnswering": 8,
"DistilBertForMaskedLM": 2,
"DistilBertForQuestionAnswering": 2,
"DistillGPT2": 2,
"ElectraForCausalLM": 2,
"ElectraForQuestionAnswering": 2,
"GPT2ForSequenceClassification": 2,
# "GPTJForCausalLM" : 2,
# "GPTJForQuestionAnswering" : 2,
# "GPTNeoForCausalLM" : 32,
# "GPTNeoForSequenceClassification" : 2,
"GoogleFnet": 2,
"LayoutLMForMaskedLM": 2,
"LayoutLMForSequenceClassification": 2,
"M2M100ForConditionalGeneration": 4,
"MBartForCausalLM": 2,
"MBartForConditionalGeneration": 2,
"MT5ForConditionalGeneration": 2,
"MegatronBertForCausalLM": 4,
"MegatronBertForQuestionAnswering": 2,
"MobileBertForMaskedLM": 2,
"MobileBertForQuestionAnswering": 2,
"OPTForCausalLM": 2,
"PLBartForCausalLM": 2,
"PLBartForConditionalGeneration": 2,
"PegasusForCausalLM": 4,
"PegasusForConditionalGeneration": 2,
"RobertaForCausalLM": 2,
"RobertaForQuestionAnswering": 2,
"Speech2Text2ForCausalLM": 4,
"T5ForConditionalGeneration": 2,
"T5Small": 2,
"TrOCRForCausalLM": 2,
"XGLMForCausalLM": 4,
"XLNetLMHeadModel": 2,
"YituTechConvBert": 2,
}
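
For reference, the helper below sketches how these two tables are typically consumed together: start from the largest known-good A100 batch size and shrink it by the model's divisor. The `effective_batch_size` name is hypothetical, and the import assumes a torchbench checkout where `huggingface_models_list.txt` is present, since the module reads it at import time:

```python
# Illustrative only; not part of this commit.
from torchbenchmark.util.framework.huggingface.list_extended_configs import (
    BATCH_SIZE_DIVISORS,
    BATCH_SIZE_KNOWN_MODELS,
)


def effective_batch_size(model_name: str) -> int:
    # A divisor entry marks a model whose recorded batch size is known to be
    # too large in practice; clamp to 1 so the divisor never zeroes it out.
    batch_size = BATCH_SIZE_KNOWN_MODELS[model_name]
    return max(batch_size // BATCH_SIZE_DIVISORS.get(model_name, 1), 1)


print(effective_batch_size("BertForMaskedLM"))
```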
2 changes: 1 addition & 1 deletion torchbenchmark/util/framework/huggingface/model_factory.py
@@ -12,7 +12,7 @@
 from transformers import GenerationConfig
 
 from .basic_configs import is_basic_huggingface_models
-from .extended_configs import (
+from .list_extended_configs import (
     BATCH_SIZE_DIVISORS,
     BATCH_SIZE_KNOWN_MODELS,
     is_extended_huggingface_models,
7 changes: 5 additions & 2 deletions userbenchmark/dynamo/dynamobench/common.py
@@ -3974,9 +3974,12 @@ def run(runner, args, original_dir=None):
         assert "cuda" in args.devices, "Quantization requires CUDA device."
         assert args.bfloat16, "Quantization requires dtype bfloat16."
         try:
-            from .torchao_backend import setup_baseline, torchao_optimize_ctx
-        except ImportError:
             from torchao_backend import setup_baseline, torchao_optimize_ctx
+        except ImportError:
+            from userbenchmark.dynamo.dynamobench.torchao_backend import (
+                setup_baseline,
+                torchao_optimize_ctx,
+            )
 
         setup_baseline()
         baseline_ctx = functools.partial(
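
The net effect of this hunk appears to be that the relative import (which only works when common.py is imported as part of a package) is dropped in favor of a plain top-level import, with a fully qualified `userbenchmark.dynamo.dynamobench.torchao_backend` fallback that resolves when the benchmark is driven from the torchbench repository root, e.g. via `run_benchmark.py torchao`.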