From cfae89c5552329cba48722c52484ba25afb50ee7 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 6 Jun 2024 16:27:29 -0700 Subject: [PATCH] Added torchao nightly workflow (#2273) Summary: X-link: https://github.com/pytorch/pytorch/pull/128152 Add torchao benchmark workflow, upload the artifacts to GHA. Pull Request resolved: https://github.com/pytorch/benchmark/pull/2273 Test Plan: ``` python run_benchmark.py torchao --ci ``` Reviewed By: jerryzh168 Differential Revision: D58140479 Pulled By: xuzhao9 fbshipit-source-id: b274edb417f880df9b149bd5afc1acbe95737433 --- .github/workflows/torchao.yml | 80 ++++++++++++++++ scripts/userbenchmark/upload_s3_csv.py | 91 +++++++++++++++++++ .../framework/huggingface/extended_configs.py | 91 ------------------- .../huggingface/list_extended_configs.py | 91 +++++++++++++++++++ .../framework/huggingface/model_factory.py | 2 +- userbenchmark/dynamo/dynamobench/common.py | 7 +- userbenchmark/dynamo/run.py | 51 +++++++---- userbenchmark/torchao/__init__.py | 1 + userbenchmark/torchao/install.py | 13 +++ userbenchmark/torchao/run.py | 53 +++++++++++ userbenchmark/torchao/upload.py | 71 +++++++++++++++ 11 files changed, 441 insertions(+), 110 deletions(-) create mode 100644 .github/workflows/torchao.yml create mode 100644 scripts/userbenchmark/upload_s3_csv.py create mode 100644 torchbenchmark/util/framework/huggingface/list_extended_configs.py create mode 100644 userbenchmark/torchao/__init__.py create mode 100644 userbenchmark/torchao/install.py create mode 100644 userbenchmark/torchao/run.py create mode 100644 userbenchmark/torchao/upload.py diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml new file mode 100644 index 0000000000..f82b7fb0f2 --- /dev/null +++ b/.github/workflows/torchao.yml @@ -0,0 +1,80 @@ +name: Torchao nightly workflow (A100) +on: + workflow_dispatch: + + +jobs: + run-benchmark: + environment: docker-s3-upload + env: + BASE_CONDA_ENV: "torchbench" + CONDA_ENV: "torchao-nightly" + PLATFORM_NAME: "gcp_a100" + SETUP_SCRIPT: "/workspace/setup_instance.sh" + TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + IS_GHA: 1 + BUILD_ENVIRONMENT: benchmark-nightly + if: ${{ github.repository_owner == 'pytorch' }} + runs-on: [a100-runner] + timeout-minutes: 1440 # 24 hours + steps: + - name: Checkout TorchBench + uses: actions/checkout@v3 + with: + path: benchmark + - name: Tune Nvidia GPU + run: | + sudo nvidia-smi -pm 1 + sudo nvidia-smi -ac 1215,1410 + nvidia-smi + sudo ldconfig + - name: Clone and setup conda env + run: | + CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}" + conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}" + - name: Run the torchao userbenchmark + env: + WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} + WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} + run: | + . 
"${SETUP_SCRIPT}" + set -x + # remove old results if exists + if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi + pushd benchmark + if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi + # Install torchao + echo "Installing torchao" + pip uninstall -y torchao + python install.py --userbenchmark torchao + echo "Running the torchao userbenchmark" + python run_benchmark.py torchao --ci --dashboard + - name: Copy the benchmark logs to benchmark-output + if: always() + run: | + pushd benchmark + cp -r ./.userbenchmark/torchao ../benchmark-output + - name: Upload result to GH Actions Artifact + uses: actions/upload-artifact@v3 + if: always() + with: + name: Torchao nightly result + path: benchmark-output/ + - name: Copy artifact and upload to scribe and Amazon S3 + env: + WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} + WORKFLOW_RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }} + run: | + . "${SETUP_SCRIPT}" + pushd benchmark + # Upload the result json to Amazon S3 + python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark torchao \ + --upload-path ../benchmark-output --match-filename "^torchao_.*\.csv" + - name: Clean up Conda env + if: always() + run: | + . "${SETUP_SCRIPT}" + conda deactivate && conda deactivate + conda remove -n "${CONDA_ENV}" --all diff --git a/scripts/userbenchmark/upload_s3_csv.py b/scripts/userbenchmark/upload_s3_csv.py new file mode 100644 index 0000000000..27c49d04d8 --- /dev/null +++ b/scripts/userbenchmark/upload_s3_csv.py @@ -0,0 +1,91 @@ +import argparse +import sys +import os +import re +from pathlib import Path +from datetime import datetime + +REPO_ROOT = Path(__file__).parent.parent.parent.resolve() + +class add_path: + def __init__(self, path): + self.path = path + + def __enter__(self): + sys.path.insert(0, self.path) + + def __exit__(self, exc_type, exc_value, traceback): + try: + sys.path.remove(self.path) + except ValueError: + pass + + +with add_path(str(REPO_ROOT)): + from utils.s3_utils import ( + S3Client, + USERBENCHMARK_S3_BUCKET, + ) + + +def upload_s3(s3_object: str, + ub_name: str, + workflow_run_id: str, + workflow_run_attempt: str, + file_path: Path, + dryrun: bool): + """S3 path: + s3://ossci-metrics/////file_name + """ + s3client = S3Client(USERBENCHMARK_S3_BUCKET, s3_object) + prefix = f"{ub_name}/{workflow_run_id}/{workflow_run_attempt}" + print(f"Uploading to prefix: {prefix}") + if not dryrun: + s3client.upload_file(prefix=prefix, file_path=file_path) + + +def _get_files_to_upload(file_path: str, match_filename: str): + filename_regex = re.compile(match_filename) + return [ file_name for file_name in os.listdir(file_path) if filename_regex.match(file_name) ] + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--s3-prefix", + required=True, + help="S3 path prefix", + ) + parser.add_argument( + "--userbenchmark", + required=True, + help="Name of the userbenchmark.", + ) + parser.add_argument( + "--upload-path", + required=True, + help="Local directory contains files to upload.", + ) + parser.add_argument( + "--match-filename", + required=True, + help="Filename regex matched to upload.", + ) + parser.add_argument( + "--dryrun", + action="store_true", + help="Dryrun the upload", + ) + args = parser.parse_args() + + files_to_upload = _get_files_to_upload(args.upload_path, args.match_filename) + workflow_run_id = os.environ.get("WORKFLOW_RUN_ID", 0) + workflow_run_attempt = os.environ.get("WORKFLOW_RUN_ATTEMPT", 0) + + for 
file in files_to_upload: + file_path = Path(args.upload_path).joinpath(file) + upload_s3(s3_object=args.s3_prefix, + ub_name=args.userbenchmark, + workflow_run_id=workflow_run_id, + workflow_run_attempt=workflow_run_attempt, + file_path=file_path, + dryrun=args.dryrun) diff --git a/torchbenchmark/util/framework/huggingface/extended_configs.py b/torchbenchmark/util/framework/huggingface/extended_configs.py index 7fc840e7f5..c149905687 100644 --- a/torchbenchmark/util/framework/huggingface/extended_configs.py +++ b/torchbenchmark/util/framework/huggingface/extended_configs.py @@ -1,44 +1,8 @@ # Extended huggingface model configs from Dynamobench import importlib import logging -import os -from typing import List import torch -from torchbenchmark import REPO_PATH - -DYNAMOBENCH_PATH = REPO_PATH.joinpath("userbenchmark", "dynamo", "dynamobench") -# These models contain the models present in huggingface_models_list. It is a -# combination of models supported by HF Fx parser and some manually supplied -# models. For these models, we already know the largest batch size that can fit -# on A100 GPUs - 40 GB. -BATCH_SIZE_KNOWN_MODELS = dict() - -# Get the list of models and their batch sizes -# Only load the extended models in OSS -if hasattr(torch.version, "git_version"): - MODELS_FILENAME = os.path.join(DYNAMOBENCH_PATH, "huggingface_models_list.txt") -else: - from libfb.py import parutil - MODELS_FILENAME = parutil.get_file_path("caffe2/benchmarks/dynamo/huggingface_models_list.txt") -assert os.path.exists(MODELS_FILENAME) -with open(MODELS_FILENAME, "r") as fh: - lines = fh.readlines() - lines = [line.rstrip() for line in lines] - for line in lines: - model_name, batch_size = line.split(",") - batch_size = int(batch_size) - BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size -assert len(BATCH_SIZE_KNOWN_MODELS) - - -def is_extended_huggingface_models(model_name: str) -> bool: - return model_name in BATCH_SIZE_KNOWN_MODELS - - -def list_extended_huggingface_models() -> List[str]: - return list(BATCH_SIZE_KNOWN_MODELS.keys()) - imports = [ "AlbertForPreTraining", @@ -161,61 +125,6 @@ def list_extended_huggingface_models() -> List[str]: "tinynet_a", } -# TODO - Fails even after fake tensors -BATCH_SIZE_DIVISORS = { - "AlbertForMaskedLM": 2, - "AlbertForQuestionAnswering": 2, - "AllenaiLongformerBase": 2, - "BartForCausalLM": 2, - "BartForConditionalGeneration": 2, - "BertForMaskedLM": 2, - "BertForQuestionAnswering": 2, - "BlenderbotForCausalLM": 8, - # "BlenderbotForConditionalGeneration" : 16, - "BlenderbotSmallForCausalLM": 4, - "BlenderbotSmallForConditionalGeneration": 2, - "CamemBert": 2, - "DebertaForMaskedLM": 4, - "DebertaForQuestionAnswering": 2, - "DebertaV2ForMaskedLM": 4, - "DebertaV2ForQuestionAnswering": 8, - "DistilBertForMaskedLM": 2, - "DistilBertForQuestionAnswering": 2, - "DistillGPT2": 2, - "ElectraForCausalLM": 2, - "ElectraForQuestionAnswering": 2, - "GPT2ForSequenceClassification": 2, - # "GPTJForCausalLM" : 2, - # "GPTJForQuestionAnswering" : 2, - # "GPTNeoForCausalLM" : 32, - # "GPTNeoForSequenceClassification" : 2, - "GoogleFnet": 2, - "LayoutLMForMaskedLM": 2, - "LayoutLMForSequenceClassification": 2, - "M2M100ForConditionalGeneration": 4, - "MBartForCausalLM": 2, - "MBartForConditionalGeneration": 2, - "MT5ForConditionalGeneration": 2, - "MegatronBertForCausalLM": 4, - "MegatronBertForQuestionAnswering": 2, - "MobileBertForMaskedLM": 2, - "MobileBertForQuestionAnswering": 2, - "OPTForCausalLM": 2, - "PLBartForCausalLM": 2, - "PLBartForConditionalGeneration": 2, - 
"PegasusForCausalLM": 4, - "PegasusForConditionalGeneration": 2, - "RobertaForCausalLM": 2, - "RobertaForQuestionAnswering": 2, - "Speech2Text2ForCausalLM": 4, - "T5ForConditionalGeneration": 2, - "T5Small": 2, - "TrOCRForCausalLM": 2, - "XGLMForCausalLM": 4, - "XLNetLMHeadModel": 2, - "YituTechConvBert": 2, -} - try: EXTRA_MODELS = { "AllenaiLongformerBase": ( diff --git a/torchbenchmark/util/framework/huggingface/list_extended_configs.py b/torchbenchmark/util/framework/huggingface/list_extended_configs.py new file mode 100644 index 0000000000..67b6241b85 --- /dev/null +++ b/torchbenchmark/util/framework/huggingface/list_extended_configs.py @@ -0,0 +1,91 @@ +import torch +import os +from torchbenchmark import REPO_PATH + +from typing import List + +DYNAMOBENCH_PATH = REPO_PATH.joinpath("userbenchmark", "dynamo", "dynamobench") + +# These models contain the models present in huggingface_models_list. It is a +# combination of models supported by HF Fx parser and some manually supplied +# models. For these models, we already know the largest batch size that can fit +# on A100 GPUs - 40 GB. +BATCH_SIZE_KNOWN_MODELS = dict() + +# Get the list of models and their batch sizes +# Only load the extended models in OSS +if hasattr(torch.version, "git_version"): + MODELS_FILENAME = os.path.join(DYNAMOBENCH_PATH, "huggingface_models_list.txt") +else: + from libfb.py import parutil + MODELS_FILENAME = parutil.get_file_path("caffe2/benchmarks/dynamo/huggingface_models_list.txt") +assert os.path.exists(MODELS_FILENAME) +with open(MODELS_FILENAME, "r") as fh: + lines = fh.readlines() + lines = [line.rstrip() for line in lines] + for line in lines: + model_name, batch_size = line.split(",") + batch_size = int(batch_size) + BATCH_SIZE_KNOWN_MODELS[model_name] = batch_size +assert len(BATCH_SIZE_KNOWN_MODELS) + +def is_extended_huggingface_models(model_name: str) -> bool: + return model_name in BATCH_SIZE_KNOWN_MODELS + +def list_extended_huggingface_models() -> List[str]: + return list(BATCH_SIZE_KNOWN_MODELS.keys()) + +# TODO - Fails even after fake tensors +BATCH_SIZE_DIVISORS = { + "AlbertForMaskedLM": 2, + "AlbertForQuestionAnswering": 2, + "AllenaiLongformerBase": 2, + "BartForCausalLM": 2, + "BartForConditionalGeneration": 2, + "BertForMaskedLM": 2, + "BertForQuestionAnswering": 2, + "BlenderbotForCausalLM": 8, + # "BlenderbotForConditionalGeneration" : 16, + "BlenderbotSmallForCausalLM": 4, + "BlenderbotSmallForConditionalGeneration": 2, + "CamemBert": 2, + "DebertaForMaskedLM": 4, + "DebertaForQuestionAnswering": 2, + "DebertaV2ForMaskedLM": 4, + "DebertaV2ForQuestionAnswering": 8, + "DistilBertForMaskedLM": 2, + "DistilBertForQuestionAnswering": 2, + "DistillGPT2": 2, + "ElectraForCausalLM": 2, + "ElectraForQuestionAnswering": 2, + "GPT2ForSequenceClassification": 2, + # "GPTJForCausalLM" : 2, + # "GPTJForQuestionAnswering" : 2, + # "GPTNeoForCausalLM" : 32, + # "GPTNeoForSequenceClassification" : 2, + "GoogleFnet": 2, + "LayoutLMForMaskedLM": 2, + "LayoutLMForSequenceClassification": 2, + "M2M100ForConditionalGeneration": 4, + "MBartForCausalLM": 2, + "MBartForConditionalGeneration": 2, + "MT5ForConditionalGeneration": 2, + "MegatronBertForCausalLM": 4, + "MegatronBertForQuestionAnswering": 2, + "MobileBertForMaskedLM": 2, + "MobileBertForQuestionAnswering": 2, + "OPTForCausalLM": 2, + "PLBartForCausalLM": 2, + "PLBartForConditionalGeneration": 2, + "PegasusForCausalLM": 4, + "PegasusForConditionalGeneration": 2, + "RobertaForCausalLM": 2, + "RobertaForQuestionAnswering": 2, + 
"Speech2Text2ForCausalLM": 4, + "T5ForConditionalGeneration": 2, + "T5Small": 2, + "TrOCRForCausalLM": 2, + "XGLMForCausalLM": 4, + "XLNetLMHeadModel": 2, + "YituTechConvBert": 2, +} \ No newline at end of file diff --git a/torchbenchmark/util/framework/huggingface/model_factory.py b/torchbenchmark/util/framework/huggingface/model_factory.py index 4c9f7df688..bf49fc4c14 100644 --- a/torchbenchmark/util/framework/huggingface/model_factory.py +++ b/torchbenchmark/util/framework/huggingface/model_factory.py @@ -12,7 +12,7 @@ from transformers import GenerationConfig from .basic_configs import is_basic_huggingface_models -from .extended_configs import ( +from .list_extended_configs import ( BATCH_SIZE_DIVISORS, BATCH_SIZE_KNOWN_MODELS, is_extended_huggingface_models, diff --git a/userbenchmark/dynamo/dynamobench/common.py b/userbenchmark/dynamo/dynamobench/common.py index 466e6b30d0..9d4401aa59 100644 --- a/userbenchmark/dynamo/dynamobench/common.py +++ b/userbenchmark/dynamo/dynamobench/common.py @@ -3974,9 +3974,12 @@ def run(runner, args, original_dir=None): assert "cuda" in args.devices, "Quantization requires CUDA device." assert args.bfloat16, "Quantization requires dtype bfloat16." try: - from .torchao_backend import setup_baseline, torchao_optimize_ctx - except ImportError: from torchao_backend import setup_baseline, torchao_optimize_ctx + except ImportError: + from userbenchmark.dynamo.dynamobench.torchao_backend import ( + setup_baseline, + torchao_optimize_ctx, + ) setup_baseline() baseline_ctx = functools.partial( diff --git a/userbenchmark/dynamo/run.py b/userbenchmark/dynamo/run.py index 558d5d6fe5..a58adeb0ee 100644 --- a/userbenchmark/dynamo/run.py +++ b/userbenchmark/dynamo/run.py @@ -1,9 +1,8 @@ -import logging -import warnings +import re import sys from torchbenchmark import add_path, REPO_PATH -from torchbenchmark.util.framework.huggingface.extended_configs import ( +from torchbenchmark.util.framework.huggingface.list_extended_configs import ( list_extended_huggingface_models, ) from torchbenchmark.util.framework.timm.extended_configs import ( @@ -15,6 +14,16 @@ from typing import List, Optional def _get_model_set_by_model_name(args: List[str]) -> str: + def _get_only_arg(args): + if "--only" in args: + only_index = args.index("--only") + return args[only_index + 1] + only_reg = "--only=(.*)" + only_args = [o for o in args if re.match(only_reg, o)] + if only_args: + only_model = re.match(only_reg, only_args[0]).groups()[0] + return only_model + return None if "--huggingface" in args: args.remove("--huggingface") return "huggingface" @@ -24,12 +33,10 @@ def _get_model_set_by_model_name(args: List[str]) -> str: if "--torchbench" in args: args.remove("--torchbench") return "torchbench" - if "--only" in args: - only_index = args.index("--only") - model_name = args[only_index + 1] - if model_name in list_extended_huggingface_models(): + if only_model := _get_only_arg(args): + if only_model in list_extended_huggingface_models(): return "huggingface" - if model_name in list_extended_timm_models(): + if only_model in list_extended_timm_models(): return "timm" return "torchbench" @@ -73,15 +80,27 @@ def _run_torchbench(args: List[str]) -> None: main(TorchBenchmarkRunner(), original_dir, args) +class PT2SysArgvManager: + + def __init__(self, args): + self.args = args + + def __enter__(self): + self.original_sys_argv = sys.argv + sys.argv = ["run_benchmark.py", "dynamo"] + sys.argv.extend(self.args.copy()) + + def __exit__(self, exc_type, exc_val, exc_tb): + sys.argv = 
self.original_sys_argv
+
 def run(args: Optional[List[str]]=None):
     if args is None:
         args = sys.argv[1:]
     model_set = _get_model_set_by_model_name(args)
-    logging.basicConfig(level=logging.WARNING)
-    warnings.filterwarnings("ignore")
-    if model_set == "huggingface":
-        _run_huggingface(args)
-    elif model_set == "timm":
-        _run_timm(args)
-    else:
-        _run_torchbench(args)
+    with PT2SysArgvManager(args):
+        if model_set == "huggingface":
+            _run_huggingface(args)
+        elif model_set == "timm":
+            _run_timm(args)
+        else:
+            _run_torchbench(args)
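Note: `PT2SysArgvManager` above temporarily rewrites `sys.argv`, presumably because parts of the PT2 benchmark runner consult `sys.argv` directly, so forwarding the parsed argument list alone would not be visible everywhere. A minimal self-contained sketch of the pattern follows; `fake_runner_main` is a hypothetical stand-in for the real runner entry point:

```python
# Minimal sketch of the sys.argv swap performed by PT2SysArgvManager.
# fake_runner_main is a hypothetical stand-in for the PT2 runner's main().
import sys


class SysArgvManager:
    def __init__(self, args):
        self.args = args

    def __enter__(self):
        # Pretend the process was launched as `run_benchmark.py dynamo <args>`.
        self.original_sys_argv = sys.argv
        sys.argv = ["run_benchmark.py", "dynamo"] + list(self.args)

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore the real argv even if the runner raised.
        sys.argv = self.original_sys_argv


def fake_runner_main():
    print("runner sees:", sys.argv)


with SysArgvManager(["--performance", "--inference"]):
    fake_runner_main()  # runner sees: ['run_benchmark.py', 'dynamo', '--performance', '--inference']
print("restored:", sys.argv)
```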
diff --git a/userbenchmark/torchao/__init__.py b/userbenchmark/torchao/__init__.py
new file mode 100644
index 0000000000..5f04e41781
--- /dev/null
+++ b/userbenchmark/torchao/__init__.py
@@ -0,0 +1 @@
+BM_NAME = "torchao"
\ No newline at end of file
diff --git a/userbenchmark/torchao/install.py b/userbenchmark/torchao/install.py
new file mode 100644
index 0000000000..9d491f6322
--- /dev/null
+++ b/userbenchmark/torchao/install.py
@@ -0,0 +1,13 @@
+import os
+import subprocess
+
+def install_torchao():
+    # Set TORCH_CUDA_ARCH_LIST so that fp16 kernels (SM75+) are built; the arch
+    # list follows pytorch/builder and torchao's packaging script:
+    # https://github.com/pytorch/ao/blob/main/packaging/env_var_script_linux.sh#L16C1-L19
+    torchao_env = os.environ.copy()
+    torchao_env["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
+    subprocess.check_call(["pip", "install", "--pre", "git+https://github.com/pytorch/ao.git"], env=torchao_env)
+
+if __name__ == "__main__":
+    install_torchao()
\ No newline at end of file
diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py
new file mode 100644
index 0000000000..ad55fb8afc
--- /dev/null
+++ b/userbenchmark/torchao/run.py
@@ -0,0 +1,53 @@
+import argparse
+
+from userbenchmark.utils import get_output_dir
+from typing import List
+
+from . import BM_NAME
+from .upload import post_ci_process
+
+OUTPUT_DIR = get_output_dir(BM_NAME)
+OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
+
+CI_ARGS = [
+    # TIMM
+    ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "noquant", "--output", str(OUTPUT_DIR.joinpath("torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv").resolve())],
+    ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "noquant", "--output", str(OUTPUT_DIR.joinpath("torchao_noquant_timm_models_bfloat16_inference_cuda_accuracy.csv").resolve())],
+    ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", str(OUTPUT_DIR.joinpath("torchao_int8dynamic_timm_models_bfloat16_inference_cuda_performance.csv").resolve())],
+    ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8dynamic", "--output", str(OUTPUT_DIR.joinpath("torchao_int8dynamic_timm_models_bfloat16_inference_cuda_accuracy.csv").resolve())],
+    ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", str(OUTPUT_DIR.joinpath("torchao_int8weightonly_timm_models_bfloat16_inference_cuda_performance.csv").resolve())],
+    ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "int8weightonly", "--output", str(OUTPUT_DIR.joinpath("torchao_int8weightonly_timm_models_bfloat16_inference_cuda_accuracy.csv").resolve())],
+    ["--progress", "--timm", "--performance", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", str(OUTPUT_DIR.joinpath("torchao_autoquant_timm_models_bfloat16_inference_cuda_performance.csv").resolve())],
+    ["--progress", "--timm", "--accuracy", "--inference", "--bfloat16", "--quantization", "autoquant", "--output", str(OUTPUT_DIR.joinpath("torchao_autoquant_timm_models_bfloat16_inference_cuda_accuracy.csv").resolve())],
+]
+
+
+def _get_output(pt2_args):
+    if "--output" in pt2_args:
+        output_index = pt2_args.index("--output")
+        return pt2_args[output_index + 1]
+    return "not_available"
+
+
+def _run_pt2_args(pt2_args: List[str]) -> str:
+    from userbenchmark.dynamo.run import run as run_pt2_benchmark
+    print(f"=================== [TORCHAO] Running PT2 Benchmark Runner with Args: {pt2_args} ===================")
+    run_pt2_benchmark(pt2_args)
+    return _get_output(pt2_args)
+
+
+def run(args: List[str]):
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--ci", action="store_true", help="Run the CI workflow")
+    parser.add_argument("--dashboard", action="store_true", help="Post-process the output files for the S3 upload and the dashboard.")
+    args, pt2_args = parser.parse_known_args(args)
+
+    if args.ci:
+        group_pt2_args = CI_ARGS
+    else:
+        group_pt2_args = [pt2_args]
+
+    output_files = [_run_pt2_args(pt2_args) for pt2_args in group_pt2_args]
+    # Post-processing
+    if args.dashboard:
+        post_ci_process(output_files)
+    print("\n".join(output_files))
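Note: `CI_ARGS` above hand-enumerates the cross product of the four quantization modes and the two metrics over the TIMM suite. Purely as an illustration (a hypothetical `make_ci_args` helper, not part of this patch), the same matrix could be generated programmatically:

```python
# Hypothetical sketch: generate the CI_ARGS matrix instead of hand-writing it.
# The flag names and CSV naming scheme mirror the CI_ARGS list in run.py above.
from itertools import product
from pathlib import Path

QUANT_MODES = ["noquant", "int8dynamic", "int8weightonly", "autoquant"]
METRICS = ["performance", "accuracy"]

def make_ci_args(output_dir: Path):
    ci_args = []
    for quant, metric in product(QUANT_MODES, METRICS):
        out = output_dir.joinpath(
            f"torchao_{quant}_timm_models_bfloat16_inference_cuda_{metric}.csv"
        ).resolve()
        ci_args.append([
            "--progress", "--timm", f"--{metric}", "--inference", "--bfloat16",
            "--quantization", quant, "--output", str(out),
        ])
    return ci_args
```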
diff --git a/userbenchmark/torchao/upload.py b/userbenchmark/torchao/upload.py
new file mode 100644
index 0000000000..53dce17481
--- /dev/null
+++ b/userbenchmark/torchao/upload.py
@@ -0,0 +1,71 @@
+import argparse
+import os
+import csv
+import subprocess
+from pathlib import Path
+from typing import List
+
+def _get_torchao_head_sha():
+    cmd_args = ["git", "ls-remote", "https://github.com/pytorch/ao.git", "HEAD"]
+    sha = subprocess.check_output(cmd_args).decode().split("\t")[0]
+    return sha
+
+def _get_model_set(filename: str):
+    if "timm_models" in filename:
+        return "timm"
+    if "huggingface" in filename:
+        return "huggingface"
+    if "torchbench" in filename:
+        return "torchbench"
+    raise RuntimeError(f"Unknown model set from filename: {filename}")
+
+def post_ci_process(output_files: List[str]):
+    for path in output_files:
+        perf_stats = []
+        path = Path(path).absolute()
+        modelset = _get_model_set(path.name)
+        test_name = f"torchao_{modelset}_perf"
+        runner = "gcp_a100"
+        job_id = 0
+        workflow_run_id = os.environ.get("WORKFLOW_RUN_ID", 0)
+        workflow_run_attempt = os.environ.get("WORKFLOW_RUN_ATTEMPT", 0)
+        filename = os.path.splitext(os.path.basename(path))[0]
+        head_repo = "pytorch/ao"
+        head_branch = "main"
+        head_sha = _get_torchao_head_sha()
+        print(f"Processing file {path}")
+        with open(path) as csvfile:
+            reader = csv.DictReader(csvfile, delimiter=",")
+
+            for row in reader:
+                row.update(
+                    {
+                        "workflow_id": workflow_run_id,  # type: ignore[dict-item]
+                        "run_attempt": workflow_run_attempt,  # type: ignore[dict-item]
+                        "test_name": test_name,
+                        "runner": runner,
+                        "job_id": job_id,
+                        "filename": filename,
+                        "head_repo": head_repo,
+                        "head_branch": head_branch,
+                        "head_sha": head_sha,
+                    }
+                )
+                perf_stats.append(row)
+
+        # Write the decorated CSV file back in place
+        with open(path, "w", newline="") as csvfile:
+            writer = csv.DictWriter(csvfile, fieldnames=None)
+
+            for i, row in enumerate(perf_stats):
+                if i == 0:
+                    writer.fieldnames = row.keys()
+                    writer.writeheader()
+                writer.writerow(row)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--test-file", type=str, help="Path to a result CSV file to post-process.")
+    args = parser.parse_args()
+    post_ci_process([args.test_file])
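Note: for local verification of the post-processing step, something like the following sketch exercises `post_ci_process` end to end. It is run from the repository root; the CSV row is made up, the filename only needs to embed a known model set so `_get_model_set` can classify it, and the `git ls-remote` call inside `_get_torchao_head_sha` needs network access to github.com:

```python
import csv
import tempfile
from pathlib import Path

from userbenchmark.torchao.upload import post_ci_process

# The filename must contain "timm_models", "huggingface", or "torchbench"
# so that _get_model_set() can classify it; the row contents are made up.
tmpdir = Path(tempfile.mkdtemp())
test_csv = tmpdir / "torchao_noquant_timm_models_bfloat16_inference_cuda_performance.csv"
with open(test_csv, "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["name", "speedup"])
    writer.writeheader()
    writer.writerow({"name": "resnet50", "speedup": "1.25"})

# Rewrites the file in place, appending the CI metadata columns
# (workflow_id, test_name, head_sha, ...). Outside CI, the WORKFLOW_RUN_ID
# and WORKFLOW_RUN_ATTEMPT variables fall back to 0.
post_ci_process([str(test_csv)])
print(test_csv.read_text())
```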