Skip to content

Commit

Permalink
one file
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed May 11, 2024
1 parent cf7fe8d commit 241dc66
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 36 deletions.
11 changes: 1 addition & 10 deletions .github/workflows/update_llm_perf_leaderboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,15 @@ on:
branches:
- gather-llm-perf-benchmarks
schedule:
- cron: "0 0 * * *"
- cron: "0 */6 * * *"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
update_llm_perf_leaderboard:
strategy:
fail-fast: false
matrix:
subset: [unquantized, bnb, awq, gptq]
machine: [1xA10]

runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v3
Expand All @@ -39,8 +32,6 @@ jobs:
- name: Update Open LLM Leaderboard
env:
SUBSET: ${{ matrix.subset }}
MACHINE: ${{ matrix.machine }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_HUB_ENABLE_HF_TRANSFER: 1
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/update_open_llm_leaderboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Update Open LLM Leaderboard
on:
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
- cron: "0 */6 * * *"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand Down
40 changes: 15 additions & 25 deletions llm_perf/update_llm_perf_leaderboard.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from glob import glob
from tempfile import TemporaryDirectory

Expand All @@ -8,40 +7,31 @@

from optimum_benchmark import Benchmark

SUBSET = os.getenv("SUBSET")
MACHINE = os.getenv("MACHINE")

PULL_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
def gather_benchmarks(subset: str, machine: str) -> None:
    """Pull one benchmark subset from the Hub and publish it as a leaderboard CSV.

    Downloads every ``benchmark.json`` from the per-(subset, machine) dataset repo,
    flattens them into a single DataFrame, and uploads the result as
    ``llm-perf-leaderboard-{subset}-{machine}.csv`` to the leaderboard dataset repo.

    Args:
        subset: Quantization subset name, e.g. "unquantized", "bnb", "awq" or "gptq".
        machine: Machine identifier the benchmarks ran on, e.g. "1xA10".

    Side effects:
        Network I/O against the Hugging Face Hub (download + upload); creates the
        (private) leaderboard repo if it does not exist yet.
    """
    pull_repo_id = f"optimum-benchmark/llm-perf-pytorch-cuda-{subset}-{machine}"

    # Only the benchmark.json artifacts are needed; skip everything else in the repo.
    snapshot = snapshot_download(repo_type="dataset", repo_id=pull_repo_id, allow_patterns=["**/benchmark.json"])

    # One DataFrame per benchmark run; concat with a fresh index since the
    # per-file indices are meaningless after merging.
    dfs = []
    for file in tqdm(glob(f"{snapshot}/**/benchmark.json", recursive=True)):
        dfs.append(Benchmark.from_json(file).to_dataframe())
    benchmarks = pd.concat(dfs, ignore_index=True)

    push_repo_id = "optimum-benchmark/llm-perf-leaderboard"
    file_name = f"llm-perf-leaderboard-{subset}-{machine}.csv"

    # Context manager guarantees the temp dir is cleaned up even if the upload fails.
    with TemporaryDirectory() as tmp_dir:
        benchmarks.to_csv(f"{tmp_dir}/{file_name}", index=False)

        # exist_ok=True makes repeated scheduled runs idempotent.
        create_repo(repo_id=push_repo_id, repo_type="dataset", private=True, exist_ok=True)
        upload_file(
            path_or_fileobj=f"{tmp_dir}/{file_name}", path_in_repo=file_name, repo_id=push_repo_id, repo_type="dataset"
        )


# Refresh the leaderboard CSV for every (subset, machine) combination.
machines = ["1xA10"]
for quant_subset in ("unquantized", "bnb", "awq", "gptq"):
    for target_machine in machines:
        gather_benchmarks(quant_subset, target_machine)

0 comments on commit 241dc66

Please sign in to comment.