Skip to content

Commit

Permalink
one file
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed May 11, 2024
1 parent cf7fe8d commit 241dc66
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 36 deletions.
11 changes: 1 addition & 10 deletions .github/workflows/update_llm_perf_leaderboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,15 @@ on:
branches:
- gather-llm-perf-benchmarks
schedule:
- cron: "0 0 * * *"
- cron: "0 */6 * * *"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
update_llm_perf_leaderboard:
strategy:
fail-fast: false
matrix:
subset: [unquantized, bnb, awq, gptq]
machine: [1xA10]

runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v3
Expand All @@ -39,8 +32,6 @@ jobs:
- name: Update Open LLM Leaderboard
env:
SUBSET: ${{ matrix.subset }}
MACHINE: ${{ matrix.machine }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
HF_HUB_ENABLE_HF_TRANSFER: 1
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/update_open_llm_leaderboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Update Open LLM Leaderboard
on:
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
- cron: "0 */6 * * *"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand Down
40 changes: 15 additions & 25 deletions llm_perf/update_llm_perf_leaderboard.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from glob import glob
from tempfile import TemporaryDirectory

Expand All @@ -8,40 +7,31 @@

from optimum_benchmark import Benchmark

SUBSET = os.getenv("SUBSET")
MACHINE = os.getenv("MACHINE")

PULL_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
def gather_benchmarks(subset: str, machine: str) -> None:
    """Pull one benchmark subset from the Hub and publish it as a leaderboard CSV.

    Downloads every ``benchmark.json`` from the per-(subset, machine) dataset repo,
    flattens them into a single DataFrame, and uploads the result as
    ``llm-perf-leaderboard-{subset}-{machine}.csv`` to the leaderboard dataset repo.

    Args:
        subset: Quantization subset name, e.g. "unquantized", "bnb", "awq" or "gptq".
        machine: Machine identifier the benchmarks ran on, e.g. "1xA10".

    Side effects:
        Network I/O against the Hugging Face Hub (download + upload); creates the
        (private) leaderboard repo if it does not exist yet.
    """
    pull_repo_id = f"optimum-benchmark/llm-perf-pytorch-cuda-{subset}-{machine}"

    # Only the benchmark.json artifacts are needed; skip everything else in the repo.
    snapshot = snapshot_download(repo_type="dataset", repo_id=pull_repo_id, allow_patterns=["**/benchmark.json"])

    # One DataFrame per benchmark run; concat with a fresh index since the
    # per-file indices are meaningless after merging.
    dfs = []
    for file in tqdm(glob(f"{snapshot}/**/benchmark.json", recursive=True)):
        dfs.append(Benchmark.from_json(file).to_dataframe())
    benchmarks = pd.concat(dfs, ignore_index=True)

    push_repo_id = "optimum-benchmark/llm-perf-leaderboard"
    file_name = f"llm-perf-leaderboard-{subset}-{machine}.csv"

    # Context manager guarantees the temp dir is cleaned up even if the upload fails.
    with TemporaryDirectory() as tmp_dir:
        benchmarks.to_csv(f"{tmp_dir}/{file_name}", index=False)

        # exist_ok=True makes repeated scheduled runs idempotent.
        create_repo(repo_id=push_repo_id, repo_type="dataset", private=True, exist_ok=True)
        upload_file(
            path_or_fileobj=f"{tmp_dir}/{file_name}", path_in_repo=file_name, repo_id=push_repo_id, repo_type="dataset"
        )


# Refresh the leaderboard CSV for every (subset, machine) combination.
machines = ["1xA10"]
for quant_subset in ("unquantized", "bnb", "awq", "gptq"):
    for target_machine in machines:
        gather_benchmarks(quant_subset, target_machine)

0 comments on commit 241dc66

Please sign in to comment.