# GitHub Actions workflow: GH Task Runner (Large Suite 1)
name: GH Task Runner (Large Suite 1)

on:
  workflow_dispatch:
    inputs:
      # Manual guard: the matrix below fans out to ~160 GPU jobs
      approval_notice:
        description: 'WARNING: This will spin up a large number of tasks - get approval from admin before running'
        required: false
        default: 'NOT_APPROVED'
        type: choice
        options:
          - 'NOT_APPROVED'
          - 'APPROVED'
      model_hf_repo:
        description: 'Model Hugging Face Repository'
        required: true
        default: 'RWKV/rwkv-5-world-1b5'
      model_args:
        description: 'Model Arguments'
        required: false
        default: 'dtype="float32",trust_remote_code=True'
      batch_size:
        description: 'Batch Size'
        required: true
        default: 'auto'
      backend:
        description: 'Backend to use'
        required: true
        default: 'nvidia-gpu'
        type: choice
        options:
          - 'nvidia-gpu'
          - 'intel-gpu'
          - 'amd-gpu'
          - 'any-gpu'
      gpu_vram:
        description: 'Minimum GPU VRAM (ignored for MPS)'
        required: true
        default: '24'
        # Quoted: workflow_dispatch input values are strings, so the options
        # must be strings too to match the '24' default
        type: choice
        options:
          - '16'
          - '24'
          - '40'
          - '48'
          - '80'
      num_fewshot:
        description: 'num_fewshot setting (ignored if < 0)'
        required: true
        # String default: dispatch inputs are always strings; the run step
        # compares it numerically with `-ge 0`
        default: '-1'
env:
  # Final task to run.
  # NOTE(review): neither 'custom_task' nor 'run_task' is declared under the
  # workflow_dispatch inputs above, so this expression always evaluates to an
  # empty string; the job reads matrix.run_task instead. Confirm it is unused
  # elsewhere before deleting.
  RUN_TASK: ${{ github.event.inputs.custom_task || github.event.inputs.run_task }}
  # HF repo the results are synced to
  HF_REPO_SYNC: rwkv-x-dev/lm-eval-output
  # Model HF repo under evaluation
  MODEL_HF_REPO: ${{ github.event.inputs.model_hf_repo }}
  # Secrets
  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
jobs:
  gh-task-runner-large-suite-1:
    # Refuse to run unless the dispatcher explicitly selected APPROVED
    if: ${{ github.event.inputs.approval_notice == 'APPROVED' }}

    # Strategy Matrix
    strategy:
      # Run every task to completion even if some fail
      fail-fast: false
      matrix:
        # NOTE: There is a matrix limit of 256 on github
        # Glob patterns are quoted so YAML tooling never mistakes them for
        # anchors/aliases and they survive reformatting verbatim
        run_task:
          - anli
          - arc_easy
          - arc_challenge
          - ai2_arc
          - 'anagrams*'
          - 'anli_*'
          - advanced_ai_risk
          - 'advanced_ai_risk_fewshot-*'
          - 'advanced_ai_risk_human-*'
          - 'advanced_ai_risk_lm-*'
          - arithmetic
          - 'arithmetic_*'
          - asdiv
          - babi
          - bbh
          - bbh_cot_fewshot
          - 'bbh_cot_fewshot_*'
          - bbh_cot_zeroshot
          - 'bbh_cot_zeroshot_*'
          - bbh_fewshot
          - 'bbh_fewshot_*'
          - bbh_zeroshot
          - 'bbh_zeroshot_*'
          - belebele
          - 'belebele_*'
          - 'bigbench_*'
          - blimp
          - 'blimp_*'
          - boolq
          - boolq-seq2seq
          - cb
          - ceval-valid
          - 'ceval-valid_*'
          - chain_of_thought
          - cmmlu
          - 'cmmlu_*'
          - 'code2text_*'
          - codexglue_code2text
          - cola
          - copa
          - coqa
          - crows_pairs
          - 'crows_pairs_*'
          - csatqa
          - 'csatqa_*'
          - cycle_letters
          - drop
          - 'ethics_*'
          - 'flan_held_*'
          - fld
          - 'fld_*'
          - freebase
          - generate_until
          - glue
          - gpt3_translation_benchmarks
          - gsm8k
          - gsm8k_cot
          - gsm8k_cot_self_consistency
          - headqa
          - headqa_en
          - headqa_es
          - hellaswag
          - 'hellaswag_*'
          - hendrycks_ethics
          - ifeval
          - iwslt2017
          - 'iwslt2017-*'
          - kmmlu
          - 'kmmlu_*'
          - kobest
          - 'kobest_*'
          - lambada
          - 'lambada_*'
          - logieval
          - logiqa
          - logiqa2
          - loglikelihood
          - math_word_problems
          - mathqa
          - mc_taco
          - medmcqa
          - medqa_4options
          - 'mgsm_*'
          - minerva_math
          - 'minerva_math_*'
          - mmlu
          - 'mmlu_*'
          - mnli
          - mnli_mismatch
          - mrpc
          - multimedqa
          - multiple_choice
          - multirc
          - mutual
          - mutual_plus
          - nq_open
          - openbookqa
          - 'paws_*'
          - pawsx
          - persona
          - 'persona_*'
          - pile
          - 'pile_*'
          - piqa
          - polemo2
          - 'polemo2_*'
          - prost
          - pubmedqa
          - pythia
          - qa4mre
          - 'qa4mre_*'
          - qasper
          - 'qasper_*'
          - qnli
          - qqp
          - race
          - random_insertion
          - realtoxicityprompts
          - record
          - reversed_words
          - rte
          - sciq
          - scrolls
          - self_consistency
          - sglue_rte
          - social_bias
          - social_iqa
          - squadv2
          - sst2
          - storycloze
          - 'storycloze_*'
          - 'super-glue-*'
          - swag
          - sycophancy
          - 'sycophancy_on_*'
          - t0_eval
          - toxigen
          - translation
          - triviaqa
          - truthfulqa
          - 'truthfulqa_*'
          - unscramble
          - webqs
          - wic
          - wikitext
          - winogrande
          - wmt-ro-en-t5-prompt
          - wmt-t5-prompt
          - wmt14
          - 'wmt14-*'
          - wmt16
          - 'wmt16-*'
          - wnli
          - wsc
          - wsc273
          - xcopa
          - 'xcopa_*'
          - xnli
          - 'xnli_*'
          - xstorycloze
          - 'xstorycloze_*'
          - xwinograd
          - 'xwinograd_*'

    # Name of the job
    name: "[${{ matrix.run_task }}] ${{ github.event.inputs.model_hf_repo }} - ${{ github.event.inputs.model_args }}"

    # Due to github worker hard limitation, of 24 hours
    # we apply a timeout of 23 hours instead.
    timeout-minutes: 1380

    # Route to a self-hosted runner matching the requested backend and VRAM
    runs-on:
      - ${{ github.event.inputs.backend }}
      - gpu-vram-${{ github.event.inputs.gpu_vram }}

    # Actual task setup, and run steps
    steps:
      - name: Checkout repository
        # v3 runs on a deprecated Node runtime; v4 is the supported line
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies / setup project
        run: |
          # Output dir for lm-eval results
          mkdir -p ./output
          # Editable install of the project (a plain `pip install .` before
          # this was redundant with the editable install and has been dropped)
          python -m pip install -e .
          # Setup HF cache
          chmod +x ./gh-task-runner/*.sh
          ./gh-task-runner/hf-cache-setup.sh

      - name: Run Task
        run: |
          echo "# ------------------------------"
          echo "# Running Task ...."
          echo "# ------------------------------"
          # Quote the task pattern: entries like 'mmlu_*' must reach lm-eval
          # verbatim instead of being glob-expanded by the shell
          task_to_run="${{ matrix.run_task }}"
          # Fail the step when lm-eval fails, even though output pipes to tee
          set -o pipefail
          # NOTE(review): --device mps looks copied from an Apple-silicon
          # workflow; the runners selected above are CUDA/Intel/AMD hosts.
          # Confirm whether accelerate overrides it, else drop/parameterize.
          # Run with --num_fewshot only when the setting is >= 0
          if [ ${{ github.event.inputs.num_fewshot }} -ge 0 ]; then
            accelerate launch -m lm_eval --model hf \
              --model_args pretrained=${{ github.event.inputs.model_hf_repo }},${{ github.event.inputs.model_args }} \
              --tasks "$task_to_run" \
              --batch_size ${{ github.event.inputs.batch_size }} \
              --device mps \
              --num_fewshot ${{ github.event.inputs.num_fewshot }} \
              --log_samples --output_path ./output 2>&1 | tee -a ./output/taskrun.log
          else
            accelerate launch -m lm_eval --model hf \
              --model_args pretrained=${{ github.event.inputs.model_hf_repo }},${{ github.event.inputs.model_args }} \
              --tasks "$task_to_run" \
              --batch_size ${{ github.event.inputs.batch_size }} \
              --device mps \
              --log_samples --output_path ./output 2>&1 | tee -a ./output/taskrun.log
          fi

      - name: Upload outputs to HF
        if: always()
        run: |
          # '*' is not valid in a path segment; replace it with '_'
          CLEANED_TASK=$(echo "${{ matrix.run_task }}" | sed 's/\*/_/g')
          HF_SUBDIR_PATH="${{ env.MODEL_HF_REPO }}/$CLEANED_TASK/${{ github.event.inputs.model_args }}-num_fewshot=${{ github.event.inputs.num_fewshot }}/${{ github.event.inputs.backend }}/"
          ./gh-task-runner/hf-upload-runner.sh "${{ env.HF_REPO_SYNC }}" "$HF_SUBDIR_PATH" "./output"

      # Note that this is meant to be a contigency measure, in case the HF upload failed
      - name: Save output Files
        if: always()
        # v3 artifact actions are shut down; v4 also forbids two jobs uploading
        # the same artifact name, so suffix with the matrix job index
        uses: actions/upload-artifact@v4
        with:
          name: output-files-${{ strategy.job-index }}
          path: |
            output/*
          retention-days: 365