# GH Task Runner (Large Suite 1) — run #4
# NOTE(review): the original paste carried GitHub web-UI boilerplate here
# (the "bidirectional Unicode" viewer banner); it is not workflow content.
name: GH Task Runner (Large Suite 1)

# Manually-dispatched workflow: fans one lm-eval task pattern per matrix job
# across self-hosted GPU runners.
on:
  workflow_dispatch:
    inputs:
      approval_notice:
        description: 'WARNING: This will spin up a large number of tasks - get approval from admin before running'
        required: false
        default: 'NOT_APPROVED'
        type: choice
        options:
          - NOT_APPROVED
          - APPROVED
      model_hf_repo:
        description: 'Model Hugging Face Repository'
        required: true
        default: 'RWKV/rwkv-5-world-1b5'
      model_args:
        description: 'Model Arguments'
        required: false
        default: 'dtype="float32",trust_remote_code=True'
      batch_size:
        description: 'Batch Size'
        required: true
        default: 'auto'
      backend:
        description: 'Backend to use'
        required: true
        default: 'nvidia-gpu'
        type: choice
        options:
          - nvidia-gpu
          - intel-gpu
          - amd-gpu
          - any-gpu
      gpu_vram:
        description: 'Minimum GPU VRAM (ignored for MPS)'
        required: true
        default: '24'
        # Quoted: workflow_dispatch choice values are strings; bare ints here
        # would not match the string default '24'.
        type: choice
        options:
          - '16'
          - '24'
          - '40'
          - '48'
          - '80'
      num_fewshot:
        description: 'num_fewshot setting (ignored if < 0)'
        required: true
        # Quoted: all workflow_dispatch inputs are strings; a bare -1 invites
        # implicit-typing surprises in generic YAML tooling.
        default: '-1'
env:
  # Get the final task.
  # NOTE(review): neither `custom_task` nor `run_task` is a declared
  # workflow_dispatch input in this file, so this resolves to empty; the job
  # below actually uses `matrix.run_task`. Confirm before relying on RUN_TASK.
  RUN_TASK: ${{ github.event.inputs.custom_task || github.event.inputs.run_task }}
  # HF repo to sync eval output to
  HF_REPO_SYNC: rwkv-x-dev/lm-eval-output
  # Model HF repo
  MODEL_HF_REPO: ${{ github.event.inputs.model_hf_repo }}
  # Secrets
  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
jobs:
  gh-task-runner-large-suite-1:
    # Hard gate: only run when the operator explicitly chose APPROVED.
    if: ${{ github.event.inputs.approval_notice == 'APPROVED' }}
    # Strategy Matrix
    strategy:
      # Disable fail-fast so one failing task does not cancel the rest.
      fail-fast: false
      matrix:
        # NOTE: There is a matrix limit of 256 on github.
        # Entries containing `*` are quoted: they are lm-eval glob patterns,
        # and quoting keeps YAML tooling (and reviewers) from misreading them.
        run_task:
          - anli
          - arc_easy
          - arc_challenge
          - ai2_arc
          - 'anagrams*'
          - 'anli_*'
          - advanced_ai_risk
          - 'advanced_ai_risk_fewshot-*'
          - 'advanced_ai_risk_human-*'
          - 'advanced_ai_risk_lm-*'
          - arithmetic
          - 'arithmetic_*'
          - asdiv
          - babi
          - bbh
          - bbh_cot_fewshot
          - 'bbh_cot_fewshot_*'
          - bbh_cot_zeroshot
          - 'bbh_cot_zeroshot_*'
          - bbh_fewshot
          - 'bbh_fewshot_*'
          - bbh_zeroshot
          - 'bbh_zeroshot_*'
          - belebele
          - 'belebele_*'
          - 'bigbench_*'
          - blimp
          - 'blimp_*'
          - boolq
          - boolq-seq2seq
          - cb
          - ceval-valid
          - 'ceval-valid_*'
          - chain_of_thought
          - cmmlu
          - 'cmmlu_*'
          - 'code2text_*'
          - codexglue_code2text
          - cola
          - copa
          - coqa
          - crows_pairs
          - 'crows_pairs_*'
          - csatqa
          - 'csatqa_*'
          - cycle_letters
          - drop
          - 'ethics_*'
          - 'flan_held_*'
          - fld
          - 'fld_*'
          - freebase
          - generate_until
          - glue
          - gpt3_translation_benchmarks
          - gsm8k
          - gsm8k_cot
          - gsm8k_cot_self_consistency
          - headqa
          - headqa_en
          - headqa_es
          - hellaswag
          - 'hellaswag_*'
          - hendrycks_ethics
          - ifeval
          - iwslt2017
          - 'iwslt2017-*'
          - kmmlu
          - 'kmmlu_*'
          - kobest
          - 'kobest_*'
          - lambada
          - 'lambada_*'
          - logieval
          - logiqa
          - logiqa2
          - loglikelihood
          - math_word_problems
          - mathqa
          - mc_taco
          - medmcqa
          - medqa_4options
          - 'mgsm_*'
          - minerva_math
          - 'minerva_math_*'
          - mmlu
          - 'mmlu_*'
          - mnli
          - mnli_mismatch
          - mrpc
          - multimedqa
          - multiple_choice
          - multirc
          - mutual
          - mutual_plus
          - nq_open
          - openbookqa
          - 'paws_*'
          - pawsx
          - persona
          - 'persona_*'
          - pile
          - 'pile_*'
          - piqa
          - polemo2
          - 'polemo2_*'
          - prost
          - pubmedqa
          - pythia
          - qa4mre
          - 'qa4mre_*'
          - qasper
          - 'qasper_*'
          - qnli
          - qqp
          - race
          - random_insertion
          - realtoxicityprompts
          - record
          - reversed_words
          - rte
          - sciq
          - scrolls
          - self_consistency
          - sglue_rte
          - social_bias
          - social_iqa
          - squadv2
          - sst2
          - storycloze
          - 'storycloze_*'
          - 'super-glue-*'
          - swag
          - sycophancy
          - 'sycophancy_on_*'
          - t0_eval
          - toxigen
          - translation
          - triviaqa
          - truthfulqa
          - 'truthfulqa_*'
          - unscramble
          - webqs
          - wic
          - wikitext
          - winogrande
          - wmt-ro-en-t5-prompt
          - wmt-t5-prompt
          - wmt14
          - 'wmt14-*'
          - wmt16
          - 'wmt16-*'
          - wnli
          - wsc
          - wsc273
          - xcopa
          - 'xcopa_*'
          - xnli
          - 'xnli_*'
          - xstorycloze
          - 'xstorycloze_*'
          - xwinograd
          - 'xwinograd_*'
    # Name of the job
    name: "[${{ matrix.run_task }}] ${{ github.event.inputs.model_hf_repo }} - ${{ github.event.inputs.model_args }}"
    # Due to github worker hard limitation, of 24 hours
    # we apply a timeout of 23 hours instead.
    timeout-minutes: 1380
    # Route to a self-hosted runner matching the backend + VRAM labels.
    runs-on:
      - ${{ github.event.inputs.backend }}
      - gpu-vram-${{ github.event.inputs.gpu_vram }}
    # Actual task setup, and run steps
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies / setup project
        run: |
          # Basic dependencies install, and output setup
          mkdir -p ./output
          python -m pip install .
          python -m pip install -e .
          # Setup HF cache
          chmod +x ./gh-task-runner/*.sh
          ./gh-task-runner/hf-cache-setup.sh
      - name: Run Task
        run: |
          # Run it
          echo "# ------------------------------"
          echo "# Running Task ...."
          echo "# ------------------------------"
          # Get the final task to run.
          # Quoted: many task patterns contain `*` and must not be
          # glob-expanded by the shell if a matching file happens to exist.
          task_to_run="${{ matrix.run_task }}"
          # Check if the few shot setting is larger or equal to 0.
          # Quoted so an empty input fails this test cleanly instead of
          # producing a bash syntax error.
          if [ "${{ github.event.inputs.num_fewshot }}" -ge 0 ]; then
            # Fail on pipe error
            set -o pipefail
            # Run it.
            # NOTE(review): --device mps looks copied from an MPS runner
            # workflow while `backend` selects GPU runners — confirm that
            # accelerate overrides the device here.
            accelerate launch -m lm_eval --model hf \
              --model_args pretrained=${{ github.event.inputs.model_hf_repo }},${{ github.event.inputs.model_args }} \
              --tasks "$task_to_run" \
              --batch_size "${{ github.event.inputs.batch_size }}" \
              --device mps \
              --num_fewshot "${{ github.event.inputs.num_fewshot }}" \
              --log_samples --output_path ./output 2>&1 | tee -a ./output/taskrun.log
          else
            # Fail on pipe error
            set -o pipefail
            # Run it (no --num_fewshot: negative setting means "use task default")
            accelerate launch -m lm_eval --model hf \
              --model_args pretrained=${{ github.event.inputs.model_hf_repo }},${{ github.event.inputs.model_args }} \
              --tasks "$task_to_run" \
              --batch_size "${{ github.event.inputs.batch_size }}" \
              --device mps \
              --log_samples --output_path ./output 2>&1 | tee -a ./output/taskrun.log
          fi
      - name: Upload outputs to HF
        if: always()
        run: |
          # Replace `*` in the task pattern so it is a safe path segment
          CLEANED_TASK=$(echo "${{ matrix.run_task }}" | sed 's/\*/_/g')
          HF_SUBDIR_PATH="${{ env.MODEL_HF_REPO }}/$CLEANED_TASK/${{ github.event.inputs.model_args }}-num_fewshot=${{ github.event.inputs.num_fewshot }}/${{ github.event.inputs.backend }}/"
          ./gh-task-runner/hf-upload-runner.sh "${{ env.HF_REPO_SYNC }}" "$HF_SUBDIR_PATH" "./output"
      # Note that this is meant to be a contingency measure, in case the HF upload failed
      - name: Save output Files
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: output-files
          path: |
            output/*
          retention-days: 365