# .github/workflows/gh-task-runner-Error-Suite.yml

name: GH Task Runner (Error Suite)
on:
  # Manual trigger only: every value below comes from the workflow_dispatch form.
  workflow_dispatch:
    inputs:
      # Safety gate: the gh-task-runner job's `if:` compares this to 'APPROVED'.
      approval_notice:
        description: 'WARNING: This will spin up a large number of tasks - get approval from admin before running'
        required: false
        default: 'NOT_APPROVED'
        type: choice
        options:
          - NOT_APPROVED
          - APPROVED
      # Forwarded to the composite action as `model_hf_repo`.
      model_hf_repo:
        description: 'Model Hugging Face Repository'
        required: true
        default: 'RWKV/rwkv-5-world-1b5'
      # Forwarded to the composite action as `model_args`; also shown in the job name.
      model_args:
        description: 'Model Arguments (ie: dtype="float16")'
        required: false
        default: 'dtype=bfloat16,trust_remote_code=True'
      # Forwarded to the composite action as `batch_size`.
      batch_size:
        description: 'Batch Size'
        required: true
        default: 'auto'
      # backend:
      #   description: 'Backend to use'
      #   required: true
      #   default: 'nvidia-gpu'
      #   type: choice
      #   options:
      #   - nvidia-gpu
      #   - intel-gpu
      #   - amd-gpu
      #   - any-gpu
      # Selects the runner label `gpu-vram-<value>` used by `runs-on`.
      # Options are quoted so they stay strings, matching the quoted default
      # (unquoted 16 / 24 / ... would be parsed as integers by YAML).
      gpu_vram:
        description: 'Minimum GPU VRAM (ignored for MPS)'
        required: true
        default: '24'
        type: choice
        options:
          - '16'
          - '24'
          # - '40'
          - '48'
          - '80'
      # Forwarded to the composite action as `rwkv5_file_url`; when non-empty the
      # job name shows `rwkv5_test_name` instead of the HF repo.
      rwkv5_file_url:
        description: 'Model file URL (for rwkv5 .pth eval)'
        default: ''
        required: false
      rwkv5_test_name:
        description: 'Model dev test name (for test)'
        default: 'TEST_MODEL_FILE'
        required: false
      # No `type` is declared, so this input is a string; the default is quoted
      # so it is not parsed as the integer -1.
      num_fewshot:
        description: 'num_fewshot setting (ignored if < 0)'
        required: true
        default: '-1'
      # Forwarded to the composite action as `upload_output`.
      upload_output:
        description: 'Upload to HF / B2'
        required: false
        default: true
        type: boolean

env:
  # Task selector exported to every step.
  # NOTE(review): neither `custom_task` nor `run_task` is declared under
  # `workflow_dispatch.inputs` in this file, so this expression looks like it
  # always resolves to an empty value here; the matrix task is handed to the
  # composite action through `with.run_task` instead - confirm whether
  # RUN_TASK is still read anywhere.
  RUN_TASK: ${{ github.event.inputs.custom_task || github.event.inputs.run_task }}

  # Hugging Face repository that results are synced to
  HF_REPO_SYNC: 'rwkv-x-dev/lm-eval-output'

  # Hugging Face repository of the model, taken from the dispatch form
  MODEL_HF_REPO: ${{ github.event.inputs.model_hf_repo }}

  # Credentials and destination for the HF / B2 sync
  # (repository secrets, plus one repository variable for the B2 path)
  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
  B2_APPLICATION_KEY_ID: ${{ secrets.B2_APPLICATION_KEY_ID }}
  B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
  B2_PATH_LM_EVAL_OUTPUT: ${{ vars.B2_PATH_LM_EVAL_OUTPUT }}

jobs:
  gh-task-runner:

    # Run only when the dispatcher explicitly selected APPROVED
    if: github.event.inputs.approval_notice == 'APPROVED'

    # One job is spawned per `run_task` entry below
    strategy:
      # Keep the remaining matrix jobs running when one of them fails
      fail-fast: false
      matrix:

        # NOTE: GitHub caps a matrix at 256 jobs
        run_task:
          ### Known problematic tasks (various error reasons - file missing, temp=0, etc)
          ### ----------------------------------------------------------------------------
          ## ----
          ## To be double checked (removed before the list was organized by failure reason)
          ## ----
          - 'bbh'
          - 'bbh_cot_zeroshot'
          # - 'bbh_cot_zeroshot_*'
          - 'bbh_fewshot'
          # - 'bbh_fewshot_*'
          - 'bbh_zeroshot'
          # - 'bbh_zeroshot_*'
          - 'bigbench_*'
          ## ----
          ## temp=0 issues
          ## ----
          - 'anagrams*'
          - 'babi'
          - 'code2text_*'
          - 'codexglue_code2text'
          - 'coqa'
          - 'cycle_letters'
          - 'drop'
          - 'random_insertion'
          - 'unscramble'
          - 'super-glue-*'
          - 'squadv2'
          - 'scrolls'
          - 'reversed_words'
          - 'qasper'
          # - 'qasper_*'
          ## ----
          ## Does not exist / file 404s / broken links
          ## ----
          - 'csatqa'
          - 'csatqa_*'
          - 'belebele'
          # - 'belebele_*'
          - 'generate_until'
          - 'polemo2'
          # - 'polemo2_*'
          - 'pile'
          # - 'pile_*'
          ## ----
          ## Weird package dependencies
          ## ----
          #### ifeval: needs `pip install langdetect immutabledict nltk` (not documented);
          #### even with those installed it still fails with a "missing index error"
          - 'ifeval'
          #### minerva_math: antlr4 (not on pip?)
          - 'minerva_math'
          # - 'minerva_math_*'
          #### realtoxicityprompts: requires PERSPECTIVE_API_KEY
          - 'realtoxicityprompts'
          ## ----
          ## Requires Hugging Face login
          ## ----
          - 'toxigen'
          ## ----
          ## Weird errors (need to reinvestigate)
          ## ----
          - 'gpt3_translation_benchmarks'
          - 'headqa'
          # - 'headqa_en'
          # - 'headqa_es'
          - 'iwslt2017'
          # - 'iwslt2017-*'
          - 'wmt-ro-en-t5-prompt'
          - 'wmt-t5-prompt'
          - 't0_eval'
          - 'storycloze'
          # - 'storycloze_*'
          - 'self_consistency'

    # Job display name: "[<task>] <model> - <model args>", where <model> is the
    # dev test name when a rwkv5 file URL was given and the HF repo otherwise
    name: "[${{ matrix.run_task }}] ${{ github.event.inputs.rwkv5_file_url && github.event.inputs.rwkv5_test_name || github.event.inputs.model_hf_repo }} - ${{ github.event.inputs.model_args }}"

    # Cap each job at 23 hours (1380 minutes), staying below the 24 hour
    # worker limit this workflow was written against.
    timeout-minutes: 1380

    # Runner selection: a runner must carry every label listed here.
    # The VRAM label is built from the `gpu_vram` dispatch input.
    runs-on:
      - nvidia-gpu
      - gpu-vram-${{ github.event.inputs.gpu_vram }}
      # - gpu-count-8

    # Actual task setup, and run steps
    steps:
      # v4 replaces v3, which runs on the deprecated Node 16 action runtime.
      # The checkout is required before the local composite action can be used.
      - name: Checkout repository
        uses: actions/checkout@v4

      # Delegate the evaluation to the repository-local composite action,
      # passing the matrix task plus the dispatch form values through unchanged.
      - name: Run the task
        uses: ./.github/actions/gh-task-runner-composite
        with:
          run_task: ${{ matrix.run_task }}
          num_fewshot: ${{ github.event.inputs.num_fewshot }}
          model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
          model_args: ${{ github.event.inputs.model_args }}
          batch_size: ${{ github.event.inputs.batch_size }}
          rwkv5_file_url: ${{ github.event.inputs.rwkv5_file_url }}
          rwkv5_test_name: ${{ github.event.inputs.rwkv5_test_name }}
          upload_output: ${{ github.event.inputs.upload_output }}

  # upload_output:
  #   name: "Upload to HF / B2"
  #   needs: gh-task-runner
  #   runs-on: ubuntu-latest
  #   if: ${{ github.event.inputs.upload_output }}
  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v3
  #     - name: Run the task
  #       uses: ./.github/actions/gh-upload-output
  #       with:
  #         num_fewshot: ${{ github.event.inputs.num_fewshot }}
  #         model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
  #         model_args: ${{ github.event.inputs.model_args }}
  #         batch_size: ${{ github.event.inputs.batch_size }}
  #         backend: nvidia-gpu