# .github/workflows/test-branch.yml

# Display name of this workflow.
name: Test - Models

on:
  # Automatic trigger: any push to the CI-CD/bach branch.
  push:
    branches:
      - CI-CD/bach
  # Manual trigger: every value below can be overridden when dispatching.
  workflow_dispatch:
    inputs:
      # Model to test.
      model_id:
        type: string
        required: true
        default: 'homebrewltd/llama3.1-s-instruct-v0.2'
        description: 'Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708'
      # Dataset the tests read from.
      dataset_id:
        type: string
        required: true
        default: 'jan-hq/instruction-speech-conversation-test'
        description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test'
      # Free-form flags appended to the python test command.
      extra_args:
        type: string
        required: false
        default: '--mode audio --num_rows 5'
        description: 'Extra arguments for python command, for example:--mode audio --num_rows 5'
      # Toggles for the optional benchmark stages.
      run_benchmark:
        type: boolean
        required: false
        default: true
        description: 'Run benchmark test'
      run_si_benchmark:
        type: boolean
        required: false
        default: true
        description: 'Run SI benchmark'
      run_asr_benchmark:
        type: boolean
        required: false
        default: true
        description: 'Run ASR benchmark'

jobs:
  # The only job in this workflow; it is scheduled on a runner labelled
  # `research`.
  run-test:
    runs-on: research
    steps:
      # Check out the repository together with its submodules (recursively).
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Derive the bare model name (the text after the last '/') from the
      # model ID and export it as MODEL_NAME for later steps via GITHUB_ENV.
      - name: Extract Model Name
        id: extract_model_name
        env:
          # Handed over through the environment instead of being expanded
          # into the script text, so the input is never parsed as shell code.
          MODEL_ID: ${{ github.event.inputs.model_id || 'homebrewltd/llama3.1-s-instruct-v0.2' }}
        run: |
          # ${VAR##*/} strips everything up to and including the last '/'.
          echo "MODEL_NAME=${MODEL_ID##*/}" >> "$GITHUB_ENV"

      # Upgrade pip, then install the packages listed in
      # tests/requirements.txt.
      - name: Install dependencies
        run: |
          python3 -m pip install --upgrade pip
          pip3 install --requirement requirements.txt
        working-directory: ./tests

      # Run tests/test_case.py against the selected model and dataset.
      # When an input is empty (for example on push events) the hard-coded
      # fallback after `||` is used instead.
      - name: Run tests
        working-directory: ./tests
        # Bash is required for the array handling below.
        shell: bash
        env:
          # Inputs are passed through the environment rather than being
          # expanded into the script text, so they cannot inject shell code.
          MODEL_ID: ${{ github.event.inputs.model_id || 'homebrewltd/llama3.1-s-instruct-v0.2' }}
          DATASET_ID: ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }}
          EXTRA_ARGS: ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }}
        run: |
          # Split the extra arguments on whitespace without glob expansion.
          # NOTE(review): shell quoting inside extra_args is no longer
          # interpreted; each whitespace-separated word becomes one argument.
          read -r -a extra_args <<< "$EXTRA_ARGS"
          python3 test_case.py --model_dir "$MODEL_ID" --data_dir "$DATASET_ID" "${extra_args[@]}"
      
      # Install lm-evaluation-harness in editable mode plus the vllm extra.
      # Runs only when the run_benchmark input is the string 'true'.
      - name: Install benchmark dependencies
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        working-directory: ./lm-evaluation-harness
        run: |
          pip3 install -e .
          # Quoted so the shell cannot treat [vllm] as a glob character class.
          pip3 install 'lm_eval[vllm]'
          # Make user-level console scripts visible to the following steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      # Execute lm-evaluation-harness/run_benchmark.sh for the selected model.
      # Runs only when the run_benchmark input is the string 'true'.
      - name: Run benchmark
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        working-directory: ./lm-evaluation-harness
        env:
          # Passed through the environment so the input is never parsed as
          # shell code.
          MODEL_ID: ${{ github.event.inputs.model_id || 'homebrewltd/llama3.1-s-instruct-v0.2' }}
        run: |
          chmod +x ./run_benchmark.sh
          ./run_benchmark.sh "$MODEL_ID"

      # Publish the JSON files under lm-evaluation-harness/benchmark_results
      # as the `benchmark-results` artifact.
      # Runs only when the run_benchmark input is the string 'true'.
      - name: Upload benchmark results
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        # v2 of this action has been retired by GitHub; v4 is the supported
        # major version.
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: ./lm-evaluation-harness/benchmark_results/**/*.json

      # Execute AudioBench/eval_si.sh for the selected model.
      # Runs only when the run_si_benchmark input is the string 'true'.
      - name: Eval on Speech Instruction Benchmark
        if: ${{ github.event.inputs.run_si_benchmark == 'true' }}
        working-directory: ./AudioBench
        env:
          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
          # Passed through the environment so the input is never parsed as
          # shell code.
          MODEL_ID: ${{ github.event.inputs.model_id || 'homebrewltd/llama3.1-s-instruct-v0.2' }}
        run: |
          chmod +x eval_si.sh
          ./eval_si.sh "$MODEL_ID"

      # Execute AudioBench/eval_asr.sh for the selected model.
      # Runs only when the run_asr_benchmark input is the string 'true'.
      - name: Eval on ASR Benchmark
        if: ${{ github.event.inputs.run_asr_benchmark == 'true' }}
        working-directory: ./AudioBench
        env:
          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
          # Passed through the environment so the input is never parsed as
          # shell code.
          MODEL_ID: ${{ github.event.inputs.model_id || 'homebrewltd/llama3.1-s-instruct-v0.2' }}
        run: |
          chmod +x eval_asr.sh
          ./eval_asr.sh "$MODEL_ID"

      # Publish the JSON logs under AudioBench/benchmark_results/log for the
      # tested model (MODEL_NAME comes from the job environment) as the
      # `audio-benchmark-results` artifact.
      # Gated on the audio benchmark toggles (SI or ASR) rather than on
      # run_benchmark, which controls only the lm-evaluation-harness stage.
      - name: Upload audio results
        if: ${{ github.event.inputs.run_si_benchmark == 'true' || github.event.inputs.run_asr_benchmark == 'true' }}
        # v2 of this action has been retired by GitHub; v4 is the supported
        # major version.
        uses: actions/upload-artifact@v4
        with:
          name: audio-benchmark-results
          path: ./AudioBench/benchmark_results/log/${{ env.MODEL_NAME }}/*.json