Workflow file for run lite/challenge-subset #613

name: Run the benchmark
on:
  workflow_dispatch:
    inputs:
      llm:
        description: "LLM model to use"
        type: choice
        required: true
        default: "gpt-4o-mini"
        options:
          - gpt-4o
          - gpt-4o-mini
          - claude3.5
      config:
        description: "Path to the configuration file in ./bin/solve_config"
        required: true
        type: string
        default: lite/marshmallow
      runner:
        description: "Runner type"
        required: true
        default: ubuntu-latest
        type: choice
        options:
          - ubuntu-latest
          - swe-bench-ubuntu-latest
          - SWE-Bench_Larger
      num_runners:
        description: "Number of runners to split the workload across"
        required: true
        default: "1"
      name:
        description: "Assign a name to the workflow run"
        type: string
        required: false
  pull_request:
    types: [opened, synchronize, labeled]
run-name: ${{ inputs.name || inputs.config || github.event.pull_request.title || github.event.workflow.name }}
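# Run title precedence: explicit name input, then config path, then PR title, then the workflow's own name.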
jobs:
  show-inputs:
    runs-on: 'ubuntu-latest'
    steps:
      - name: Display Input Values
        run: |
          echo "llm: ${{ github.event.inputs.llm }}"
          echo "config: ${{ github.event.inputs.config }}"
          echo "runner: ${{ github.event.inputs.runner }}"
          echo "num_runners: ${{ github.event.inputs.num_runners }}"
          echo "name: ${{ github.event.inputs.name }}"
  build-appmap-js:
    runs-on: 'ubuntu-latest'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
      # Cache the appmap-js build
      - name: Cache appmap-js build
        uses: actions/cache@v4
        id: cache-appmap-js
        with:
          lookup-only: true
          path: |
            submodules/appmap-js/node_modules
            submodules/appmap-js/packages/*/built
            submodules/appmap-js/packages/*/dist
            submodules/appmap-js/packages/*/node_modules
          key: appmap-js-dist-${{ runner.os }}-${{ hashFiles('.git/modules/submodules/appmap-js/HEAD') }}
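      # hashFiles on the submodule's HEAD ties the cache key to the pinned appmap-js
      # commit, so bumping the submodule invalidates the cache.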
      - name: Set up Node.js
        if: steps.cache-appmap-js.outputs.cache-hit != 'true'
        uses: actions/setup-node@v3
      - name: Build submodules
        if: steps.cache-appmap-js.outputs.cache-hit != 'true'
        env:
          PUPPETEER_SKIP_DOWNLOAD: true
        run: |
          cd submodules/appmap-js
          git checkout -- .
          yarn
          yarn build
          chmod +x packages/cli/built/cli.js
  prepare-matrix:
    runs-on: ubuntu-latest
    env:
      NUM_RUNNERS: ${{ inputs.num_runners }}
    outputs:
      matrix: ${{ steps.prepare-matrix.outputs.matrix }}
    steps:
      - name: Prepare matrix
        id: prepare-matrix
        run: |
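          # Produce a compact JSON array of shard indices (e.g. [0,1,2] for three
          # runners) for the solve job's matrix.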
          num_runners=${NUM_RUNNERS:-1}
          echo "Number of runners: $num_runners"
          indices=$(seq 0 $((num_runners - 1)) | jq -R 'tonumber' | jq -s -c '.')
          echo "Matrix: $indices"
          echo "matrix=$indices" >> $GITHUB_OUTPUT
  solve:
    needs:
      - build-appmap-js
      - prepare-matrix
    runs-on: ${{ inputs.runner || 'ubuntu-latest' }}
    strategy:
      matrix:
        index: ${{ fromJson(needs['prepare-matrix'].outputs.matrix) }}
    defaults:
      run:
        shell: bash -leo pipefail {0}
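    # -e and pipefail make every run step fail fast; the login shell (-l) sources
    # profile files, presumably so conda's shell hooks are available.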
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
      # Cache the conda environment
      - name: Cache conda environment
        id: cache-conda
        uses: actions/cache@v4
        with:
          path: /usr/share/miniconda/envs/swe-bench
          key: conda-${{ runner.os }}-${{ hashFiles('environment.yml') }}
      # Create conda env if cache miss happens
      - name: Create conda env
        if: steps.cache-conda.outputs.cache-hit != 'true'
        run: |
          conda init bash
          conda env create -f environment.yml
      # Restore the appmap-js build
      - name: Restore appmap-js build
        uses: actions/cache/restore@v4
        id: cache-appmap-js
        with:
          fail-on-cache-miss: true
          path: |
            submodules/appmap-js/node_modules
            submodules/appmap-js/packages/*/built
            submodules/appmap-js/packages/*/dist
            submodules/appmap-js/packages/*/node_modules
          key: appmap-js-dist-${{ runner.os }}-${{ hashFiles('.git/modules/submodules/appmap-js/HEAD') }}
      - name: Install dependencies
        run: |
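          # TeX Live, Ghostscript, and the image libraries below are presumably required
          # by benchmark task test suites that render documentation and plots.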
          sudo apt-get update
          sudo apt-get install -y \
            texlive \
            texlive-xetex \
            dvipng \
            ghostscript \
            libfreetype-dev \
            libtiff-dev \
            libxrender1
      - name: Run benchmark
        env:
          GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          CONFIG: ${{ inputs.config }}
          LLM: ${{ inputs.llm }}
          NUM_RUNNERS: ${{ inputs.num_runners }}
        run: |
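          # inputs.* are empty on pull_request-triggered runs, so the ${VAR:-default}
          # fallbacks below mirror the workflow_dispatch defaults.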
          source /usr/share/miniconda/etc/profile.d/conda.sh
          conda activate swe-bench
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          llm="${LLM:-gpt-4o-mini}"
          config="${CONFIG:-lite/marshmallow}"
          ./bin/solve "$llm" "$config" \
            --temp_dir "${{ runner.temp }}" \
            --path_conda "$(conda info --base)" \
            --num_runners "${NUM_RUNNERS:-1}" \
            --runner_index "${{ matrix.index }}"
      - name: Run evaluation
        env:
          CONFIG: ${{ inputs.config }}
        run: |
          mkdir -p logs
          source /usr/share/miniconda/etc/profile.d/conda.sh
          conda activate swe-bench
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          config="${CONFIG:-lite/marshmallow}"
          tasks=$(awk '/--instances/ { sub(/--instances/, "--swe_bench_tasks"); print }' ./bin/solve_config/$config.txt)
          python swebench/harness/run_evaluation.py \
            --predictions_path predictions.jsonl \
            $tasks \
            --log_dir logs \
            --testbed "${{ runner.temp }}" \
            --skip_existing \
            --timeout 900 \
            --verbose \
            --num_processes 8 \
            --path_conda "$(conda info --base)"
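      # The archive is already xz-compressed, so the upload step disables its own
      # compression (compression-level: 0).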
      - name: Compress evaluation results
        if: ${{ always() }}
        run: |
          tar -cJf output_${{ matrix.index }}.tar.xz logs predictions.jsonl
      - name: Upload evaluation results
        uses: actions/upload-artifact@v4
        if: ${{ always() }}
        with:
          name: output_${{ matrix.index }}
          path: output_${{ matrix.index }}.tar.xz
          compression-level: 0
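  # Aggregate the per-shard artifacts into a single report. The success/failure guard
  # lets reporting run even when some solve shards fail, but not when the run is cancelled.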
  report:
    needs:
      - solve
    if: needs.solve.result == 'success' || needs.solve.result == 'failure'
    runs-on: 'ubuntu-latest'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
      - name: Download evaluation results
        uses: actions/download-artifact@v4
        with:
          path: ./eval-results
      - name: Unpack evaluation results
        run: |
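          # Stream each shard's predictions.jsonl to stdout and append into one combined
          # file; the logs/ trees are unpacked alongside it.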
          mkdir -p logs
          for file in eval-results/**/*.tar.xz; do
            tar --to-stdout -xJf "$file" predictions.jsonl >> predictions.jsonl
            tar -xJf "$file" logs
          done
      # Cache the conda environment
      - name: Cache conda environment
        id: cache-conda
        uses: actions/cache@v4
        with:
          path: /usr/share/miniconda/envs/swe-bench
          key: conda-${{ runner.os }}-${{ hashFiles('environment.yml') }}
      # Create conda env if cache miss happens
      - name: Create conda env
        if: steps.cache-conda.outputs.cache-hit != 'true'
        run: |
          conda init bash
          conda env create -f environment.yml
      - name: Generate AppMap report
        if: always()
        env:
          CONFIG: ${{ inputs.config }}
        run: |
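          # Lift the --instances and --split arguments directly from the solve config so
          # the report covers the same task set and split as the solve run.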
          source /usr/share/miniconda/etc/profile.d/conda.sh
          conda activate swe-bench
          export PYTHONPATH=$PYTHONPATH:$(pwd)
          conda info
          config="${CONFIG:-lite/marshmallow}"
          instances=$(awk '/--instances/ { print }' ./bin/solve_config/$config.txt)
          split=$(awk '/--split/ { print }' ./bin/solve_config/$config.txt)
          python solver/report.py \
            $instances \
            $split
      - name: Archive predictions and logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ github.run_id }}
          path: |
            logs/
            predictions.jsonl
            results.csv