Run evaluation #1
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Manually dispatched workflow that evaluates a predictions file and then
# builds a report from the collected evaluation logs.
name: Run evaluation

on:
  workflow_dispatch:
    inputs:
      # Resolved by the jobs as ./bin/solve_config/<config>.txt.
      config:
        description: 'Path to the configuration file in ./bin/solve_config'
        type: string
        required: true
        default: lite/marshmallow
      # Runner label used for the evaluation job.
      runner:
        description: 'Runner type'
        type: choice
        required: true
        default: ubuntu-latest
        options:
          - ubuntu-latest
          - swe-bench-ubuntu-latest
          - SWE-Bench_Larger
      # Determines how many entries the evaluation job matrix gets.
      # Kept quoted so the default stays a string.
      num_runners:
        description: 'Number of runners to split the workload across'
        required: true
        default: "1"
      # Copied to predictions.jsonl before the evaluation harness runs.
      predictions:
        description: 'Path to the predictions file'
        type: string
        required: true
jobs:
  # Turns the num_runners input into a JSON array of indices
  # (e.g. "3" -> [0,1,2]) that the solve job expands into its matrix.
  prepare-matrix:
    runs-on: ubuntu-latest
    env:
      NUM_RUNNERS: ${{ inputs.num_runners }}
    outputs:
      matrix: ${{ steps.prepare-matrix.outputs.matrix }}
    steps:
      - name: Prepare matrix
        id: prepare-matrix
        run: |
          num_runners="${NUM_RUNNERS:-1}"
          # Reject zero / non-numeric values up front: they would otherwise
          # produce an empty matrix and an obscure failure in the solve job.
          if ! [[ "$num_runners" =~ ^[1-9][0-9]*$ ]]; then
            echo "::error::num_runners must be a positive integer, got '$num_runners'"
            exit 1
          fi
          echo "Number of runners: $num_runners"
          # Compact JSON array 0..num_runners-1.
          indices=$(jq -c -n --argjson n "$num_runners" '[range($n)]')
          echo "Matrix: $indices"
          echo "matrix=$indices" >> "$GITHUB_OUTPUT"

  # Runs the evaluation harness once per matrix index and uploads the logs
  # and predictions of each run as an artifact named output_<index>.
  # NOTE(review): matrix.index is only used to name the archive/artifact; it
  # is not passed to run_evaluation.py, so every runner appears to evaluate
  # the same predictions. Confirm whether the workload is meant to be sharded.
  solve:
    needs:
      - prepare-matrix
    runs-on: ${{ inputs.runner || 'ubuntu-latest' }}
    strategy:
      matrix:
        index: ${{ fromJson(needs['prepare-matrix'].outputs.matrix) }}
    defaults:
      run:
        # Login shell, exit on first error, fail on any failing pipe stage.
        shell: bash -leo pipefail {0}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true

      # Cache the conda environment
      - name: Cache conda environment
        id: cache-conda
        uses: actions/cache@v4
        with:
          path: /usr/share/miniconda/envs/swe-bench
          key: conda-${{ runner.os }}-${{ hashFiles('environment.yml') }}

      # Create conda env if cache miss happens
      - name: Create conda env
        if: steps.cache-conda.outputs.cache-hit != 'true'
        run: |
          conda init bash
          conda env create -f environment.yml

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            texlive \
            texlive-xetex \
            dvipng \
            ghostscript \
            libfreetype-dev \
            libtiff-dev \
            libxrender1

      - name: Run evaluation
        # Inputs are handed over as environment variables instead of being
        # interpolated into the script text, so their content cannot be
        # interpreted as shell syntax (same pattern as the report job).
        env:
          CONFIG: ${{ inputs.config }}
          PREDICTIONS: ${{ inputs.predictions }}
        run: |
          mkdir -p logs
          source /usr/share/miniconda/etc/profile.d/conda.sh
          conda activate swe-bench
          export PYTHONPATH="$PYTHONPATH:$(pwd)"
          config="${CONFIG:-lite/marshmallow}"
          # Reuse the "--instances ..." line of the solve config, renamed to
          # the flag passed to the evaluation harness below.
          tasks=$(awk '/--instances/ { sub(/--instances/, "--swe_bench_tasks"); print }' "./bin/solve_config/$config.txt")
          cp "$PREDICTIONS" predictions.jsonl
          # $tasks is intentionally unquoted: it holds a flag and its value
          # and must be word-split into separate arguments.
          python swebench/harness/run_evaluation.py \
            --predictions_path predictions.jsonl \
            $tasks \
            --log_dir logs \
            --testbed "${{ runner.temp }}" \
            --skip_existing \
            --timeout 900 \
            --verbose \
            --num_processes 8 \
            --path_conda "$(conda info --base)"

      # always(): archive and upload whatever was produced, even when the
      # evaluation step failed.
      - name: Compress evaluation results
        if: ${{ always() }}
        run: |
          tar -cJf output_${{ matrix.index }}.tar.xz logs predictions.jsonl

      - name: Upload evaluation results
        uses: actions/upload-artifact@v4
        if: ${{ always() }}
        with:
          name: output_${{ matrix.index }}
          path: output_${{ matrix.index }}.tar.xz
          # The archive is already xz-compressed.
          compression-level: 0

  # Collects the artifacts of all solve runs, runs solver/report.py and
  # publishes logs, predictions and results.csv as one artifact.
  report:
    needs:
      - solve
    # A job-level condition without a status-check function gets an implicit
    # success() applied, which would skip this job whenever solve failed.
    # !cancelled() lifts that so the 'failure' branch is actually reachable.
    if: ${{ !cancelled() && (needs.solve.result == 'success' || needs.solve.result == 'failure') }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Download evaluation results
        uses: actions/download-artifact@v4
        with:
          path: ./eval-results

      - name: Unpack evaluation results
        run: |
          mkdir -p logs
          for file in eval-results/**/*.tar.xz; do
            # Append this archive's predictions to the combined file, then
            # extract its logs/ directory into the workspace.
            tar --to-stdout -xJf "$file" predictions.jsonl >> predictions.jsonl
            tar -xJf "$file" logs
          done

      # Cache the conda environment
      - name: Cache conda environment
        id: cache-conda
        uses: actions/cache@v4
        with:
          path: /usr/share/miniconda/envs/swe-bench
          key: conda-${{ runner.os }}-${{ hashFiles('environment.yml') }}

      # Create conda env if cache miss happens
      - name: Create conda env
        if: steps.cache-conda.outputs.cache-hit != 'true'
        run: |
          conda init bash
          conda env create -f environment.yml

      - name: Generate AppMap report
        if: always()
        env:
          CONFIG: ${{ inputs.config }}
        run: |
          source /usr/share/miniconda/etc/profile.d/conda.sh
          conda activate swe-bench
          export PYTHONPATH="$PYTHONPATH:$(pwd)"
          conda info
          config="${CONFIG:-lite/marshmallow}"
          instances=$(awk '/--instances/ { print }' "./bin/solve_config/$config.txt")
          split=$(awk '/--split/ { print }' "./bin/solve_config/$config.txt")
          # Unquoted on purpose: each variable holds a flag plus its value and
          # must be word-split into separate arguments.
          python solver/report.py \
            $instances \
            $split

      - name: Archive predictions and logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: results-${{ github.run_id }}
          path: |
            logs/
            predictions.jsonl
            results.csv