Skip to content

Test PR

Test PR #118

Workflow file for this run

# Workflow that builds a leaderboard entry when a maintainer-style slash
# command is posted as a comment on a pull request.
name: New leaderboard entry
on:
  # Fires for every newly created comment on an issue or pull request; the job
  # itself filters down to pull-request comments that carry the slash command.
  # Note: `branches` / `branches-ignore` filters are not supported for the
  # `issue_comment` event, so none is declared here.
  issue_comment:
    types: [created]
jobs:
  # Evaluates the two experiment bundles named in a `/add-to-leaderboard`
  # comment and posts the resulting leaderboard entry back on the pull request.
  #
  # Expected comment shape (as parsed by the "Extract arguments" step):
  #   /add-to-leaderboard --python <ver> --appworld <ver> \
  #       --experiment-prefix <prefix> [--replace-last]
  #
  # NOTE(review): any user who can comment on a pull request can trigger this
  # job; consider gating on `github.event.comment.author_association` if that
  # is not intended — confirm with the repository owners.
  add-new-entry:
    # Run only for comments on pull requests (not plain issues) that start
    # with the slash command.
    if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '/add-to-leaderboard') }}
    runs-on: ubuntu-latest
    steps:
      - name: Obtain PR branch
        id: get-branch
        env:
          REPO: ${{ github.repository }}
          PR_NO: ${{ github.event.issue.number }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # The lookup is a separate assignment so that a failing `gh` call
        # fails the step instead of silently emitting an empty branch name.
        run: |
          branch=$(gh pr view "$PR_NO" --repo "$REPO" --json headRefName --jq '.headRefName')
          echo "branch=$branch" >> "$GITHUB_OUTPUT"

      - name: Checkout PR branch
        uses: actions/checkout@v4
        with:
          ref: ${{ steps.get-branch.outputs.branch }}

      - name: Extract arguments from comment
        id: extract_args
        env:
          # The comment body is attacker-controlled. It is handed to the script
          # through the environment rather than interpolated into the script
          # text, so it can never be parsed as shell code.
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          echo "Extracting arguments..."

          # Print the first value that follows "--<name> " in the comment.
          # Only the first match is kept: several matches would otherwise be
          # written as several lines into $GITHUB_ENV and could define
          # arbitrary extra environment variables.
          extract_arg() {
            printf '%s\n' "$COMMENT_BODY" | grep -oP "(?<=--$1 )\S+" | head -n 1 || true
          }

          python_version=$(extract_arg python)
          appworld_version=$(extract_arg appworld)
          experiment_prefix=$(extract_arg experiment-prefix)
          if printf '%s\n' "$COMMENT_BODY" | grep -q -- '--replace-last'; then
            replace_last_flag="--replace-last"
          else
            replace_last_flag=""
          fi

          # Every required argument must be present and restricted to a
          # conservative character set, because the values end up in file
          # paths and command lines of later steps.
          for name in python_version appworld_version experiment_prefix; do
            value="${!name}"
            if [[ ! "$value" =~ ^[A-Za-z0-9._+@-]+$ ]]; then
              echo "::error::Missing or invalid value for ${name}: '${value}'"
              exit 1
            fi
          done

          echo "Python version: $python_version"
          echo "Appworld version: $appworld_version"
          echo "Experiment names: ${experiment_prefix}_test_normal and ${experiment_prefix}_test_challenge"
          echo "Replace last flag: $replace_last_flag"

          # Export the parsed values to all subsequent steps.
          {
            echo "python_version=$python_version"
            echo "appworld_version=$appworld_version"
            echo "experiment_prefix=$experiment_prefix"
            echo "replace_last_flag=$replace_last_flag"
          } >> "$GITHUB_ENV"

      - uses: astral-sh/setup-uv@v3
        with:
          version: "0.4.4"

      # From here on the parsed arguments are read as ordinary shell variables
      # (exported through $GITHUB_ENV above) instead of `${{ env.* }}`
      # expressions, which would paste their text into the script unescaped.
      - name: Set up Python
        run: uv python install "$python_version"

      - name: Install venv
        run: uv venv

      - name: Install dependencies
        run: |
          uv pip install "appworld==${appworld_version}"
          uv run appworld install

      - name: Download appworld data
        run: uv run appworld download data

      - name: Fetch main branch
        run: git fetch origin main

      # Disabled check, kept for reference.
      # - name: Verify PR file changes
      #   run: |
      #     echo "Checking PR for exactly two new files..."
      #     experiment_prefix="${{ env.experiment_prefix }}"
      #     expected_files=("experiments/outputs/${experiment_prefix}_test_challenge/leaderboard.bundle" "experiments/outputs/${experiment_prefix}_test_normal/leaderboard.bundle")
      #     new_files=$(git diff --name-only origin/main..HEAD)
      #     echo "Expected files:"
      #     printf "%s\n" "${expected_files[@]}"
      #     echo "New files in the PR:"
      #     echo "$new_files"
      #     # Sort and compare file lists
      #     expected_sorted=$(printf "%s\n" "${expected_files[@]}")
      #     actual_sorted=$(echo "$new_files" | sort)
      #     if [[ "$expected_sorted" != "$actual_sorted" ]]; then
      #       echo "Error: File list does not match the expected files."
      #       echo "Expected:"
      #       echo "$expected_sorted"
      #       echo "Actual:"
      #       echo "$actual_sorted"
      #       exit 1
      #     fi
      #     echo "PR file check passed. The file list matches exactly."

      - name: Download relevant files
        env:
          REPO: ${{ github.repository }}
          # Branch names are chosen by the PR author, so the value is passed
          # via the environment rather than interpolated into the script.
          BRANCH: ${{ steps.get-branch.outputs.branch }}
        # Fetches each bundle through the repository's raw URL, overwriting
        # whatever the checkout placed at that path.
        # `--fail` makes an HTTP error fail the step instead of saving the
        # error page as the bundle.
        run: |
          for split in test_normal test_challenge; do
            file_path="experiments/outputs/${experiment_prefix}_${split}/leaderboard.bundle"
            mkdir -p "$(dirname "$file_path")"
            curl --fail -L -o "$file_path" "https://github.com/${REPO}/raw/${BRANCH}/${file_path}"
            du -sh "$file_path"
          done

      - name: Unpack experiments
        run: |
          uv run appworld unpack "${experiment_prefix}_test_normal"
          uv run appworld unpack "${experiment_prefix}_test_challenge"

      - name: Run evaluations
        run: |
          uv run appworld evaluate "${experiment_prefix}_test_normal" test_normal
          uv run appworld evaluate "${experiment_prefix}_test_challenge" test_challenge
          # List the output directories of the experiments that were actually
          # requested (for debugging in the job log).
          ls "experiments/outputs/${experiment_prefix}_test_normal/"
          ls "experiments/outputs/${experiment_prefix}_test_challenge/"

      - name: Make and add leaderboard entry
        # `${var:+...}` expands to nothing when the flag is empty, so no empty
        # positional argument is passed to the command.
        run: >-
          uv run appworld make
          "${experiment_prefix}_test_normal"
          "${experiment_prefix}_test_challenge"
          ${replace_last_flag:+"$replace_last_flag"}

      - name: Comment with leaderboard entry
        if: ${{ success() }}
        uses: actions/github-script@v6
        with:
          # Reads leaderboard.json from the workspace and posts its last
          # element as a JSON code block on the pull request.
          script: |
            const fs = require('fs');
            const entries = JSON.parse(fs.readFileSync('leaderboard.json', 'utf8'));
            const formattedEntry = '```json\n' + JSON.stringify(entries[entries.length - 1], null, 4) + '\n```';
            const commentBody = `### Latest Leaderboard Entry\n${formattedEntry}`;
            const issue_number = context.issue.number;
            await github.rest.issues.createComment({
              ...context.repo,
              issue_number: issue_number,
              body: commentBody,
            });
# # - name: Checkout repository
# # uses: actions/checkout@v4
# # with:
# # fetch-depth: 0
# # lfs: false # Disable automatic LFS handling
# # - name: Setup Git and LFS
# # run: |
# # git lfs install
# # git config --global core.longpaths true
# # git config --global lfs.fetchexclude ""
# # git config --global lfs.fetchinclude "*"
# # - name: Manual Checkout and LFS Fetch
# # run: |
# # git fetch origin test-pr
# # git checkout test-pr
# # git lfs fetch origin test-pr
# # git lfs pull origin test-pr
# # - name: Debug LFS Configuration
# # run: |
# # echo "LFS Configuration:"
# # git config --list | grep lfs
# # echo "\nLFS Tracking:"
# # git lfs track
# # echo "\nLFS Status:"
# # git lfs status
# # - name: Alternative Download Method
# # env:
# # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# # run: |
# # FILE_PATH="experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle"
# # # Direct download using GitHub CLI
# # gh api \
# # -H "Accept: application/vnd.github.v3.raw" \
# # "/repos/${{ github.repository }}/contents/$FILE_PATH?ref=test-pr" \
# # > "$FILE_PATH"
# # echo "After direct download:"
# # ls -la "$FILE_PATH"
# # du -sh "$FILE_PATH"
# # - name: Verify LFS File
# # run: |
# # echo "File Details:"
# # ls -la experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # file experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # - name: Decode and Write LFS Pointer
# # run: |
# Decode the base64 LFS pointer content
# # echo "$LFS_POINTER_CONTENT" | base64 -d > lfs_pointer.txt
# # cat lfs_pointer.txt
# #
# # - name: Manual LFS File Retrieval
# # env:
# # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# # run: |
# # FILE_PATH="experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle"
# #
# Extract OID from the LFS pointer
# # OID=$(grep "oid sha256:" lfs_pointer.txt | cut -d: -f2 | tr -d ' ')
# #
# Attempt to download using the OID
# # DOWNLOAD_URL="https://github.com/StonyBrookNLP/appworld-leaderboard/raw/test-pr/$FILE_PATH"
# #
# # echo "Attempting to download: $DOWNLOAD_URL"
# # echo "Expected OID: $OID"
# #
# Try multiple download methods
# # curl -L -H "Authorization: token $GITHUB_TOKEN" "$DOWNLOAD_URL" -o "$FILE_PATH" || \
# # wget --header="Authorization: token $GITHUB_TOKEN" "$DOWNLOAD_URL" -O "$FILE_PATH" || \
# # gh api \
# # -H "Accept: application/vnd.github.v3.raw" \
# # "/repos/StonyBrookNLP/appworld-leaderboard/contents/$FILE_PATH?ref=test-pr" > "$FILE_PATH"
# #
# # echo "File after download:"
# # ls -lh "$FILE_PATH"
# # du -sh "$FILE_PATH"
# # - name: Exhaustive LFS Debugging
# # env:
# # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# # run: |
# # echo "GitHub Repository: ${{ github.repository }}"
# # echo "Current Branch: $(git branch --show-current)"
# #
# # echo "::group::Git LFS Verbose Information"
# # GIT_LFS_DEBUG=1 git lfs ls-files -l
# # echo "::endgroup::"
# #
# # echo "::group::GitHub API File Information"
# # FILEPATH="experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle"
# #
# Get file metadata from GitHub API
# # gh api \
# # -H "Accept: application/vnd.github+json" \
# # "/repos/${{ github.repository }}/contents/$FILEPATH?ref=test-pr" | jq .
# #
# Try to get the blob SHA
# # BLOB_SHA=$(git ls-tree -r test-pr | grep "$FILEPATH" | awk '{print $3}')
# # echo "Blob SHA: $BLOB_SHA"
# #
# Attempt to fetch blob details
# # if [ ! -z "$BLOB_SHA" ]; then
# # gh api \
# # -H "Accept: application/vnd.github+json" \
# # "/repos/${{ github.repository }}/git/blobs/$BLOB_SHA"
# # fi
# # echo "::endgroup::"
# #
# # - name: Verify Local vs Remote Tracking
# # run: |
# Compare local and remote LFS tracking
# # echo "Local Tracking:"
# # git lfs track
# #
# # echo "\nRemote Tracking:"
# # git ls-remote --refs origin test-pr
# #
# # echo "\nLFS Pointer Details:"
# # git lfs pointer --file "experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle"
# # - name: Checkout PR branch
# # uses: actions/checkout@v4
# # with:
# # ref: test-pr
# # lfs: false # Disable automatic LFS checkout
# # - name: Manual LFS Fetch
# # run: |
# # git lfs install
# # git fetch origin test-pr
# # git checkout test-pr
# # git lfs fetch origin test-pr --all
# # git lfs checkout
# # git lfs pull origin test-pr
# # - name: Verify LFS file size
# # run: |
# # ls -lh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # - name: LFS Debugging
# # run: |
# # echo "Git LFS Version:"
# # git lfs version
# # echo "::group::Git Config"
# # git config --list
# # echo "::endgroup::"
# # echo "::group::LFS Pointer Details"
# # git lfs pointer --file experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # echo "::endgroup::"
# # echo "::group::LFS File List"
# # git lfs ls-files -l
# # echo "::endgroup::"
# # echo "::group::Check LFS File Permissions"
# # ls -l experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # echo "::endgroup::"
# # echo "::group::File Content Check"
# # file experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # cat experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # echo "::endgroup::"
# # - name: Checkout repository
# # uses: actions/checkout@v4
# # with:
# # fetch-depth: 0
# # - name: Checkout specific branch
# # run: |
# # git checkout test-pr
# # git lfs pull
# # git lfs checkout
# # - name: Checkout PR branch
# # uses: actions/checkout@v4
# # with:
# # ref: test-pr
# # lfs: true
# # - name: Debugging LFS
# # run: |
# # git lfs version
# # git lfs ls-files
# # git lfs status
# # du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # - name: Checkout PR branch
# # uses: actions/checkout@v4
# # with:
# # ref: test-pr
# # lfs: true
# # fetch-depth: 0
# # - name: Fetch LFS objects
# # run: git lfs pull
# # - name: Show LFS file size
# # run: du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # - name: Fetch LFS objects
# # run: |
# # git lfs install
# # git lfs fetch origin test-pr
# # git lfs checkout
# # - name: Show LFS file size
# # run: du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle
# # - name: Checkout code
# # uses: actions/checkout@v2
# # with:
# # lfs: true
# - name: Extract arguments from comment
# id: extract_args
# run: |
# echo "Extracting arguments..."
# comment="${{ github.event.comment.body }}"
# python_version=$(echo "$comment" | grep -oP '(?<=--python )\S+')
# appworld_version=$(echo "$comment" | grep -oP '(?<=--appworld )\S+')
# experiment_prefix=$(echo "$comment" | grep -oP '(?<=--experiment-prefix )\S+')
# replace_last_flag=$(echo "$comment" | grep -q -- '--replace-last' && echo "--replace-last" || echo "")
# echo "Python version: $python_version"
# echo "Appworld version: $appworld_version"
# echo "Experiment names: ${{ env.experiment_prefix }}_test_normal and ${{ env.experiment_prefix }}_test_challenge"
# echo "Replace last flag: $replace_last_flag"
# echo "python_version=$python_version" >> $GITHUB_ENV
# echo "appworld_version=$appworld_version" >> $GITHUB_ENV
# echo "experiment_prefix=$experiment_prefix" >> $GITHUB_ENV
# echo "replace_last_flag=$replace_last_flag" >> $GITHUB_ENV
# # # - name: Checkout PR branch
# # # uses: actions/checkout@v4
# # # with:
# # # ref: ${{ steps.get-branch.outputs.branch }}
# # # lfs: true
# # # - name: Checkout PR branch
# # # uses: actions/checkout@v2
# # # with:
# # # lfs: true
# # # # with:
# # # # ref: ${{ steps.get-branch.outputs.branch }}
# # # # run: git lfs fetch --all
# # # - name: Pull LFS files
# # # run: |
# # # git lfs fetch --all
# # # git lfs pull
# # # git lfs checkout
# # # - name: Checkout LFS objects
# # # run: git lfs checkout experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle
# # # - name: checkout engine without LFS
# # # uses: actions/checkout@v2
# # # with:
# # # lfs: false
# # # - name: apply_lfs_filters
# # # run: |
# # # git config --local lfs.fetchexclude '**/Windows/**'
# # # - name: checkout engine with LFS
# # # uses: actions/checkout@v2
# # # with:
# # # lfs: true
# # # - name: Show LFS
# # # run: git lfs pull
# # # - name: Check out LFS objects
# # # run: git lfs checkout experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle
# # - name: checkout
# # uses: actions/checkout@v2
# # with:
# # lfs: 'true'
# # - name: checkoutLFS
# # uses: actions/checkout@v2
# # - run: git lfs pull
# # - name: Show LFS
# # run: du -sh experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle
# # # - uses: astral-sh/setup-uv@v3
# # # with:
# # # version: "0.4.4"
# # # - name: Set up Python
# # # run: uv python install ${{ env.python_version }}
# # # - name: Install venv
# # # run: uv venv
# # # - name: Install dependencies
# # # run: |
# # # uv pip install appworld==${{ env.appworld_version }}
# # # uv run appworld install
# # # - name: Download appworld data
# # # run: uv run appworld download data
# # # - name: Fetch main branch
# # # run: git fetch origin main
# # # - name: Verify PR file changes
# # # run: |
# # # echo "Checking PR for exactly two new files..."
# # # experiment_prefix="${{ env.experiment_prefix }}"
# # # expected_files=("experiments/outputs/${experiment_prefix}_test_challenge/leaderboard.bundle" "experiments/outputs/${experiment_prefix}_test_normal/leaderboard.bundle")
# # # new_files=$(git diff --name-only origin/main..HEAD)
# # # echo "Expected files:"
# # # printf "%s\n" "${expected_files[@]}"
# # # echo "New files in the PR:"
# # # echo "$new_files"
# # # # Sort and compare file lists
# # # expected_sorted=$(printf "%s\n" "${expected_files[@]}")
# # # actual_sorted=$(echo "$new_files" | sort)
# # # if [[ "$expected_sorted" != "$actual_sorted" ]]; then
# # # echo "Error: File list does not match the expected files."
# # # echo "Expected:"
# # # echo "$expected_sorted"
# # # echo "Actual:"
# # # echo "$actual_sorted"
# # # exit 1
# # # fi
# # # echo "PR file check passed. The file list matches exactly."
# # # - name: Unpack experiments
# # # run: |
# # # uv run appworld unpack ${{ env.experiment_prefix }}_test_normal
# # # uv run appworld unpack ${{ env.experiment_prefix }}_test_challenge
# # # - name: Run evaluations
# # # run: |
# # # uv run appworld evaluate ${{ env.experiment_prefix }}_test_normal test_normal
# # # uv run appworld evaluate ${{ env.experiment_prefix }}_test_challenge test_challenge
# # # - name: Make and add leaderboard entry
# # # run: uv run appworld make ${{ env.experiment_prefix }}_test_normal ${{ env.experiment_prefix }}_test_challenge ${{env.replace_last_flag}}
# # # - name: Comment with leaderboard entry
# # # if: ${{ success() }}
# # # uses: actions/github-script@v6
# # # with:
# # # script: |
# # # const fs = require('fs');
# # # const entries = JSON.parse(fs.readFileSync('leaderboard.json', 'utf8'));
# # # const formattedEntry = '```json\n' + JSON.stringify(entries[entries.length - 1], null, 4) + '\n```';
# # # const commentBody = `### Latest Leaderboard Entry\n${formattedEntry}`;
# # # const issue_number = context.issue.number;
# # # await github.rest.issues.createComment({
# # # ...context.repo,
# # # issue_number: issue_number,
# # # body: commentBody,
# # # });