diff --git a/.github/workflows/new-entry.yml b/.github/workflows/new-entry.yml index 73907d8..abcf4b9 100644 --- a/.github/workflows/new-entry.yml +++ b/.github/workflows/new-entry.yml @@ -115,7 +115,7 @@ jobs: uv run appworld evaluate ${{ env.experiment_prefix }}_test_challenge test_challenge - name: Make and add leaderboard entry - run: uv run appworld make ${{ env.experiment_prefix }}_test_normal ${{ env.experiment_prefix }}_test_challenge ${{env.replace_last_flag}} + run: uv run appworld make ${{ env.experiment_prefix }}_test_normal ${{ env.experiment_prefix }}_test_challenge ${{env.replace_last_flag}} --save - name: Comment with leaderboard entry if: ${{ success() }} @@ -132,388 +132,3 @@ jobs: issue_number: issue_number, body: commentBody, }); - - -# # - name: Checkout repository -# # uses: actions/checkout@v4 -# # with: -# # fetch-depth: 0 -# # lfs: false # Disable automatic LFS handling - -# # - name: Setup Git and LFS -# # run: | -# # git lfs install -# # git config --global core.longpaths true -# # git config --global lfs.fetchexclude "" -# # git config --global lfs.fetchinclude "*" - -# # - name: Manual Checkout and LFS Fetch -# # run: | -# # git fetch origin test-pr -# # git checkout test-pr -# # git lfs fetch origin test-pr -# # git lfs pull origin test-pr - -# # - name: Debug LFS Configuration -# # run: | -# # echo "LFS Configuration:" -# # git config --list | grep lfs -# # echo "\nLFS Tracking:" -# # git lfs track -# # echo "\nLFS Status:" -# # git lfs status - -# # - name: Alternative Download Method -# # env: -# # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# # run: | -# # FILE_PATH="experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle" -# # # Direct download using GitHub CLI -# # gh api \ -# # -H "Accept: application/vnd.github.v3.raw" \ -# # "/repos/${{ github.repository }}/contents/$FILE_PATH?ref=test-pr" \ -# # > "$FILE_PATH" -# # echo "After direct download:" -# # ls -la "$FILE_PATH" -# # du -sh "$FILE_PATH" - -# # - name: Verify LFS File -# # run: | -# # echo "File Details:" -# # ls -la experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle -# # file experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle -# # du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle - - - -# # - name: Decode and Write LFS Pointer -# # run: | -# Decode the base64 LFS pointer content -# # echo "$LFS_POINTER_CONTENT" | base64 -d > lfs_pointer.txt -# # cat lfs_pointer.txt -# # -# # - name: Manual LFS File Retrieval -# # env: -# # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# # run: | -# # FILE_PATH="experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle" -# # -# Extract OID from the LFS pointer -# # OID=$(grep "oid sha256:" lfs_pointer.txt | cut -d: -f2 | tr -d ' ') -# # -# Attempt to download using the OID -# # DOWNLOAD_URL="https://github.com/StonyBrookNLP/appworld-leaderboard/raw/test-pr/$FILE_PATH" -# # -# # echo "Attempting to download: $DOWNLOAD_URL" -# # echo "Expected OID: $OID" -# # -# Try multiple download methods -# # curl -L -H "Authorization: token $GITHUB_TOKEN" "$DOWNLOAD_URL" -o "$FILE_PATH" || \ -# # wget --header="Authorization: token $GITHUB_TOKEN" "$DOWNLOAD_URL" -O "$FILE_PATH" || \ -# # gh api \ -# # -H "Accept: application/vnd.github.v3.raw" \ -# # "/repos/StonyBrookNLP/appworld-leaderboard/contents/$FILE_PATH?ref=test-pr" > "$FILE_PATH" -# # -# # echo "File after download:" -# # ls -lh "$FILE_PATH" -# # du -sh "$FILE_PATH" - - -# # - name: Exhaustive LFS Debugging -# # env: -# # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# # run: | -# # echo "GitHub Repository: ${{ github.repository }}" -# # echo "Current Branch: $(git branch --show-current)" -# # -# # echo "::group::Git LFS Verbose Information" -# # GIT_LFS_DEBUG=1 git lfs ls-files -l -# # echo "::endgroup::" -# # -# # echo "::group::GitHub API File Information" -# # FILEPATH="experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle" -# # -# Get file metadata from GitHub API -# # gh api \ -# # -H "Accept: application/vnd.github+json" \ -# # "/repos/${{ github.repository }}/contents/$FILEPATH?ref=test-pr" | jq . -# # -# Try to get the blob SHA -# # BLOB_SHA=$(git ls-tree -r test-pr | grep "$FILEPATH" | awk '{print $3}') -# # echo "Blob SHA: $BLOB_SHA" -# # -# Attempt to fetch blob details -# # if [ ! -z "$BLOB_SHA" ]; then -# # gh api \ -# # -H "Accept: application/vnd.github+json" \ -# # "/repos/${{ github.repository }}/git/blobs/$BLOB_SHA" -# # fi -# # echo "::endgroup::" -# # -# # - name: Verify Local vs Remote Tracking -# # run: | -# Compare local and remote LFS tracking -# # echo "Local Tracking:" -# # git lfs track -# # -# # echo "\nRemote Tracking:" -# # git ls-remote --refs origin test-pr -# # -# # echo "\nLFS Pointer Details:" -# # git lfs pointer --file "experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle" - - -# # - name: Checkout PR branch -# # uses: actions/checkout@v4 -# # with: -# # ref: test-pr -# # lfs: false # Disable automatic LFS checkout - -# # - name: Manual LFS Fetch -# # run: | -# # git lfs install -# # git fetch origin test-pr -# # git checkout test-pr -# # git lfs fetch origin test-pr --all -# # git lfs checkout -# # git lfs pull origin test-pr - -# # - name: Verify LFS file size -# # run: | -# # ls -lh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle -# # du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle - -# # - name: LFS Debugging -# # run: | -# # echo "Git LFS Version:" -# # git lfs version - -# # echo "::group::Git Config" -# # git config --list -# # echo "::endgroup::" - -# # echo "::group::LFS Pointer Details" -# # git lfs pointer --file experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle -# # echo "::endgroup::" - -# # echo "::group::LFS File List" -# # git lfs ls-files -l -# # echo "::endgroup::" - -# # echo "::group::Check LFS File Permissions" -# # ls -l experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle -# # echo "::endgroup::" - -# # echo "::group::File Content Check" -# # file experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle -# # cat experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle -# # echo "::endgroup::" - -# # - name: Checkout repository -# # uses: actions/checkout@v4 -# # with: -# # fetch-depth: 0 - -# # - name: Checkout specific branch -# # run: | -# # git checkout test-pr -# # git lfs pull -# # git lfs checkout - -# # - name: Checkout PR branch -# # uses: actions/checkout@v4 -# # with: -# # ref: test-pr -# # lfs: true - -# # - name: Debugging LFS -# # run: | -# # git lfs version -# # git lfs ls-files -# # git lfs status -# # du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle - -# # - name: Checkout PR branch -# # uses: actions/checkout@v4 -# # with: -# # ref: test-pr -# # lfs: true -# # fetch-depth: 0 - -# # - name: Fetch LFS objects -# # run: git lfs pull - -# # - name: Show LFS file size -# # run: du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle - -# # - name: Fetch LFS objects -# # run: | -# # git lfs install -# # git lfs fetch origin test-pr -# # git lfs checkout - -# # - name: Show LFS file size -# # run: du -sh experiments/outputs/full_code_refl_deepseekcoder_test_normal/leaderboard.bundle - -# # - name: Checkout code -# # uses: actions/checkout@v2 -# # with: -# # lfs: true - - - - - # - name: Extract arguments from comment - # id: extract_args - # run: | - # echo "Extracting arguments..." - # comment="${{ github.event.comment.body }}" - # python_version=$(echo "$comment" | grep -oP '(?<=--python )\S+') - # appworld_version=$(echo "$comment" | grep -oP '(?<=--appworld )\S+') - # experiment_prefix=$(echo "$comment" | grep -oP '(?<=--experiment-prefix )\S+') - # replace_last_flag=$(echo "$comment" | grep -q -- '--replace-last' && echo "--replace-last" || echo "") - - # echo "Python version: $python_version" - # echo "Appworld version: $appworld_version" - # echo "Experiment names: ${{ env.experiment_prefix }}_test_normal and ${{ env.experiment_prefix }}_test_challenge" - # echo "Replace last flag: $replace_last_flag" - - # echo "python_version=$python_version" >> $GITHUB_ENV - # echo "appworld_version=$appworld_version" >> $GITHUB_ENV - # echo "experiment_prefix=$experiment_prefix" >> $GITHUB_ENV - # echo "replace_last_flag=$replace_last_flag" >> $GITHUB_ENV - - -# # # - name: Checkout PR branch -# # # uses: actions/checkout@v4 -# # # with: -# # # ref: ${{ steps.get-branch.outputs.branch }} -# # # lfs: true - -# # # - name: Checkout PR branch -# # # uses: actions/checkout@v2 -# # # with: -# # # lfs: true -# # # # with: -# # # # ref: ${{ steps.get-branch.outputs.branch }} -# # # # run: git lfs fetch --all - -# # # - name: Pull LFS files -# # # run: | -# # # git lfs fetch --all -# # # git lfs pull -# # # git lfs checkout - -# # # - name: Checkout LFS objects -# # # run: git lfs checkout experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle - - - - -# # # - name: checkout engine without LFS -# # # uses: actions/checkout@v2 -# # # with: -# # # lfs: false -# # # - name: apply_lfs_filters -# # # run: | -# # # git config --local lfs.fetchexclude '**/Windows/**' -# # # - name: checkout engine with LFS -# # # uses: actions/checkout@v2 -# # # with: -# # # lfs: true -# # # - name: Show LFS -# # # run: git lfs pull - - - - -# # # - name: Check out LFS objects -# # # run: git lfs checkout experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle - -# # - name: checkout -# # uses: actions/checkout@v2 -# # with: -# # lfs: 'true' -# # - name: checkoutLFS -# # uses: actions/checkout@v2 -# # - run: git lfs pull - -# # - name: Show LFS -# # run: du -sh experiments/outputs/full_code_refl_deepseekcoder_test_challenge/leaderboard.bundle - -# # # - uses: astral-sh/setup-uv@v3 -# # # with: -# # # version: "0.4.4" - -# # # - name: Set up Python -# # # run: uv python install ${{ env.python_version }} - -# # # - name: Install venv -# # # run: uv venv - -# # # - name: Install dependencies -# # # run: | -# # # uv pip install appworld==${{ env.appworld_version }} -# # # uv run appworld install - -# # # - name: Download appworld data -# # # run: uv run appworld download data - -# # # - name: Fetch main branch -# # # run: git fetch origin main - -# # # - name: Verify PR file changes -# # # run: | -# # # echo "Checking PR for exactly two new files..." -# # # experiment_prefix="${{ env.experiment_prefix }}" -# # # expected_files=("experiments/outputs/${experiment_prefix}_test_challenge/leaderboard.bundle" "experiments/outputs/${experiment_prefix}_test_normal/leaderboard.bundle") -# # # new_files=$(git diff --name-only origin/main..HEAD) - -# # # echo "Expected files:" -# # # printf "%s\n" "${expected_files[@]}" -# # # echo "New files in the PR:" -# # # echo "$new_files" - -# # # # Sort and compare file lists -# # # expected_sorted=$(printf "%s\n" "${expected_files[@]}") -# # # actual_sorted=$(echo "$new_files" | sort) - -# # # if [[ "$expected_sorted" != "$actual_sorted" ]]; then -# # # echo "Error: File list does not match the expected files." -# # # echo "Expected:" -# # # echo "$expected_sorted" -# # # echo "Actual:" -# # # echo "$actual_sorted" -# # # exit 1 -# # # fi - -# # # echo "PR file check passed. The file list matches exactly." - -# # # - name: Unpack experiments -# # # run: | -# # # uv run appworld unpack ${{ env.experiment_prefix }}_test_normal -# # # uv run appworld unpack ${{ env.experiment_prefix }}_test_challenge - -# # # - name: Run evaluations -# # # run: | -# # # uv run appworld evaluate ${{ env.experiment_prefix }}_test_normal test_normal -# # # uv run appworld evaluate ${{ env.experiment_prefix }}_test_challenge test_challenge - -# # # - name: Make and add leaderboard entry -# # # run: uv run appworld make ${{ env.experiment_prefix }}_test_normal ${{ env.experiment_prefix }}_test_challenge ${{env.replace_last_flag}} - -# # # - name: Comment with leaderboard entry -# # # if: ${{ success() }} -# # # uses: actions/github-script@v6 -# # # with: -# # # script: | -# # # const fs = require('fs'); -# # # const entries = JSON.parse(fs.readFileSync('leaderboard.json', 'utf8')); -# # # const formattedEntry = '```json\n' + JSON.stringify(entries[entries.length - 1], null, 4) + '\n```'; -# # # const commentBody = `### Latest Leaderboard Entry\n${formattedEntry}`; -# # # const issue_number = context.issue.number; -# # # await github.rest.issues.createComment({ -# # # ...context.repo, -# # # issue_number: issue_number, -# # # body: commentBody, -# # # });