From 88a2e3acfaa16d450df7d3856ca3eebb247151f9 Mon Sep 17 00:00:00 2001
From: Helio Machado <0x2b3bfa0+git@googlemail.com>
Date: Thu, 26 Sep 2024 04:33:56 +0200
Subject: [PATCH] Update build.yml

---
 .github/workflows/build.yml | 510 ++++++++++++++++++++++++++++++++++--
 1 file changed, 483 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 01c69d2c..e1b3ac82 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,36 +1,218 @@
 name: build
-on: push
+on:
+  pull_request: {}
+  push:
+    branches:
+      - main
+  schedule:
+    - cron: '0 0 * * *'
+
+  workflow_dispatch:
+    inputs:
+      dataset:
+        description: "Dataset Size"
+        required: false
+        default: "small"
+        type: choice
+        options:
+          - tiny
+          - small
+          - large
+          - mnist
+          - openorca
+          - used-cars
+          - youtube-trending
+          - meta-kaggle
+          - chest-xray-pneumonia
+      revs:
+        description: "Comma-separated list of DVC revisions"
+        required: false
+        default: ""
+        type: string
+      clouds:
+        description: "Run s3/gs/azure benchmarks"
+        required: false
+        default: false
+        type: boolean
+      host_dvc_rev:
+        description: "DVC revision to use for running the tests"
+        required: false
+        default: "main"
+        type: string
+      tests_to_run:
+        description: "Comma-separated list of names of tests to run. If left empty (default), it will run all the tests"
+        required: false
+        default: ""
+        type: string
+      cprofile:
+        description: "Enable cprofile"
+        required: false
+        default: false
+        type: boolean
+
 env:
   DVC_TEST: "true"
   FORCE_COLOR: "1"
-  DATASET: ${{ (github.event_name == 'schedule' && 'mnist') || github.event.inputs.dataset || 'small' }}
-  REVS: ${{ github.event.inputs.revs || 'main,3.53.2,3.10.0,2.58.2' }}
-  # run on small set of revisions for clouds
-  CLOUD_REVS: ${{ github.event.inputs.revs || 'main,3.53.2' }}
-  DVC_REPOSITORY: iterative/dvc
-  DVC_REF: ${{ github.event.inputs.host_dvc_rev || 'main' }}
-  DVC_AZURE_REPOSITORY: iterative/dvc-azure
-  DVC_AZURE_REF: main
-  DVC_GS_REPOSITORY: iterative/dvc-gs
-  DVC_GS_REF: main
-  DVC_S3_REPOSITORY: iterative/dvc-s3
-  DVC_S3_REF: main
-  UV_SYSTEM_PYTHON: true
-  TESTS_TO_RUN: ${{ github.event.inputs.tests_to_run || '' }}
-  CPROFILE_ARGS: ${{ github.event.inputs.cprofile == 'true' && '--benchmark-cprofile-dump' || '' }}
-
-permissions:
+
@@ -73,320 +23,11 @@ permissions:
   contents: read
 jobs:
-  build_gs:
+  lint:
     runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - uses: hynek/setup-cached-uv@v2
+        with:
+          cache-dependency-path: requirements.txt
+      - name: install requirements
+        run: uv pip install -r requirements.txt
+      - uses: pre-commit/action@v3.0.1
+
+  gen:
+    runs-on: ubuntu-latest
+    outputs:
+      tests: ${{ steps.tests.outputs.tests }}
+      azure-tests: ${{ steps.azure-tests.outputs.azure-tests }}
+      gs-tests: ${{ steps.gs-tests.outputs.gs-tests }}
+      s3-tests: ${{ steps.s3-tests.outputs.s3-tests }}
+    steps:
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - uses: actions/checkout@v4
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_REPOSITORY }}
+          ref: ${{ env.DVC_REF }}
+          path: dvc
+          fetch-depth: 0
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_AZURE_REPOSITORY }}
+          ref: ${{ env.DVC_AZURE_REF }}
+          path: dvc-azure
+          fetch-depth: 0
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_GS_REPOSITORY }}
+          ref: ${{ env.DVC_GS_REF }}
+          path: dvc-gs
+          fetch-depth: 0
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_S3_REPOSITORY }}
+          ref: ${{ env.DVC_S3_REF }}
+          path: dvc-s3
+          fetch-depth: 0
+      - uses: hynek/setup-cached-uv@v2
+        with:
+          cache-dependency-path: |
+            dvc/pyproject.toml
+            dvc-azure/pyproject.toml
+            dvc-gs/pyproject.toml
+            dvc-s3/pyproject.toml
+      - name: install reqs
+        run: uv pip install "./dvc[tests]" "./dvc-azure[tests]" "./dvc-gs[tests]" "./dvc-s3[tests]"
+      - id: tests
+        working-directory: dvc/
+        run: echo "tests=$(../scripts/ci/list_tests.sh dvc/testing/benchmarks '${{ env.TESTS_TO_RUN }}')" >> $GITHUB_OUTPUT
+      - id: azure-tests
+        working-directory: dvc-azure/
+        run: echo "azure-tests=$(../scripts/ci/list_tests.sh dvc_azure/tests/benchmarks.py '${{ env.TESTS_TO_RUN }}')" >> $GITHUB_OUTPUT
+      - id: gs-tests
+        working-directory: dvc-gs/
+        run: echo "gs-tests=$(../scripts/ci/list_tests.sh dvc_gs/tests/benchmarks.py '${{ env.TESTS_TO_RUN }}')" >> $GITHUB_OUTPUT
+      - id: s3-tests
+        working-directory: dvc-s3/
+        run: echo "s3-tests=$(../scripts/ci/list_tests.sh dvc_s3/tests/benchmarks.py '${{ env.TESTS_TO_RUN }}')" >> $GITHUB_OUTPUT
+
+  build:
+    needs: [gen]
+    timeout-minutes: 180
+    name: run ${{ matrix.test.name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        test: ${{fromJson(needs.gen.outputs.tests)}}
+    steps:
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - uses: actions/checkout@v4
+        with:
+          path: dvc-bench
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_REPOSITORY }}
+          ref: ${{ env.DVC_REF }}
+          path: dvc
+          fetch-depth: 0
+      - run: ../dvc-bench/scripts/ci/fetch-all-revs.sh "${REVS}"
+        working-directory: dvc
+      - uses: hynek/setup-cached-uv@v2
+        with:
+          cache-dependency-path: dvc/pyproject.toml
+      - name: install requirements
+        run: uv pip install "./dvc[tests]"
+      - uses: actions/cache/restore@v4
+        id: restore-cache
+        with:
+          path: dvc-bench/.dvc/cache
+          key: ${{ env.DATASET }}
+      - name: run benchmarks
+        shell: bash
+        working-directory: dvc/
+        run: >
+          pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
+          ${{ matrix.test.path }}
+          --dvc-revs ${REVS}
+          --dataset ${DATASET}
+          --dvc-bench-repo ../dvc-bench --dvc-repo $(pwd)
+          -W ignore
+          ${{ env.CPROFILE_ARGS }}
+      - if: ${{ steps.restore-cache.outputs.cache-hit != 'true' && matrix.test.name == 'test_add_copy' }}
+        uses: actions/cache/save@v4
+        with:
+          path: dvc-bench/.dvc/cache
+          key: ${{ steps.restore-cache.outputs.cache-primary-key }}
+      - name: upload raw results
+        uses: actions/upload-artifact@v4
+        with:
+          name: .benchmarks-${{ matrix.test.name }}
+          path: dvc/.benchmarks
+      - if: ${{ env.CPROFILE_ARGS }}
+        name: upload profiling results
+        uses: actions/upload-artifact@v4
+        with:
+          name: prof-${{ matrix.test.name }}
+          path: dvc/prof
+
+  build_s3:
+    if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
+    needs: [gen]
+    strategy:
+      fail-fast: false
+      matrix:
+        test: ${{fromJson(needs.gen.outputs.s3-tests)}}
+    runs-on: ubuntu-latest
+    name: run ${{ matrix.test.name }}
     timeout-minutes: 480
     continue-on-error: true
-    environment: google-cloud
     permissions:
       id-token: write
     steps:
+      - name: configure AWS credentials
+        if: ${{ github.event_name == 'schedule' }}
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::342840881361:role/dvc-bench-gha
+          role-duration-seconds: 28800
+          aws-region: us-east-1
       - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
@@ -47,40 +229,314 @@ jobs:
         working-directory: dvc
       - uses: actions/checkout@v4
         with:
-          repository: ${{ env.DVC_GS_REPOSITORY }}
-          ref: ${{ env.DVC_GS_REF }}
-          path: dvc-gs
+          repository: ${{ env.DVC_S3_REPOSITORY }}
+          ref: ${{ env.DVC_S3_REF }}
+          path: dvc-s3
+          fetch-depth: 0
+      - uses: hynek/setup-cached-uv@v2
+        with:
+          cache-dependency-path: |
+            dvc/pyproject.toml
+            dvc-s3/pyproject.toml
+      - name: install requirements
+        run: uv pip install "./dvc[tests]" "./dvc-s3[tests]"
+      - uses: actions/cache/restore@v4
+        with:
+          path: dvc-bench/.dvc/cache
+          key: ${{ env.DATASET }}
+      - name: configure real S3 DVC env
+        if: ${{ github.event_name == 'schedule' }}
+        run: |
+          echo "DVC_TEST_AWS_REPO_BUCKET=dvc-bench-ci" >> "$GITHUB_ENV"
+      - name: run benchmarks
+        shell: bash
+        working-directory: dvc-s3/
+        run: >
+          pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
+          ${{ matrix.test.path }}
+          --dvc-revs ${CLOUD_REVS}
+          --dataset ${DATASET}
+          --dvc-install-deps s3
+          --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc
+          -W ignore
+          ${{ env.CPROFILE_ARGS }}
+      - name: upload raw results
+        uses: actions/upload-artifact@v4
+        with:
+          name: .benchmarks-${{ matrix.test.name }}
+          path: dvc-s3/.benchmarks
+      - if: ${{ env.CPROFILE_ARGS }}
+        name: upload profiling results
+        uses: actions/upload-artifact@v4
+        with:
+          name: prof-${{ matrix.test.name }}
+          path: dvc-s3/prof
+
+  build_azure:
+    if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
+    needs: [gen]
+    strategy:
+      fail-fast: false
+      matrix:
+        test: ${{fromJson(needs.gen.outputs.azure-tests)}}
+    runs-on: ubuntu-latest
+    name: run ${{ matrix.test.name }}
+    timeout-minutes: 480
+    continue-on-error: true
+    permissions:
+      id-token: write
+    environment: ${{ github.event_name == 'schedule' && 'github-actions' || ''}}
+    steps:
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - uses: actions/checkout@v4
+        with:
+          path: dvc-bench
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_REPOSITORY }}
+          ref: ${{ env.DVC_REF }}
+          path: dvc
+          fetch-depth: 0
+      - run: ../dvc-bench/scripts/ci/fetch-all-revs.sh "${REVS}"
+        working-directory: dvc
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_AZURE_REPOSITORY }}
+          ref: ${{ env.DVC_AZURE_REF }}
+          path: dvc-azure
           fetch-depth: 0
       - uses: hynek/setup-cached-uv@v2
         with:
           cache-dependency-path: |
+            dvc/pyproject.toml
+            dvc-azure/pyproject.toml
+      - name: 'Az CLI login'
+        if: ${{ github.event_name == 'schedule' }}
+        uses: azure/login@v2
+        with:
+          client-id: ${{ vars.AZURE_CLIENT_ID }}
+          tenant-id: ${{ vars.AZURE_TENANT_ID }}
+          subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
+      - name: 'Set connection string'
+        if: ${{ github.event_name == 'schedule' }}
+        id: set-conn-str
+        run: |
+          connection_string=$(az storage account show-connection-string -g dvc-bench-ci -n dvcbenchci | jq -c '.connectionString')
+          echo "::add-mask::$connection_string"
+          echo "DVC_TEST_AZURE_CONNECTION_STRING=$connection_string" >> $GITHUB_ENV
+      - name: install requirements
+        run: uv pip install "./dvc[tests]" "./dvc-azure[tests]"
+      - uses: actions/cache/restore@v4
+        with:
+          path: dvc-bench/.dvc/cache
+          key: ${{ env.DATASET }}
+      - name: setup env
+        if: ${{ github.event_name == 'schedule' }}
+        run: |
+          echo "DVC_TEST_AZURE_PATH=az://dvc-bench-ci" >> $GITHUB_ENV
+      - name: run benchmarks
+        shell: bash
+        working-directory: dvc-azure/
+        run: >
+          pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
+          ${{ matrix.test.path }}
+          --dvc-revs ${CLOUD_REVS}
+          --dataset ${DATASET}
+          --dvc-install-deps azure
+          --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc
+          -W ignore
+          ${{ env.CPROFILE_ARGS }}
+      - name: upload raw results
+        uses: actions/upload-artifact@v4
+        with:
+          name: .benchmarks-${{ matrix.test.name }}
+          path: dvc-azure/.benchmarks
+      - if: ${{ env.CPROFILE_ARGS }}
+        name: upload profiling results
+        uses: actions/upload-artifact@v4
+        with:
+          name: prof-${{ matrix.test.name }}
+          path: dvc-azure/prof
+
+  build_gs:
+    if: ${{ github.event_name == 'schedule' || github.event.inputs.clouds == 'true' }}
+    needs: [gen]
+    strategy:
+      fail-fast: false
+      matrix:
+        test: ${{fromJson(needs.gen.outputs.gs-tests)}}
+    runs-on: ubuntu-latest
+    name: run ${{ matrix.test.name }}
+    timeout-minutes: 480
+    continue-on-error: true
+    permissions:
+      id-token: write
+    environment: google-cloud
+    steps:
+
@@ -416,167 +57,30 @@ jobs:
           dvc/pyproject.toml
           dvc-gs/pyproject.toml
       - name: install reqs
-        run: pip install "./dvc[tests]" "./dvc-gs[tests]"
+        run: uv pip install "./dvc[tests]" "./dvc-gs[tests]"
       - uses: actions/cache/restore@v4
         with:
           path: dvc-bench/.dvc/cache
          key: ${{ env.DATASET }}
       - id: 'auth'
+        if: ${{ github.event_name == 'schedule' }}
         name: 'Authenticate to GCP'
         uses: 'google-github-actions/auth@v2.1.5'
         with:
           workload_identity_provider: 'projects/385088528371/locations/global/workloadIdentityPools/github/providers/github'
           service_account: 'dvc-bench@iterative-sandbox.iam.gserviceaccount.com'
       - name: configure real GS DVC env
+        if: ${{ github.event_name == 'schedule' }}
         run: |
           echo "DVC_TEST_GS_BUCKET=dvc-bench" >> "$GITHUB_ENV"
       - name: run benchmarks
         shell: bash
         working-directory: dvc-gs/
         run: >
-          pytest --benchmark-save test_sharing_gs --benchmark-group-by func
-          dvc_gs/tests/benchmarks.py::test_sharing_gs
+          pytest --benchmark-save ${{ matrix.test.name }} --benchmark-group-by func
+          ${{ matrix.test.path }}
           --dvc-revs ${CLOUD_REVS}
           --dataset ${DATASET}
           --dvc-install-deps gs
           --dvc-bench-repo ../dvc-bench --dvc-repo ../dvc
           -W ignore
-          dvc_gs/tests/benchmarks.py::test_sharing_gs
+          ${{ matrix.test.path }}
           ${{ env.CPROFILE_ARGS }}
+      - name: upload raw results
+        uses: actions/upload-artifact@v4
+        with:
+          name: .benchmarks-${{ matrix.test.name }}
+          path: dvc-gs/.benchmarks
+      - if: ${{ env.CPROFILE_ARGS }}
+        name: upload profiling results
+        uses: actions/upload-artifact@v4
+        with:
+          name: prof-${{ matrix.test.name }}
+          path: dvc-gs/prof
+  notify:
+    if: github.event_name != 'workflow_dispatch' && github.ref == 'refs/heads/main' && failure()
+    needs: [build, build_s3, build_azure, build_gs]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Slack Notification
+        uses: rtCamp/action-slack-notify@v2.3.0
+        env:
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+          SLACK_COLOR: ${{ job.status }}
+          SLACK_MESSAGE: 'Benchmarks failed on main :confused_dog:'
+          SLACK_TITLE: ":dvc:"
+          SLACK_USERNAME: dvc-bench
+
+  compare:
+    name: join results and publish
+    needs: [build, build_s3, build_azure, build_gs]
+    if: always() && !contains(needs.*.result, 'failure')
+    runs-on: ubuntu-latest
+    environment: aws
+    permissions:
+      pages: write
+      pull-requests: write
+      contents: write
+      id-token: write
+    steps:
+      - if: ${{ env.CPROFILE_ARGS }}
+        uses: actions/upload-artifact/merge@v4
+        with:
+          name: all-prof
+          pattern: prof-*
+          delete-merged: true
+      - uses: actions/upload-artifact/merge@v4
+        with:
+          name: all-benchmarks
+          pattern: .benchmarks-*
+          delete-merged: true
+
+      - uses: iterative/setup-cml@v3
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - uses: actions/checkout@v4
+      - uses: actions/checkout@v4
+        with:
+          repository: ${{ env.DVC_REPOSITORY }}
+          ref: ${{ env.DVC_REF }}
+          path: dvc
+          fetch-depth: 0
+      - uses: hynek/setup-cached-uv@v2
+        with:
+          cache-dependency-path: |
+            requirements.txt
+            dvc/pyproject.toml
+      - name: install requirements
+        run: uv pip install -r requirements.txt "./dvc[testing]"
+      - name: download ubuntu results
+        uses: actions/download-artifact@v4
+        with:
+          pattern: all-benchmarks
+          path: .benchmarks
+      - name: compare results
+        shell: bash
+        run: ./scripts/ci/gen_html.sh
+
+      - name: configure AWS credentials
+        if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::260760892802:role/dvc-bench
+          aws-region: us-east-2
+
+      - name: upload to s3
+        if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+        run: aws s3 cp html/index.html s3://dvc-bench/pages/${{ github.event_name }}/${{ github.run_id }}_${{ github.run_attempt }}.html
+
+      - name: generate pages
+        if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+        run: |
+          mv html/index.html html/latest.html
+          mkdir html/workflow_dispatch html/schedule
+          LATEST_SCHEDULE="$(aws s3 ls s3://dvc-bench/pages/schedule/ --recursive | sort -r | head -n 30 | awk '{print $4}')"
+          LATEST_DISPATCH="$(aws s3 ls s3://dvc-bench/pages/workflow_dispatch/ --recursive | sort -r | head -n 30 | awk '{print $4}')"
+          echo "$LATEST_SCHEDULE" | xargs -I '{}' aws s3 cp s3://dvc-bench/'{}' html/schedule
+          echo "$LATEST_DISPATCH" | xargs -I '{}' aws s3 cp s3://dvc-bench/'{}' html/workflow_dispatch
+          cp html/schedule/$(echo "$LATEST_SCHEDULE" | head -1 | xargs basename) html/schedule/latest.html
+          cp html/workflow_dispatch/$(echo "$LATEST_DISPATCH" | head -1 | xargs basename) html/workflow_dispatch/latest.html
+      - name: generate listings
+        uses: jayanta525/github-pages-directory-listing@v4.0.0
+        with:
+          FOLDER: html
+
+      - name: post comment
+        env:
+          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        if: ${{ github.event_name == 'pull_request' && ! github.event.pull_request.head.repo.fork }}
+        run: |
+          cml comment update --watermark-title='dvc-bench report' report.md
+      - name: Setup Pages
+        id: pages
+        uses: actions/configure-pages@v5
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: ./html
+
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    permissions:
+      pages: write
+      id-token: write
+    runs-on: ubuntu-latest
+    needs: compare
+    if: always() && !contains(needs.*.result, 'failure') && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
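
Note: the `gen` job relies on scripts/ci/list_tests.sh (not shown in this diff) printing a JSON
array of objects with `name` and `path` keys, which `fromJson(needs.gen.outputs.tests)` expands
into the job matrix consumed as `${{ matrix.test.name }}` / `${{ matrix.test.path }}`. A minimal
sketch of that assumed contract, with a hypothetical test path (only `test_add_copy` and the
`dvc/testing/benchmarks` prefix appear in the patch itself):

    # assumed output shape of list_tests.sh, run with an empty TESTS_TO_RUN filter
    $ ./scripts/ci/list_tests.sh dvc/testing/benchmarks ''
    [{"name":"test_add_copy","path":"dvc/testing/benchmarks/cli/commands/test_add.py::test_add_copy"}]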