Add test runner on ibm cloud

pytorch · Jul 7, 2023 · 405285c · 405285c
1 parent 9df5215
commit 405285c
Showing 1 changed file with 96 additions and 0 deletions.
diff --git a/.github/workflows/userbenchmark-ibmcloud-testrunner.yml b/.github/workflows/userbenchmark-ibmcloud-testrunner.yml
@@ -0,0 +1,96 @@
+name: TorchBench Userbenchmark test on IBM Cloud 
+on:
+  workflow_dispatch:
+    inputs:
+      userbenchmark_name:
+        description: "Name of the user benchmark to run"
+      userbenchmark_options:
+        description: "Option of the user benchmark to run"
+env:
+  PLATFORM_NAME: "ibm_cloud"
+  TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
+  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  CONDA_ENV: "torchbench"
+  DOCKER_IMAGE: "ghcr.io/pytorch/torchbench:latest"
+  SETUP_SCRIPT: "/workspace/setup_instance.sh"
+jobs:
+  run-userbenchmark:
+    runs-on: [self-hosted, testrunner1545]
+    timeout-minutes: 1440 # 24 hours
+    steps:
+      - name: Checkout TorchBench
+        uses: actions/checkout@v3
+        with:
+          path: benchmark
+      - name: Install Conda
+        run: |
+          cd benchmark
+          bash scripts/install_conda.sh
+      - name: Pull docker image
+        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
+        with:
+          docker-image: ${{ env.DOCKER_IMAGE }}
+      - name: Run Docker Image
+        run: |
+          CONTAINER_ID=$(docker run \
+            -e CONDA_ENV \
+            -e SETUP_SCRIPT \
+            --tty \
+            --detach \
+            -v "${PWD}/benchmark:/benchmark" \
+            -w / \
+            "${{ env.DOCKER_IMAGE }}" \
+            tail -f /dev/null
+          )
+          echo "Container ID: ${CONTAINER_ID}"
+          # Write the CONTAINER_ID to GITHUB_ENV
+          echo "CONTAINER_ID=${CONTAINER_ID}" >> "${GITHUB_ENV}"
+      - name: Install TorchBench
+        run: |
+          docker exec -t -w "/benchmark" "${CONTAINER_ID}" bash /benchmark/scripts/torchbench_install.sh
+      - name: Run user benchmark
+        run: |
+          # remove old results
+          if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
+          pushd benchmark
+          if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
+          # Run userbenchmark
+          MANUAL_WORKFLOW="${{ github.event.inputs.userbenchmark_name }}"
+          if [ -z "${MANUAL_WORKFLOW}" ]; then
+            # Figure out what userbenchmarks we should run, and run it
+            docker exec -t -w "/benchmark" "${CONTAINER_ID}" bash -c ". ${SETUP_SCRIPT} && conda activate ${CONDA_ENV} && \
+                              python .github/scripts/userbenchmark/schedule-benchmarks.py --platform ${PLATFORM_NAME}"
+            if [ -d ./.userbenchmark ]; then
+              cp -r ./.userbenchmark ../benchmark-output
+            else
+              mkdir ../benchmark-output
+            fi
+          else
+            docker exec -t -w "/benchmark" "${CONTAINER_ID}" bash -c ". ${SETUP_SCRIPT} && conda activate ${CONDA_ENV} && \
+                              python run_benchmark.py \
+                              \"${{ github.event.inputs.userbenchmark_name }}\" ${{ github.event.inputs.userbenchmark_options }}"
+            cp -r ./.userbenchmark/"${{ github.event.inputs.userbenchmark_name }}" ../benchmark-output
+          fi
+      - name: Upload result jsons to Scribe
+        run: |
+          pushd benchmark
+          . scripts/activate_conda.sh
+          RESULTS=($(find ${PWD}/../benchmark-output -name "metrics-*.json" -maxdepth 2 | sort -r))
+          echo "Uploading result jsons: ${RESULTS}"
+          for r in ${RESULTS[@]}; do
+            python ./scripts/userbenchmark/upload_scribe.py --userbenchmark_json "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
+            python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
+          done
+      - name: Upload artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: TorchBench result
+          path: benchmark-output/
+      - name: Teardown Linux
+        uses: pytorch/test-infra/.github/actions/teardown-linux@main
+        if: always()
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true