Update get-workflow-job-id to also return job name (pytorch#112103)

Then we can use this job name in `filter-test-configs` if it's available. This addresses the issue in which `filter-test-configs` on GitHub runners (MacOS x86) couldn't find the runner log to get the job name. This is expected because GitHub runners are isolated, so a job should not be able to access runner logs, which could contain information from other jobs. This enables all the features that were missing because they depend on running `filter-test-configs` on GitHub runners: * Rerun disabled tests and memory leak check. For example, this would help avoid closing pytorch#110980 (comment) early with the disabled test running properly on MacOS x86 * MacOS x86 jobs can now be disabled or marked as unstable I keep the current logic to parse the log as a fallback because it's working fine on self-hosted runners. That also handles the case where `get-workflow-job-id` fails. I also move the rest of `get-workflow-job-id` up before the test step like pytorch#111483 ### Testing Spot-checked some jobs to confirm they have the correct names: * MacOS M1 test job https://github.com/pytorch/pytorch/actions/runs/6648305319/job/18065275722?pr=112103#step:10:8 * MacOS x86 build job https://github.com/pytorch/pytorch/actions/runs/6648306305/job/18065138137?pr=112103#step:9:14 * Linux test job https://github.com/pytorch/pytorch/actions/runs/6648300991/job/18065354503?pr=112103#step:13:7 * Windows test job https://github.com/pytorch/pytorch/actions/runs/6648305319/job/18065599500?pr=112103#step:12:7 * MacOS x86 test job https://github.com/pytorch/pytorch/actions/runs/6648306305/job/18066312801#step:10:8 Pull Request resolved: pytorch#112103 Approved by: https://github.com/clee2000
mengluy0125 · Oct 26, 2023 · f6f81a5 · f6f81a5
1 parent 485cc0f
commit f6f81a5
Show file tree

Hide file tree

Showing 11 changed files with 55 additions and 28 deletions.
diff --git a/.github/actions/filter-test-configs/action.yml b/.github/actions/filter-test-configs/action.yml
@@ -13,6 +13,10 @@ inputs:
     required: true
     type: string
     description: JSON description of what test configs to run.
+  job-name:
+    type: string
+    required: false
+    default: ""
 
 outputs:
   test-matrix:
@@ -56,6 +60,7 @@ runs:
 
     - name: Get the job name
       id: get-job-name
+      if: inputs.job-name == ''
       continue-on-error: true
       shell: bash
       run: |
@@ -91,7 +96,7 @@ runs:
       shell: bash
       env:
         GITHUB_TOKEN: ${{ inputs.github-token }}
-        JOB_NAME: ${{ steps.get-job-name.outputs.job-name }}
+        JOB_NAME: ${{ inputs.job-name == '' && steps.get-job-name.outputs.job-name || inputs.job-name }}
         PR_NUMBER: ${{ github.event.pull_request.number }}
         TAG: ${{ steps.parse-ref.outputs.tag }}
         EVENT_NAME: ${{ github.event_name }}

diff --git a/.github/actions/get-workflow-job-id/action.yml b/.github/actions/get-workflow-job-id/action.yml
@@ -11,18 +11,20 @@ outputs:
   job-id:
     description: The retrieved workflow job id
     value: ${{ steps.get-job-id.outputs.job-id }}
+  job-name:
+    description: The retrieved workflow job name
+    value: ${{ steps.get-job-id.outputs.job-name }}
 
 runs:
   using: composite
   steps:
-    - name: Get jobid or fail
+    - name: Get job id and name or fail
       # timeout-minutes is unsupported for composite workflows, see https://github.com/actions/runner/issues/1979
       # timeout-minutes: 10
       shell: bash
       id: get-job-id
       run: |
         set -eux
-        GHA_WORKFLOW_JOB_ID=$(python3 .github/scripts/get_workflow_job_id.py "${GITHUB_RUN_ID}" "${RUNNER_NAME}")
-        echo "job-id=${GHA_WORKFLOW_JOB_ID}" >> "${GITHUB_OUTPUT}"
+        python3 .github/scripts/get_workflow_job_id.py "${GITHUB_RUN_ID}" "${RUNNER_NAME}"
       env:
         GITHUB_TOKEN: ${{ inputs.github-token }}
diff --git a/.github/scripts/get_workflow_job_id.py b/.github/scripts/get_workflow_job_id.py
@@ -111,7 +111,7 @@ def fetch_jobs(url: str, headers: Dict[str, str]) -> List[Dict[str, str]]:
 # running.
 
 
-def find_job_id(args: Any) -> str:
+def find_job_id_name(args: Any) -> Tuple[str, str]:
     # From https://docs.github.com/en/actions/learn-github-actions/environment-variables
     PYTORCH_REPO = os.environ.get("GITHUB_REPOSITORY", "pytorch/pytorch")
     PYTORCH_GITHUB_API = f"https://api.github.com/repos/{PYTORCH_REPO}"
@@ -130,15 +130,27 @@ def find_job_id(args: Any) -> str:
 
     for job in jobs:
         if job["runner_name"] == args.runner_name:
-            return job["id"]
+            return (job["id"], job["name"])
 
     raise RuntimeError(f"Can't find job id for runner {args.runner_name}")
 
 
+def set_output(name: str, val: Any) -> None:
+    if os.getenv("GITHUB_OUTPUT"):
+        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
+            print(f"{name}={val}", file=env)
+    else:
+        print(f"::set-output name={name}::{val}")
+
+
 def main() -> None:
     args = parse_args()
     try:
-        print(find_job_id(args))
+        # Get both the job ID and job name because we have already spent a request
+        # here to get the job info
+        job_id, job_name = find_job_id_name(args)
+        set_output("job-id", job_id)
+        set_output("job-name", job_name)
     except Exception as e:
         print(repr(e), file=sys.stderr)
         print(f"workflow-{args.workflow_run_id}")

diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml
@@ -116,6 +116,7 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           test-matrix: ${{ inputs.test-matrix }}
+          job-name: ${{ steps.get-job-id.outputs.job-name }}
 
       - name: Build
         if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''

diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml
@@ -128,6 +128,7 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           test-matrix: ${{ inputs.test-matrix }}
+          job-name: ${{ steps.get-job-id.outputs.job-name }}
 
       - name: Download pytest cache
         uses: ./.github/actions/pytest-cache-download

diff --git a/.github/workflows/_mac-build.yml b/.github/workflows/_mac-build.yml
@@ -160,6 +160,7 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           test-matrix: ${{ inputs.test-matrix }}
+          job-name: ${{ steps.get-job-id.outputs.job-name }}
 
       - name: Build
         if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''

diff --git a/.github/workflows/_mac-test-mps.yml b/.github/workflows/_mac-test-mps.yml
@@ -154,6 +154,7 @@ jobs:
         with:
           use-gha: true
           file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
+
       - name: Clean up disk space
         if: always()
         continue-on-error: true

diff --git a/.github/workflows/_mac-test.yml b/.github/workflows/_mac-test.yml
@@ -99,6 +99,13 @@ jobs:
         id: parse-ref
         run: .github/scripts/parse_ref.py
 
+      - name: Get workflow job id
+        id: get-job-id
+        uses: ./.github/actions/get-workflow-job-id
+        if: always()
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Check for keep-going label and re-enabled test issues
         # This uses the filter-test-configs action because it conviniently
         # checks for labels and re-enabled test issues.  It does not actually do
@@ -108,6 +115,7 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           test-matrix: ${{ inputs.test-matrix }}
+          job-name: ${{ steps.get-job-id.outputs.job-name }}
 
       - name: Pre-process arm64 wheels
         if: inputs.build-environment == 'macos-12-py3-arm64'
@@ -180,13 +188,6 @@ jobs:
         run: |
           cat test/**/*_toprint.log || true
 
-      - name: Get workflow job id
-        id: get-job-id
-        uses: ./.github/actions/get-workflow-job-id
-        if: always()
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Stop monitoring script
         if: always() && ${{ steps.monitor-script.outputs.monitor-script-pid }}
         continue-on-error: true

diff --git a/.github/workflows/_rocm-test.yml b/.github/workflows/_rocm-test.yml
@@ -84,6 +84,13 @@ jobs:
         id: parse-ref
         run: .github/scripts/parse_ref.py
 
+      - name: Get workflow job id
+        id: get-job-id
+        uses: ./.github/actions/get-workflow-job-id
+        if: always()
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Check for keep-going label and re-enabled test issues
         # This uses the filter-test-configs action because it conviniently
         # checks for labels and re-enabled test issues.  It does not actually do
@@ -93,6 +100,7 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           test-matrix: ${{ inputs.test-matrix }}
+          job-name: ${{ steps.get-job-id.outputs.job-name }}
 
       - name: Set Test step time
         id: test-timeout
@@ -201,13 +209,6 @@ jobs:
         run: |
           cat test/**/*_toprint.log || true
 
-      - name: Get workflow job id
-        id: get-job-id
-        uses: ./.github/actions/get-workflow-job-id
-        if: always()
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Stop monitoring script
         if: always() && steps.monitor-script.outputs.monitor-script-pid
         shell: bash

diff --git a/.github/workflows/_win-build.yml b/.github/workflows/_win-build.yml
@@ -105,6 +105,7 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           test-matrix: ${{ inputs.test-matrix }}
+          job-name: ${{ steps.get-job-id.outputs.job-name }}
 
       - name: Build
         if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''

diff --git a/.github/workflows/_win-test.yml b/.github/workflows/_win-test.yml
@@ -114,6 +114,13 @@ jobs:
         run: |
           tree /F C:\$Env:GITHUB_RUN_ID\build-results
 
+      - name: Get workflow job id
+        id: get-job-id
+        uses: ./.github/actions/get-workflow-job-id
+        if: always()
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Check for keep-going label and re-enabled test issues
         # This uses the filter-test-configs action because it conviniently
         # checks for labels and re-enabled test issues.  It does not actually do
@@ -123,6 +130,7 @@ jobs:
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
           test-matrix: ${{ inputs.test-matrix }}
+          job-name: ${{ steps.get-job-id.outputs.job-name }}
 
       - name: Download pytest cache
         uses: ./.github/actions/pytest-cache-download
@@ -188,13 +196,6 @@ jobs:
         run: |
           cat test/**/*_toprint.log || true
 
-      - name: Get workflow job id
-        id: get-job-id
-        uses: ./.github/actions/get-workflow-job-id
-        if: always()
-        with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Stop monitoring script
         if: always() && steps.monitor-script.outputs.monitor-script-pid
         shell: bash