diff --git a/scripts/torchbench_install.sh b/.ci/torchbench/install.sh similarity index 59% rename from scripts/torchbench_install.sh rename to .ci/torchbench/install.sh index 5c1d5217a9..18d0b580d5 100644 --- a/scripts/torchbench_install.sh +++ b/.ci/torchbench/install.sh @@ -1,5 +1,10 @@ . ${HOME}/miniconda3/etc/profile.d/conda.sh +if [ -z "${BASE_CONDA_ENV}" ]; then + echo "ERROR: BASE_CONDA_ENV is not set" + exit 1 +fi + if [ -z "${CONDA_ENV}" ]; then echo "ERROR: CONDA_ENV is not set" exit 1 @@ -10,10 +15,11 @@ if [ -z "${SETUP_SCRIPT}" ]; then exit 1 fi -. "${SETUP_SCRIPT}" +CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}" +conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}" conda activate "${CONDA_ENV}" -parent_dir=$(dirname "$(readlink -f "$0")")/.. +parent_dir=$(dirname "$(readlink -f "$0")")/../.. cd ${parent_dir} python -c "import torch; print(torch.__version__); print(torch.version.git_version)" diff --git a/.ci/torchbench/test.sh b/.ci/torchbench/test.sh new file mode 100644 index 0000000000..9cba55cda7 --- /dev/null +++ b/.ci/torchbench/test.sh @@ -0,0 +1,31 @@ +. ${HOME}/miniconda3/etc/profile.d/conda.sh + +if [ -z "${CONDA_ENV}" ]; then + echo "ERROR: CONDA_ENV is not set" + exit 1 +fi + +if [ -z "${TEST_CONFIG}" ]; then + echo "ERROR: TEST_CONFIG is not set" + exit 1 +fi + +if [ -z "${SETUP_SCRIPT}" ]; then + echo "ERROR: SETUP_SCRIPT is not set" + exit 1 +fi + +. "${SETUP_SCRIPT}" +conda activate "${CONDA_ENV}" + +parent_dir=$(dirname "$(readlink -f "$0")")/../.. 
+cd ${parent_dir} + +# Test subprocess worker +if [[ "$TEST_CONFIG" == 'cpu' ]]; then + python -m torchbenchmark._components.test.test_subprocess + python -m torchbenchmark._components.test.test_worker +fi + +# Test models +python test.py -v -k "$TEST_CONFIG" diff --git a/.github/workflows/pr-a10g.yml b/.github/workflows/_linux-test-cpu.yml similarity index 67% rename from .github/workflows/pr-a10g.yml rename to .github/workflows/_linux-test-cpu.yml index fb390a7638..ec4bca72ae 100644 --- a/.github/workflows/pr-a10g.yml +++ b/.github/workflows/_linux-test-cpu.yml @@ -1,25 +1,25 @@ -name: TorchBench PR Test on A10G -on: - pull_request: - workflow_dispatch: - push: - branches: - - main - -env: - CONDA_ENV: "torchbench" - DOCKER_IMAGE: "ghcr.io/pytorch/torchbench:latest" - SETUP_SCRIPT: "/workspace/setup_instance.sh" - HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - +name: linux-test +on: + workflow_call: + inputs: + timeout-minutes: + required: true + type: number jobs: - pr-test: - # AWS A10G GPU instance label: linux.g5.4xlarge.nvidia.gpu - # OS version: Amazon Linux 2 - runs-on: [self-hosted, linux.g5.4xlarge.nvidia.gpu] - timeout-minutes: 1440 # 24 hours + pr-test-cpu: + # Don't run on forked repos + if: github.repository_owner == 'pytorch' + runs-on: [linux.24xlarge] + timeout-minutes: ${{ inputs.timeout-minutes }} environment: docker-s3-upload + env: + BASE_CONDA_ENV: "torchbench" + CONDA_ENV: "pr-test-cpu" + DOCKER_IMAGE: "ghcr.io/pytorch/torchbench:latest" + SETUP_SCRIPT: "/workspace/setup_instance.sh" + TEST_CONFIG: "cpu" + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} steps: - name: "[FB EMPLOYEES] Enable SSH (Click me for login details)" uses: pytorch/test-infra/.github/actions/setup-ssh@main @@ -33,28 +33,26 @@ jobs: uses: pytorch/test-infra/.github/actions/pull-docker-image@main with: docker-image: ${{ env.DOCKER_IMAGE }} - - name: Install NVIDIA Driver, docker runtime, set GPU_FLAG - id: install-nvidia-driver - uses: pytorch/test-infra/.github/actions/setup-nvidia@main - name: Install and Test 
TorchBench run: | container_name=$(docker run \ + -e BASE_CONDA_ENV="${BASE_CONDA_ENV}" \ -e CONDA_ENV="${CONDA_ENV}" \ -e SETUP_SCRIPT="${SETUP_SCRIPT}" \ -e HUGGING_FACE_HUB_TOKEN="${HUGGING_FACE_HUB_TOKEN}" \ + -e TEST_CONFIG="${TEST_CONFIG}" \ --tty \ --detach \ --shm-size=32gb \ -v "${PWD}/benchmark:/benchmark" \ - --gpus all \ -w / \ "${{ env.DOCKER_IMAGE }}" \ tail -f /dev/null ) echo "Container name: ${container_name}" docker exec -t -w "/" "${container_name}" bash -c "sudo chown -R runner /benchmark; sudo chgrp -R runner /benchmark" - docker exec -t -w "/benchmark" "${container_name}" bash /benchmark/scripts/torchbench_install.sh - docker exec -t -w "/benchmark" "${container_name}" bash /benchmark/scripts/torchbench_test.sh + docker exec -t -w "/benchmark" "${container_name}" bash /benchmark/.ci/torchbench/install.sh + docker exec -t -w "/benchmark" "${container_name}" bash /benchmark/.ci/torchbench/test.sh - name: Teardown Linux uses: pytorch/test-infra/.github/actions/teardown-linux@main if: always() diff --git a/.github/workflows/pr-gha-runner.yml b/.github/workflows/_linux-test-cuda.yml similarity index 76% rename from .github/workflows/pr-gha-runner.yml rename to .github/workflows/_linux-test-cuda.yml index 0c1a3d3a6e..7dd9403ed5 100644 --- a/.github/workflows/pr-gha-runner.yml +++ b/.github/workflows/_linux-test-cuda.yml @@ -1,21 +1,24 @@ -name: TorchBench PR Test -on: - pull_request: - workflow_dispatch: - push: - branches: - - main +name: linux-test +on: + workflow_call: + inputs: + timeout-minutes: + required: true + type: number jobs: - pr-test: + pr-test-cuda: + # Don't run on forked repos + if: github.repository_owner == 'pytorch' + runs-on: [a100-runner] + timeout-minutes: ${{ inputs.timeout-minutes }} + environment: docker-s3-upload env: BASE_CONDA_ENV: "torchbench" - CONDA_ENV: "pr-ci-a100" + CONDA_ENV: "pr-test-cuda" SETUP_SCRIPT: "/workspace/setup_instance.sh" + TEST_CONFIG: "cuda" HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - runs-on: [a100-runner] - timeout-minutes: 1440 # 24 hours - environment: 
docker-s3-upload steps: - name: Checkout TorchBench uses: actions/checkout@v3 @@ -31,10 +28,10 @@ jobs: conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}" - name: Install TorchBench run: | - bash ./scripts/torchbench_install.sh - - name: Validate benchmark components + bash ./.ci/torchbench/install.sh + - name: Test TorchBench run: | - bash ./scripts/torchbench_test.sh + bash ./.ci/torchbench/test.sh - name: Clean up Conda env if: always() run: | diff --git a/.github/workflows/pr-gpu-stability-ci.yml b/.github/workflows/pr-gpu-stability-ci.yml deleted file mode 100644 index c0ec193e29..0000000000 --- a/.github/workflows/pr-gpu-stability-ci.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: TorchBench GPU model stability test -on: - workflow_dispatch: - inputs: - model: - description: "Model Name" - required: true - default: "fastNLP_Bert" - pull_request: - -jobs: - stability_test: - env: - CONDA_ENV: "stability-test-ci" - TEST_HOME: "/tmp/tb-stability-ci" - PYTHON_VERSION: "3.8" - CUDA_VERSION: "cu116" - PR_BODY: ${{ github.event.pull_request.body }} - MODEL: ${{ github.event.inputs.model }} - GPU_ID: "1" - GPU_FREQ: "5001,900" - REPEAT: "10" - if: ${{ (github.event.inputs.model || contains(github.event.pull_request.body, 'STABLE_TEST_MODEL:')) }} - runs-on: [self-hosted, bm-runner] - timeout-minutes: 120 # 2 hours - environment: docker-s3-upload - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Create conda environment with pytorch nightly - run: | - conda create -y -n "${CONDA_ENV}" python="${PYTHON_VERSION}" - . activate "${CONDA_ENV}" - conda install -y numpy requests=2.22 ninja pyyaml mkl mkl-include setuptools \ - cmake cffi typing_extensions future six dataclasses tabulate gitpython - # Install pytorch nightly - pip install --pre torch torchvision torchaudio \ - -f https://download.pytorch.org/whl/nightly/${CUDA_VERSION}/torch_nightly.html - # Install torchbench dependencies - python install.py - - name: Stability test - run: | - . 
activate "${CONDA_ENV}" - mkdir -p "${TEST_HOME}" - if [ -z "${MODEL}" ] ; then - # Load PR to file - PR_BODY_FILE="${TEST_HOME}"/pr-body.txt - echo "${PR_BODY}" > "${PR_BODY_FILE}" - MODEL=`python ./.github/scripts/test-repeated-runs.py --pr-body "${PR_BODY_FILE}"` - fi - # Setup nvidia gpu frequency - sudo nvidia-persistenced --user "${USER}" || true - sudo nvidia-smi -pm "${GPU_ID}" - sudo nvidia-smi -ac "${GPU_FREQ}" - # Run the tests - EVAL_LOG="${TEST_HOME}/eval-${MODEL}.log" - echo -n > "${EVAL_LOG}" - for i in `seq 1 ${REPEAT}`; do - python run.py "${MODEL}" -t eval -d cuda | tee -a "${EVAL_LOG}" - done - TRAIN_LOG="${TEST_HOME}/train-${MODEL}.log" - echo -n > "${TRAIN_LOG}" - for i in `seq 1 ${REPEAT}`; do - python run.py "${MODEL}" -t train -d cuda | tee -a "${TRAIN_LOG}" - done - # Check the stability of GPU tests - python ./.github/scripts/test-repeated-runs.py --log "${EVAL_LOG}" && \ - echo "GPU stability test pass for inference!" - python ./.github/scripts/test-repeated-runs.py --log "${TRAIN_LOG}" && \ - echo "GPU stability test pass for train!" 
- - name: Remove conda environment - run: | - conda env remove --name "${CONDA_ENV}" - diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml new file mode 100644 index 0000000000..cf53acd187 --- /dev/null +++ b/.github/workflows/pr-test.yml @@ -0,0 +1,21 @@ +name: TorchBench PR Test +on: + pull_request: + workflow_dispatch: + push: + branches: + - main + +jobs: + cpu-test: + uses: ./.github/workflows/_linux-test-cpu.yml + with: + timeout-minutes: 120 # 2 hours + cuda-test: + uses: ./.github/workflows/_linux-test-cuda.yml + with: + timeout-minutes: 120 # 2 hours + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true diff --git a/scripts/torchbench_test.sh b/scripts/torchbench_test.sh deleted file mode 100644 index 7411b23d96..0000000000 --- a/scripts/torchbench_test.sh +++ /dev/null @@ -1,24 +0,0 @@ -. ${HOME}/miniconda3/etc/profile.d/conda.sh - -if [ -z "${CONDA_ENV}" ]; then - echo "ERROR: CONDA_ENV is not set" - exit 1 -fi - -if [ -z "${SETUP_SCRIPT}" ]; then - echo "ERROR: SETUP_SCRIPT is not set" - exit 1 -fi - -. "${SETUP_SCRIPT}" -conda activate "${CONDA_ENV}" - -parent_dir=$(dirname "$(readlink -f "$0")")/.. -cd ${parent_dir} - -# Test subprocess worker -python -m torchbenchmark._components.test.test_subprocess -python -m torchbenchmark._components.test.test_worker - -# Test models -python test.py -v