diff --git a/.github/workflows/bencher.yml b/.github/workflows/bencher.yml
new file mode 100644
index 000000000000..da60b5f3ee06
--- /dev/null
+++ b/.github/workflows/bencher.yml
@@ -0,0 +1,40 @@
+name: "Continuous Benchmarking"
+
+on:
+  push:
+    branches: main
+
+jobs:
+  benchmark_base_branch:
+    name: Continuous Benchmarking with Bencher
+    runs-on: [self-hosted, 1ES.Pool=gha-virtual-ccf-sub]
+    container:
+      image: ccfmsrc.azurecr.io/ccf/ci:2024-04-25-virtual-clang15
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - run: |
+          git config --global --add safe.directory /__w/CCF/CCF
+          mkdir build
+          cd build
+          cmake -GNinja -DCOMPILE_TARGET=virtual ..
+          ninja
+          # Limited list of benchmarks for now, but should be extended to
+          # everything under a single label eventually
+          ./tests.sh -VV -R pi_basic_virtual
+          ./tests.sh -VV -R historical_query
+          ./tests.sh -VV -R commit_latency
+
+      - uses: bencherdev/bencher@main
+      - name: Track base branch benchmarks with Bencher
+        run: |
+          bencher run \
+            --project ccf \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch main \
+            --testbed gha-virtual-ccf-sub \
+            --adapter json \
+            --err \
+            --file build/bencher.json
diff --git a/cmake/common.cmake b/cmake/common.cmake
index 71500fa6c2c0..4d24f9dc0936 100644
--- a/cmake/common.cmake
+++ b/cmake/common.cmake
@@ -211,18 +211,14 @@ function(add_perf_test)
     set(ENCLAVE_TYPE "virtual")
   endif()
 
-  set(TESTS_SUFFIX "${TESTS_SUFFIX}_cft")
-
   set(TEST_NAME "${PARSED_ARGS_NAME}${TESTS_SUFFIX}")
 
-  set(LABEL_ARG "${TEST_NAME}^")
-
   add_test(
     NAME "${PARSED_ARGS_NAME}${TESTS_SUFFIX}"
     COMMAND
       ${PYTHON} ${PARSED_ARGS_PYTHON_SCRIPT} -b . -c ${PARSED_ARGS_CLIENT_BIN}
       ${CCF_NETWORK_TEST_ARGS} ${PARSED_ARGS_CONSTITUTION} --write-tx-times
-      ${VERIFICATION_ARG} --label ${LABEL_ARG} --snapshot-tx-interval 10000
+      ${VERIFICATION_ARG} --label ${TEST_NAME} --snapshot-tx-interval 10000
       ${PARSED_ARGS_ADDITIONAL_ARGS} -e ${ENCLAVE_TYPE} -t ${ENCLAVE_PLATFORM}
       ${NODES}
   )
diff --git a/tests/commit_latency.py b/tests/commit_latency.py
index 403f1cb1064a..96622d0f0570 100644
--- a/tests/commit_latency.py
+++ b/tests/commit_latency.py
@@ -9,6 +9,7 @@ import suite.test_requirements as reqs
 from infra.log_capture import flush_info
 from infra.tx_status import TxStatus
+import infra.bencher
 
 from loguru import logger as LOG
@@ -123,9 +124,13 @@ def run(args):
         print_fn(f"Mean commit latency / sig_interval = {factor:.2f}")
         factors.append(factor)
 
-    # https://github.com/microsoft/CCF/issues/6126
-    # with cimetrics.upload.metrics(complete=False) as metrics:
-    #     metrics.put("Commit latency factor", statistics.mean(factors))
+    bf = infra.bencher.Bencher()
+    bf.set(
+        "commit_latency_ratio",
+        infra.bencher.Latency(
+            statistics.mean(factors), high_value=max(factors), low_value=min(factors)
+        ),
+    )
 
 
 if __name__ == "__main__":
diff --git a/tests/historical_query_perf.py b/tests/historical_query_perf.py
index bb118acb5d29..99c5e387f535 100644
--- a/tests/historical_query_perf.py
+++ b/tests/historical_query_perf.py
@@ -10,6 +10,7 @@ from infra.snp import IS_SNP
 import infra.jwt_issuer
 import time
+import infra.bencher
 
 from loguru import logger as LOG
@@ -187,14 +188,15 @@
     average_fetch_rate = (id_a_fetch_rate + id_b_fetch_rate + id_c_fetch_rate) / 3
     LOG.info(f"Average fetch rate: {average_fetch_rate}")
 
-    # with cimetrics.upload.metrics(complete=False) as metrics:
-    #     upload_name = "hist_sgx_cft^"
-    #     LOG.debug(f"Uploading metric: {upload_name} = {average_fetch_rate}")
-    #     metrics.put(upload_name, average_fetch_rate)
-
     # NB: The similar test in e2e_logging checks correctness, so we make no duplicate
     # assertions here
+    bf = infra.bencher.Bencher()
+    bf.set(
+        "historical_queries",
+        infra.bencher.Throughput(average_fetch_rate),
+    )
+
     return network
diff --git a/tests/infra/basicperf.py b/tests/infra/basicperf.py
index fbdc3141cb9b..9ee104ef5adf 100644
--- a/tests/infra/basicperf.py
+++ b/tests/infra/basicperf.py
@@ -595,6 +595,10 @@ def table():
         #     for key, value in additional_metrics.items():
         #         metrics.put(key, value)
 
+        metrics = {args.label: {"throughput": {"value": round(throughput, 1)}}}
+        with open("bencher.json", "w") as fd:
+            json.dump(metrics, fd)
+
     except Exception as e:
         LOG.error(f"Stopping clients due to exception: {e}")
         for remote_client in clients:
diff --git a/tests/infra/bencher.py b/tests/infra/bencher.py
new file mode 100644
index 000000000000..4f55827f4b50
--- /dev/null
+++ b/tests/infra/bencher.py
@@ -0,0 +1,58 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the Apache 2.0 License.
+
+import os
+import json
+import dataclasses
+from typing import Optional, Union
+
+BENCHER_FILE = "bencher.json"
+
+# See https://bencher.dev/docs/reference/bencher-metric-format/
+
+
+@dataclasses.dataclass
+class Value:
+    value: float
+    high_value: Optional[float] = None
+    low_value: Optional[float] = None
+
+
+@dataclasses.dataclass
+class Latency:
+    latency: Value
+
+    def __init__(
+        self,
+        value: float,
+        high_value: Optional[float] = None,
+        low_value: Optional[float] = None,
+    ):
+        self.latency = Value(value, high_value, low_value)
+
+
+@dataclasses.dataclass
+class Throughput:
+    throughput: Value
+
+    def __init__(
+        self,
+        value: float,
+        high_value: Optional[float] = None,
+        low_value: Optional[float] = None,
+    ):
+        self.throughput = Value(value, high_value, low_value)
+
+
+class Bencher:
+    def __init__(self):
+        if not os.path.isfile(BENCHER_FILE):
+            with open(BENCHER_FILE, "w+") as bf:
+                json.dump({}, bf)
+
+    def set(self, key: str, value: Union[Latency, Throughput]):
+        with open(BENCHER_FILE, "r") as bf:
+            data = json.load(bf)
+        data[key] = dataclasses.asdict(value)
+        with open(BENCHER_FILE, "w") as bf:
+            json.dump(data, bf, indent=4)
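
Taken together, these changes replace the commented-out cimetrics uploads with a small tests/infra/bencher.py helper: perf tests record their results through infra.bencher.Bencher, which accumulates them in a bencher.json file in Bencher Metric Format (https://bencher.dev/docs/reference/bencher-metric-format/), and the new workflow then publishes that file via bencher run --adapter json --file build/bencher.json. The sketch below illustrates that reporting flow under those assumptions; the metric key and the values are made up for illustration and are not taken from the diff above.

    # illustrative_metric_example.py -- hypothetical snippet, not part of the change set above
    import statistics

    import infra.bencher

    # Made-up per-run latency ratios standing in for real measurements.
    factors = [1.5, 2.5, 2.0]

    # Creates bencher.json in the current working directory if it does not exist yet.
    bf = infra.bencher.Bencher()
    bf.set(
        "example_latency_ratio",  # illustrative metric key, not used by any real test
        infra.bencher.Latency(
            statistics.mean(factors),
            high_value=max(factors),
            low_value=min(factors),
        ),
    )

    # bencher.json (build/bencher.json when the tests run from the build directory)
    # now holds the metric in Bencher Metric Format, ready for the workflow's
    # "bencher run --adapter json" step:
    # {
    #     "example_latency_ratio": {
    #         "latency": {
    #             "value": 2.0,
    #             "high_value": 2.5,
    #             "low_value": 1.5
    #         }
    #     }
    # }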