diff --git a/.github/workflows/bencher.yml b/.github/workflows/bencher.yml
new file mode 100644
index 000000000000..da60b5f3ee06
--- /dev/null
+++ b/.github/workflows/bencher.yml
@@ -0,0 +1,40 @@
+name: "Continuous Benchmarking"
+
+on:
+  push:
+    branches: main
+
+jobs:
+  benchmark_base_branch:
+    name: Continuous Benchmarking with Bencher
+    runs-on: [self-hosted, 1ES.Pool=gha-virtual-ccf-sub]
+    container:
+      image: ccfmsrc.azurecr.io/ccf/ci:2024-04-25-virtual-clang15
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - run: |
+          git config --global --add safe.directory /__w/CCF/CCF
+          mkdir build
+          cd build
+          cmake -GNinja -DCOMPILE_TARGET=virtual ..
+          ninja
+          # Limited list of benchmarks for now, but should be extended to
+          # everything under a single label eventually
+          ./tests.sh -VV -R pi_basic_virtual
+          ./tests.sh -VV -R historical_query
+          ./tests.sh -VV -R commit_latency
+
+      - uses: bencherdev/bencher@main
+      - name: Track base branch benchmarks with Bencher
+        run: |
+          bencher run \
+            --project ccf \
+            --token '${{ secrets.BENCHER_API_TOKEN }}' \
+            --branch main \
+            --testbed gha-virtual-ccf-sub \
+            --adapter json \
+            --err \
+            --file build/bencher.json
diff --git a/cmake/common.cmake b/cmake/common.cmake
index 71500fa6c2c0..4d24f9dc0936 100644
--- a/cmake/common.cmake
+++ b/cmake/common.cmake
@@ -211,18 +211,14 @@ function(add_perf_test)
     set(ENCLAVE_TYPE "virtual")
   endif()
 
-  set(TESTS_SUFFIX "${TESTS_SUFFIX}_cft")
-
   set(TEST_NAME "${PARSED_ARGS_NAME}${TESTS_SUFFIX}")
 
-  set(LABEL_ARG "${TEST_NAME}^")
-
   add_test(
     NAME "${PARSED_ARGS_NAME}${TESTS_SUFFIX}"
     COMMAND
       ${PYTHON} ${PARSED_ARGS_PYTHON_SCRIPT} -b . -c ${PARSED_ARGS_CLIENT_BIN}
       ${CCF_NETWORK_TEST_ARGS} ${PARSED_ARGS_CONSTITUTION} --write-tx-times
-      ${VERIFICATION_ARG} --label ${LABEL_ARG} --snapshot-tx-interval 10000
+      ${VERIFICATION_ARG} --label ${TEST_NAME} --snapshot-tx-interval 10000
       ${PARSED_ARGS_ADDITIONAL_ARGS} -e ${ENCLAVE_TYPE} -t ${ENCLAVE_PLATFORM}
       ${NODES}
   )
diff --git a/tests/commit_latency.py b/tests/commit_latency.py
index 403f1cb1064a..96622d0f0570 100644
--- a/tests/commit_latency.py
+++ b/tests/commit_latency.py
@@ -9,6 +9,7 @@ import suite.test_requirements as reqs
 from infra.log_capture import flush_info
 from infra.tx_status import TxStatus
+import infra.bencher
 
 from loguru import logger as LOG
@@ -123,9 +124,13 @@ def run(args):
         print_fn(f"Mean commit latency / sig_interval = {factor:.2f}")
         factors.append(factor)
 
-    # https://github.com/microsoft/CCF/issues/6126
-    # with cimetrics.upload.metrics(complete=False) as metrics:
-    #     metrics.put("Commit latency factor", statistics.mean(factors))
+    bf = infra.bencher.Bencher()
+    bf.set(
+        "commit_latency_ratio",
+        infra.bencher.Latency(
+            statistics.mean(factors), high_value=max(factors), low_value=min(factors)
+        ),
+    )
 
 
 if __name__ == "__main__":
diff --git a/tests/historical_query_perf.py b/tests/historical_query_perf.py
index bb118acb5d29..99c5e387f535 100644
--- a/tests/historical_query_perf.py
+++ b/tests/historical_query_perf.py
@@ -10,6 +10,7 @@ from infra.snp import IS_SNP
 import infra.jwt_issuer
 import time
+import infra.bencher
 
 from loguru import logger as LOG
@@ -187,14 +188,15 @@
     average_fetch_rate = (id_a_fetch_rate + id_b_fetch_rate + id_c_fetch_rate) / 3
     LOG.info(f"Average fetch rate: {average_fetch_rate}")
 
-    # with cimetrics.upload.metrics(complete=False) as metrics:
-    #     upload_name = "hist_sgx_cft^"
-    #     LOG.debug(f"Uploading metric: {upload_name} = {average_fetch_rate}")
-    #     metrics.put(upload_name, average_fetch_rate)
-
     # NB: The similar test in e2e_logging checks correctness, so we make no duplicate
     # assertions here
+    bf = infra.bencher.Bencher()
+    bf.set(
+        "historical_queries",
+        infra.bencher.Throughput(average_fetch_rate),
+    )
+
     return network
diff --git a/tests/infra/basicperf.py b/tests/infra/basicperf.py
index fbdc3141cb9b..9ee104ef5adf 100644
--- a/tests/infra/basicperf.py
+++ b/tests/infra/basicperf.py
@@ -595,6 +595,10 @@ def table():
         #     for key, value in additional_metrics.items():
         #         metrics.put(key, value)
 
+        metrics = {args.label: {"throughput": {"value": round(throughput, 1)}}}
+        with open("bencher.json", "w") as fd:
+            json.dump(metrics, fd)
+
     except Exception as e:
         LOG.error(f"Stopping clients due to exception: {e}")
         for remote_client in clients:
diff --git a/tests/infra/bencher.py b/tests/infra/bencher.py
new file mode 100644
index 000000000000..4f55827f4b50
--- /dev/null
+++ b/tests/infra/bencher.py
@@ -0,0 +1,58 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the Apache 2.0 License.
+
+import os
+import json
+import dataclasses
+from typing import Optional, Union
+
+BENCHER_FILE = "bencher.json"
+
+# See https://bencher.dev/docs/reference/bencher-metric-format/
+
+
+@dataclasses.dataclass
+class Value:
+    value: float
+    high_value: Optional[float] = None
+    low_value: Optional[float] = None
+
+
+@dataclasses.dataclass
+class Latency:
+    latency: Value
+
+    def __init__(
+        self,
+        value: float,
+        high_value: Optional[float] = None,
+        low_value: Optional[float] = None,
+    ):
+        self.latency = Value(value, high_value, low_value)
+
+
+@dataclasses.dataclass
+class Throughput:
+    throughput: Value
+
+    def __init__(
+        self,
+        value: float,
+        high_value: Optional[float] = None,
+        low_value: Optional[float] = None,
+    ):
+        self.throughput = Value(value, high_value, low_value)
+
+
+class Bencher:
+    def __init__(self):
+        if not os.path.isfile(BENCHER_FILE):
+            with open(BENCHER_FILE, "w+") as bf:
+                json.dump({}, bf)
+
+    def set(self, key: str, value: Union[Latency, Throughput]):
+        with open(BENCHER_FILE, "r") as bf:
+            data = json.load(bf)
+        data[key] = dataclasses.asdict(value)
+        with open(BENCHER_FILE, "w") as bf:
+            json.dump(data, bf, indent=4)
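
Taken together, these changes replace the commented-out cimetrics uploads with a small tests/infra/bencher.py helper: perf tests record their results through infra.bencher.Bencher, which accumulates them in a bencher.json file in Bencher Metric Format (https://bencher.dev/docs/reference/bencher-metric-format/), and the new workflow then publishes that file via bencher run --adapter json --file build/bencher.json. The sketch below illustrates that reporting flow under those assumptions; the metric key and the values are made up for illustration and are not taken from the diff above.

    # illustrative_metric_example.py -- hypothetical snippet, not part of the change set above
    import statistics

    import infra.bencher

    # Made-up per-run latency ratios standing in for real measurements.
    factors = [1.5, 2.5, 2.0]

    # Creates bencher.json in the current working directory if it does not exist yet.
    bf = infra.bencher.Bencher()
    bf.set(
        "example_latency_ratio",  # illustrative metric key, not used by any real test
        infra.bencher.Latency(
            statistics.mean(factors),
            high_value=max(factors),
            low_value=min(factors),
        ),
    )

    # bencher.json (build/bencher.json when the tests run from the build directory)
    # now holds the metric in Bencher Metric Format, ready for the workflow's
    # "bencher run --adapter json" step:
    # {
    #     "example_latency_ratio": {
    #         "latency": {
    #             "value": 2.0,
    #             "high_value": 2.5,
    #             "low_value": 1.5
    #         }
    #     }
    # }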