From bf77a1d50a09683bb8d9e4408f92820001e99059 Mon Sep 17 00:00:00 2001 From: Anway De Date: Tue, 7 Jan 2025 22:05:52 +0000 Subject: [PATCH] agave: add parameter for setting unified scheduler threads --- src/app/fdctl/config.h | 1 + src/app/fdctl/config/default.toml | 18 ++++++++++++++++++ src/app/fdctl/config_parse.c | 1 + src/app/fdctl/run/run_agave.c | 13 +++++++++++++ 4 files changed, 33 insertions(+) diff --git a/src/app/fdctl/config.h b/src/app/fdctl/config.h index a73fccb5ea..53587691d9 100644 --- a/src/app/fdctl/config.h +++ b/src/app/fdctl/config.h @@ -132,6 +132,7 @@ typedef struct { char affinity[ AFFINITY_SZ ]; char agave_affinity[ AFFINITY_SZ ]; + uint agave_unified_scheduler_handler_threads; uint net_tile_count; uint quic_tile_count; uint resolv_tile_count; diff --git a/src/app/fdctl/config/default.toml b/src/app/fdctl/config/default.toml index 7e06c443ae..ef659dab18 100644 --- a/src/app/fdctl/config/default.toml +++ b/src/app/fdctl/config/default.toml @@ -639,6 +639,24 @@ dynamic_port_range = "8900-9000" # determined automatically as well. agave_affinity = "auto" + # The number of threads to spawn per-fork for the unified scheduler. + # The replay stage, which is a part of the Agave subprocess, uses + # these threads for transaction execution. The threads stay within + # the cores dedicated to the Agave subprocess. + # + # If set to 0, the default depends on the number of cores available + # to the Agave subprocess. + # + # agave_cores >= 8 => agave_cores - 4 + # 4 <= agave_cores < 8 => 4 + # agave_cores < 4 => agave_cores + # + # Increasing the value for this parameter might help during the + # start-up phase when the validator is trying to catch up to the + # cluster. It may also help the node stay caught up if it keeps + # falling behind. + agave_unified_scheduler_handler_threads = 0 + # How many net tiles to run. Should be set to 1. 
This is # configurable and designed to scale out for future network # conditions but there is no need to run more than 1 net tile given diff --git a/src/app/fdctl/config_parse.c b/src/app/fdctl/config_parse.c index b283ad1ab8..5079d2c458 100644 --- a/src/app/fdctl/config_parse.c +++ b/src/app/fdctl/config_parse.c @@ -270,6 +270,7 @@ fdctl_pod_to_cfg( config_t * config, CFG_POP ( cstr, layout.affinity ); CFG_POP ( cstr, layout.agave_affinity ); + CFG_POP ( uint, layout.agave_unified_scheduler_handler_threads ); CFG_POP ( uint, layout.net_tile_count ); CFG_POP ( uint, layout.quic_tile_count ); CFG_POP ( uint, layout.resolv_tile_count ); diff --git a/src/app/fdctl/run/run_agave.c b/src/app/fdctl/run/run_agave.c index 51137a5c61..a2aafe4c1f 100644 --- a/src/app/fdctl/run/run_agave.c +++ b/src/app/fdctl/run/run_agave.c @@ -144,6 +144,19 @@ agave_boot( config_t * config ) { ADDU( "--maximum-incremental-snapshots-to-retain", config->snapshots.maximum_incremental_snapshots_to_retain ); ADDU( "--minimal-snapshot-download-speed", config->snapshots.minimum_snapshot_download_speed ); + if( config->layout.agave_unified_scheduler_handler_threads ) { + if( FD_UNLIKELY( config->layout.agave_unified_scheduler_handler_threads>config->topo.agave_affinity_cnt ) ) { + FD_LOG_ERR(( "Trying to spawn %u handler threads but the agave subprocess has %lu cores. 
" + "Either increase the number of cores in [layout.agave_affinity] or reduce " + "the number of threads in [layout.agave_unified_scheduler_handler_threads].", + config->layout.agave_unified_scheduler_handler_threads, config->topo.agave_affinity_cnt )); + } + ADDU( "--unified-scheduler-handler-threads", config->layout.agave_unified_scheduler_handler_threads ); + } else { + ulong num_threads = config->topo.agave_affinity_cnt>=8UL ? config->topo.agave_affinity_cnt-4UL : fd_ulong_min( config->topo.agave_affinity_cnt, 4UL ); /* default per default.toml table; explicit >=8 test because cnt-4UL is unsigned and would wrap to a huge value for cnt<4 */ + ADDU( "--unified-scheduler-handler-threads", (uint)num_threads ); + } + argv[ idx ] = NULL; if( FD_LIKELY( strcmp( config->reporting.solana_metrics_config, "" ) ) ) {