diff --git a/fabtests/benchmarks/benchmark_shared.c b/fabtests/benchmarks/benchmark_shared.c index a591e6c4680..c3ad6f9aacc 100644 --- a/fabtests/benchmarks/benchmark_shared.c +++ b/fabtests/benchmarks/benchmark_shared.c @@ -70,6 +70,12 @@ void ft_parse_benchmark_opts(int op, char *optarg) case 'W': opts.window_size = atoi(optarg); break; + case 'T': + /* Reuse FT_OPT_SKIP_MSG_ALLOC as a flag to post transmits + * before receives; pingpong() and run_pingpong() test this bit. + */ + opts.options |= FT_OPT_SKIP_MSG_ALLOC; + break; default: break; } @@ -84,6 +90,11 @@ void ft_benchmark_usage(void) "* The following condition is required to have at least " "one window\nsize # of messsages to be sent: " "# of iterations > window size"); + FT_PRINT_OPTS_USAGE("-T", "Post transmit operations (e.g. fi_send, fi_tsend) before receives"); + FT_PRINT_OPTS_USAGE("", "Using this option requires out-of-band address exchange and synchronization (i.e. -b must be used)"); + FT_PRINT_OPTS_USAGE("", "Only the following tests support this option for now:"); + FT_PRINT_OPTS_USAGE("", "\tfi_rdm_tagged_pingpong"); + FT_PRINT_OPTS_USAGE("", "\tfi_rdm_pingpong"); } /* Pingpong latency test with pre-posted receive buffers. */ @@ -134,6 +145,72 @@ static int pingpong_pre_posted_rx(size_t inject_size) return FI_SUCCESS; } +/* Pingpong latency test with transmit operations emitted before posting recv + * buffer. For inject transfers, this allows HW backed providers to generate + * packets while processing the posted RX command. Each iteration does one transmit and one receive: the side with opts.dst_addr set transmits first, the other side receives first and then replies. Returns FI_SUCCESS, or the first non-zero code returned by a helper call.
+ */ +static int pingpong_tx_first(size_t inject_size) +{ + int ret; + int i; + + if (opts.dst_addr) { /* destination address given: transmit, then post the receive */ + for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) { + if (i == opts.warmup_iterations) + ft_start(); /* reached once the warmup iterations are done; presumably starts the timed region (confirm) */ + + if (opts.transfer_size <= inject_size) /* message fits the inject size passed by the caller */ + ret = ft_inject(ep, remote_fi_addr, + opts.transfer_size); + else + ret = ft_tx(ep, remote_fi_addr, + opts.transfer_size, &tx_ctx); + if (ret) + return ret; + + ret = ft_post_rx(ep, opts.transfer_size, &rx_ctx); /* receive is posted only after the transmit was issued */ + if (ret) + return ret; + + ret = ft_get_rx_comp(rx_seq); /* NOTE(review): unlike the other branch, no ft_check_buf() follows here even when data verification is requested; confirm this is intended */ + if (ret) + return ret; + } + } else { /* no destination address: receive first, then send the reply */ + for (i = 0; i < opts.iterations + opts.warmup_iterations; i++) { + if (i == opts.warmup_iterations) + ft_start(); + + ret = ft_post_rx(ep, opts.transfer_size, &rx_ctx); + if (ret) + return ret; + + ret = ft_get_rx_comp(rx_seq); + if (ret) + return ret; + + if (ft_check_opts(FT_OPT_VERIFY_DATA | FT_OPT_ACTIVE)) { /* optional payload check on the received buffer, past the rx prefix */ + ret = ft_check_buf((char *) rx_buf + ft_rx_prefix_size(), + opts.transfer_size); + if (ret) + return ret; + } + + if (opts.transfer_size <= inject_size) + ret = ft_inject(ep, remote_fi_addr, + opts.transfer_size); + else + ret = ft_tx(ep, remote_fi_addr, + opts.transfer_size, &tx_ctx); + if (ret) + return ret; + } + } + ft_stop(); + + return FI_SUCCESS; +} + int pingpong(void) { int ret; @@ -156,7 +233,11 @@ int pingpong(void) if (ret) return ret; - ret = pingpong_pre_posted_rx(inject_size); + if (ft_check_opts(FT_OPT_SKIP_MSG_ALLOC)) /* bit is set by the -T option */ + ret = pingpong_tx_first(inject_size); + else + ret = pingpong_pre_posted_rx(inject_size); + if (ret) return ret; @@ -173,10 +254,22 @@ int run_pingpong(void) { int i, ret = 0; + if (ft_check_opts(FT_OPT_SKIP_MSG_ALLOC) && + !ft_check_opts(FT_OPT_OOB_SYNC)) { /* transmit-first mode is rejected unless out-of-band sync is enabled */ + FT_ERR("out-of-band address exchange and synchronization required"); + return EXIT_FAILURE; /* NOTE(review): the other failure paths visible here return ret; confirm EXIT_FAILURE is the intended code */ + } + ret = ft_init_fabric(); if (ret) return ret; + if (ft_check_opts(FT_OPT_SKIP_MSG_ALLOC)) { + ret = ft_alloc_msgs(); /* presumably needed because ft_init_fabric() skips message allocation while this flag is set; confirm against ft_init_fabric() */ + if (ret) + return ret; + } + if (!(opts.options & FT_OPT_SIZE)) { for (i = 0; i < 
TEST_CNT; i++) { if (!ft_use_size(i, opts.sizes_enabled)) diff --git a/fabtests/benchmarks/benchmark_shared.h b/fabtests/benchmarks/benchmark_shared.h index 57f0facb087..98a7b05f828 100644 --- a/fabtests/benchmarks/benchmark_shared.h +++ b/fabtests/benchmarks/benchmark_shared.h @@ -40,7 +40,7 @@ extern "C" { #include -#define BENCHMARK_OPTS "vkj:W:" +#define BENCHMARK_OPTS "Tvkj:W:" /* 'T' has no trailing colon, so it takes no argument */ #define FT_BENCHMARK_MAX_MSG_SIZE (test_size[TEST_CNT - 1].size) void ft_parse_benchmark_opts(int op, char *optarg);