From b153cd9e3ccf71c3fabcaa40614664e50363230d Mon Sep 17 00:00:00 2001 From: Jessie Yang Date: Tue, 24 Sep 2024 16:27:47 -0700 Subject: [PATCH] prov/efa: Make NACK protocol fall back to DC longCTS when DC is requested When the application requests FI_DELIVERY_COMPLETE, the NACK protocol should fall back to the DC version of the long CTS RTMs, because the default long CTS protocol is not DC. Signed-off-by: Jessie Yang --- fabtests/pytest/efa/test_rdm.py | 4 ++-- prov/efa/src/rdm/efa_rdm_ope.c | 6 ++++-- prov/efa/src/rdm/efa_rdm_pke_cmd.c | 22 ++++++++++++++++++---- prov/efa/src/rdm/efa_rdm_pke_nonreq.c | 18 +++++++++++++++--- 4 files changed, 39 insertions(+), 11 deletions(-) diff --git a/fabtests/pytest/efa/test_rdm.py b/fabtests/pytest/efa/test_rdm.py index ec1f3044c34..112893c8ce6 100644 --- a/fabtests/pytest/efa/test_rdm.py +++ b/fabtests/pytest/efa/test_rdm.py @@ -16,9 +16,9 @@ def test_rdm_pingpong(cmdline_args, iteration_type, completion_semantic, memory_ @pytest.mark.functional @pytest.mark.serial -def test_mr_exhaustion_rdm_pingpong(cmdline_args): +def test_mr_exhaustion_rdm_pingpong(cmdline_args, completion_semantic): efa_run_client_server_test(cmdline_args, "fi_efa_exhaust_mr_reg_rdm_pingpong", "short", - "transmit_complete", "host_to_host", "all", timeout=1000) + completion_semantic, "host_to_host", "all", timeout=1000) @pytest.mark.functional def test_rdm_pingpong_range(cmdline_args, completion_semantic, memory_type_bi_dir, message_size): diff --git a/prov/efa/src/rdm/efa_rdm_ope.c b/prov/efa/src/rdm/efa_rdm_ope.c index 80332ef70b9..d3c2d94b6ad 100644 --- a/prov/efa/src/rdm/efa_rdm_ope.c +++ b/prov/efa/src/rdm/efa_rdm_ope.c @@ -1773,6 +1773,8 @@ ssize_t efa_rdm_ope_post_send(struct efa_rdm_ope *ope, int pkt_type) ssize_t efa_rdm_ope_post_send_fallback(struct efa_rdm_ope *ope, int pkt_type, ssize_t err) { + bool delivery_complete_requested = ope->fi_flags & FI_DELIVERY_COMPLETE; + if (err == -FI_ENOMR) { /* Long read and runting read protocols could fail because of a * lack of memory 
registrations. In that case, we retry with @@ -1786,7 +1788,7 @@ ssize_t efa_rdm_ope_post_send_fallback(struct efa_rdm_ope *ope, "protocol because memory registration limit " "was reached on the sender\n"); return efa_rdm_ope_post_send_or_queue( - ope, EFA_RDM_LONGCTS_MSGRTM_PKT); + ope, delivery_complete_requested ? EFA_RDM_DC_LONGCTS_MSGRTM_PKT : EFA_RDM_LONGCTS_MSGRTM_PKT); case EFA_RDM_LONGREAD_TAGRTM_PKT: case EFA_RDM_RUNTREAD_TAGRTM_PKT: EFA_INFO(FI_LOG_EP_CTRL, @@ -1794,7 +1796,7 @@ ssize_t efa_rdm_ope_post_send_fallback(struct efa_rdm_ope *ope, "because memory registration limit was " "reached on the sender\n"); return efa_rdm_ope_post_send_or_queue( - ope, EFA_RDM_LONGCTS_TAGRTM_PKT); + ope, delivery_complete_requested ? EFA_RDM_DC_LONGCTS_TAGRTM_PKT : EFA_RDM_LONGCTS_TAGRTM_PKT); default: return err; } diff --git a/prov/efa/src/rdm/efa_rdm_pke_cmd.c b/prov/efa/src/rdm/efa_rdm_pke_cmd.c index 97741ebbd27..f095cc1f772 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_cmd.c +++ b/prov/efa/src/rdm/efa_rdm_pke_cmd.c @@ -112,14 +112,18 @@ int efa_rdm_pke_fill_data(struct efa_rdm_pke *pkt_entry, /* The data_offset will be non-zero when the long CTS RTM packet * is sent to continue a runting read transfer after the * receiver has run out of memory registrations */ - assert((data_offset == 0 || ope->internal_flags & EFA_RDM_OPE_READ_NACK) && data_size == -1); + assert(data_offset == 0 || + ope->internal_flags & EFA_RDM_OPE_READ_NACK); + assert(data_size == -1); ret = efa_rdm_pke_init_longcts_msgrtm(pkt_entry, ope); break; case EFA_RDM_LONGCTS_TAGRTM_PKT: /* The data_offset will be non-zero when the long CTS RTM packet * is sent to continue a runting read transfer after the * receiver has run out of memory registrations */ - assert((data_offset == 0 || ope->internal_flags & EFA_RDM_OPE_READ_NACK) && data_size == -1); + assert(data_offset == 0 || + ope->internal_flags & EFA_RDM_OPE_READ_NACK); + assert(data_size == -1); ret = efa_rdm_pke_init_longcts_tagrtm(pkt_entry, ope); 
break; case EFA_RDM_LONGREAD_MSGRTM_PKT: @@ -187,11 +191,21 @@ int efa_rdm_pke_fill_data(struct efa_rdm_pke *pkt_entry, ret = efa_rdm_pke_init_dc_medium_tagrtm(pkt_entry, ope, data_offset, data_size); break; case EFA_RDM_DC_LONGCTS_MSGRTM_PKT: - assert(data_offset == 0 && data_size == -1); + /* The data_offset will be non-zero when the DC long CTS RTM packet + * is sent to continue a runting read transfer after the + * receiver has run out of memory registrations */ + assert(data_offset == 0 || + ope->internal_flags & EFA_RDM_OPE_READ_NACK); + assert(data_size == -1); ret = efa_rdm_pke_init_dc_longcts_msgrtm(pkt_entry, ope); break; case EFA_RDM_DC_LONGCTS_TAGRTM_PKT: - assert(data_offset == 0 && data_size == -1); + /* The data_offset will be non-zero when the DC long CTS tagged RTM packet + * is sent to continue a runting read transfer after the + * receiver has run out of memory registrations */ + assert(data_offset == 0 || + ope->internal_flags & EFA_RDM_OPE_READ_NACK); + assert(data_size == -1); ret = efa_rdm_pke_init_dc_longcts_tagrtm(pkt_entry, ope); break; case EFA_RDM_DC_EAGER_RTW_PKT: diff --git a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c index 3c384743c77..b1b7be31460 100644 --- a/prov/efa/src/rdm/efa_rdm_pke_nonreq.c +++ b/prov/efa/src/rdm/efa_rdm_pke_nonreq.c @@ -690,6 +690,7 @@ void efa_rdm_pke_handle_read_nack_recv(struct efa_rdm_pke *pkt_entry) { struct efa_rdm_read_nack_hdr *nack_hdr; struct efa_rdm_ope *txe; + bool delivery_complete_requested; efa_rdm_ep_domain(pkt_entry->ep)->num_read_msg_in_flight -= 1; @@ -700,23 +701,34 @@ void efa_rdm_pke_handle_read_nack_recv(struct efa_rdm_pke *pkt_entry) efa_rdm_pke_release_rx(pkt_entry); txe->internal_flags |= EFA_RDM_OPE_READ_NACK; + delivery_complete_requested = txe->fi_flags & FI_DELIVERY_COMPLETE; + if (txe->op == ofi_op_write) { EFA_INFO(FI_LOG_EP_CTRL, "Sender fallback to emulated long CTS write " "protocol because p2p is not available\n"); - 
efa_rdm_ope_post_send_or_queue(txe, EFA_RDM_LONGCTS_RTW_PKT); + efa_rdm_ope_post_send_or_queue( + txe, delivery_complete_requested ? + EFA_RDM_DC_LONGCTS_RTW_PKT : + EFA_RDM_LONGCTS_RTW_PKT); } else if (txe->op == ofi_op_tagged) { EFA_INFO(FI_LOG_EP_CTRL, "Sender fallback to long CTS tagged " "protocol because memory registration limit " "was reached on the receiver\n"); - efa_rdm_ope_post_send_or_queue(txe, EFA_RDM_LONGCTS_TAGRTM_PKT); + efa_rdm_ope_post_send_or_queue( + txe, delivery_complete_requested ? + EFA_RDM_DC_LONGCTS_TAGRTM_PKT : + EFA_RDM_LONGCTS_TAGRTM_PKT); } else { EFA_INFO(FI_LOG_EP_CTRL, "Sender fallback to long CTS untagged " "protocol because memory registration limit " "was reached on the receiver\n"); - efa_rdm_ope_post_send_or_queue(txe, EFA_RDM_LONGCTS_MSGRTM_PKT); + efa_rdm_ope_post_send_or_queue( + txe, delivery_complete_requested ? + EFA_RDM_DC_LONGCTS_MSGRTM_PKT : + EFA_RDM_LONGCTS_MSGRTM_PKT); } }