Skip to content

Commit

Permalink
mi: add control primitive command
Browse files Browse the repository at this point in the history
Add a new function `nvme_mi_control` to perform a Control Primitive
command. This function is used to control the service state of the
NVMe device.

The Control Primitive command is primarily used to resolve the issue
where the NVMe Servicing State cannot return to the Idle state.

Why the Control Primitive command is needed:

For example, when NVMe transmits messages to the Management Controller
(like BMC, hereinafter referred to as BMC), the state changes to
`Transmit`. Some NVMe devices remain in this state until the message
transmission completes, after which they revert to `Idle`.

In cases where multiple messages are needed to assemble a complete MI
message, such as the admin Identify message with an MTU of 64, receiving
4096 bytes results in approximately 60-70 packets. If the BMC stops
receiving halfway through, NVMe remains in the `Transmit` state without
transitioning to `Idle`, preventing further communication. Now we can
use the Control Primitive command to abort the transmission and return
to the `Idle` state.

Regarding the scenario where BMC stops receiving halfway:
1. BMC Reboot.
2. BMC uses commands like Ctrl-C during the read process to actively
abort.
3. ... (other unexpected scenarios)

TL;DR: An incomplete command may cause the NVMe device to be stuck in
`Transmit`; we need a way to abort the command and return to `Idle`.

For details, see `Out-of-Band Message Servicing Model`,
`Figure 34: Command Servicing State Diagram`.

To test this, use the example below:
1. ~# while true; do mi-mctp 1 20 identify 0; done
2. ~# use `Ctrl+C` to break the command. (PS: my NVMe device sits behind an
i2c mux, and the mux may be switched to another channel by other commands, so
an error is highly likely.)
3. Check the nvme state (command in next commit), always in Transmit
~#  mi-mctp 1 20  control-primitive get-state
```
NVMe control primitive
 Get State : cspr is 0x840b
  Slot Command Servicing State: Transmit
...
  Pause Flag: Yes
```
4. send the identify command again, no response
~# mi-mctp 1 20 identify 0
~# mi-mctp: can't perform Admin Identify command: Connection timed out
5. use the new function to abort the command
~# mi-mctp 1 20  control-primitive abort
6. The NVMe state returns to Idle, and the identify command can be
executed again.

Signed-off-by: Jian Zhang <zhangjian.3032@bytedance.com>
  • Loading branch information
zhangjian3032 authored and igaw committed Aug 7, 2024
1 parent 9967817 commit d7fa27c
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/libnvme-mi.map
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
/* Version node LIBNVME_MI_1_11: exports the Control Primitive entry point. */
LIBNVME_MI_1_11 {
global:
nvme_mi_control;
};

LIBNVME_MI_1_10 {
global:
nvme_mi_admin_get_ana_log_atomic;
Expand Down
68 changes: 68 additions & 0 deletions src/nvme/mi.c
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,31 @@ static void nvme_mi_admin_init_resp(struct nvme_mi_resp *resp,
resp->hdr_len = sizeof(*hdr);
}

/*
 * Prepare a Control Primitive request.
 *
 * Both the generic request descriptor (@req) and the control message
 * (@control_req) are cleared, the message header, opcode and CPSP are
 * populated, and @req is pointed at the control message as its header.
 * The CPSP is stored in little-endian form.
 */
static void nvme_mi_control_init_req(struct nvme_mi_req *req,
				     struct nvme_mi_control_req *control_req,
				     __u8 opcode, __u16 cpsp)
{
	memset(control_req, 0, sizeof(*control_req));
	memset(req, 0, sizeof(*req));

	/* we always use command slot 0 */
	control_req->hdr.nmp = (NVME_MI_MT_CONTROL << 3) |
			       (NVME_MI_ROR_REQ << 7);
	control_req->hdr.type = NVME_MI_MSGTYPE_NVME;
	control_req->cpsp = cpu_to_le16(cpsp);
	control_req->opcode = opcode;

	/* the whole control message is carried as the request header */
	req->hdr_len = sizeof(*control_req);
	req->hdr = &control_req->hdr;
}

/*
 * Prepare the response descriptor for a Control Primitive command.
 *
 * @resp is cleared and set up so that the whole of @control_resp is used as
 * the response header. @control_resp itself is not modified here.
 */
static void nvme_mi_control_init_resp(struct nvme_mi_resp *resp,
				      struct nvme_mi_control_resp *control_resp)
{
	memset(resp, 0, sizeof(*resp));

	resp->hdr_len = sizeof(*control_resp);
	resp->hdr = &control_resp->hdr;
}

static int nvme_mi_admin_parse_status(struct nvme_mi_resp *resp, __u32 *result)
{
struct nvme_mi_admin_resp_hdr *admin_hdr;
Expand Down Expand Up @@ -580,6 +605,26 @@ static int nvme_mi_admin_parse_status(struct nvme_mi_resp *resp, __u32 *result)
return nvme_status;
}

/*
 * Interpret the response to a Control Primitive command.
 *
 * @resp: response descriptor whose header holds a control response
 * @cpsr: optional output for the CPSR field (converted to CPU byte order);
 *        may be NULL, and is only written when the response status is zero
 *
 * Returns 0 on success. If the received header is shorter than a control
 * response, returns -1 with errno set to EPROTO. If the response carries a
 * non-zero status, returns that status tagged with NVME_STATUS_TYPE_MI.
 */
static int nvme_mi_control_parse_status(struct nvme_mi_resp *resp, __u16 *cpsr)
{
	struct nvme_mi_control_resp *control_resp;

	if (resp->hdr_len < sizeof(*control_resp)) {
		/* errno codes are positive; the failure is signalled by -1 */
		errno = EPROTO;
		return -1;
	}
	control_resp = (struct nvme_mi_control_resp *)resp->hdr;

	if (control_resp->status)
		return control_resp->status |
			(NVME_STATUS_TYPE_MI << NVME_STATUS_TYPE_SHIFT);

	if (cpsr)
		*cpsr = le16_to_cpu(control_resp->cpsr);

	/* status is known to be zero at this point */
	return 0;
}

int nvme_mi_admin_xfer(nvme_mi_ctrl_t ctrl,
struct nvme_mi_admin_req_hdr *admin_req,
size_t req_data_size,
Expand Down Expand Up @@ -812,6 +857,29 @@ int nvme_mi_admin_identify_partial(nvme_mi_ctrl_t ctrl,
return 0;
}

/*
 * Issue a Control Primitive command on endpoint @ep and decode the reply.
 *
 * The request/response pair lives on the stack for the duration of the call.
 * A non-zero result from the submit step is returned unchanged; otherwise
 * the result of parsing the response status is returned, with the CPSR
 * stored through @result_cpsr when that pointer is non-NULL and the
 * response status is zero.
 */
int nvme_mi_control(nvme_mi_ep_t ep, __u8 opcode,
		    __u16 cpsp, __u16 *result_cpsr)
{
	struct nvme_mi_control_req ctl_req;
	struct nvme_mi_control_resp ctl_resp;
	struct nvme_mi_req mi_req;
	struct nvme_mi_resp mi_resp;
	int err;

	nvme_mi_control_init_req(&mi_req, &ctl_req, opcode, cpsp);
	nvme_mi_control_init_resp(&mi_resp, &ctl_resp);

	err = nvme_mi_submit(ep, &mi_req, &mi_resp);
	if (err)
		return err;

	return nvme_mi_control_parse_status(&mi_resp, result_cpsr);
}

/* retrieves a MCTP-message-sized chunk of log page data. offset and len are
* specified within the args->data area. The `offset` parameter is a relative
* offset to the args->lpo !
Expand Down
64 changes: 64 additions & 0 deletions src/nvme/mi.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,52 @@ struct nvme_mi_admin_resp_hdr {
__le32 cdw0, cdw1, cdw3;
} __attribute__((packed));

/**
 * enum nvme_mi_control_opcode - Operation code for Control Primitives.
 * @nvme_mi_control_opcode_pause: Suspend response transmission/timeout
 * @nvme_mi_control_opcode_resume: Resume from a paused condition
 * @nvme_mi_control_opcode_abort: Re-initialize a Command Slot to the Idle state
 * @nvme_mi_control_opcode_get_state: Get the state of a Command Slot
 * @nvme_mi_control_opcode_replay: Retransmit the Response Message
 *
 * These values are intended for the @opcode field of
 * &struct nvme_mi_control_req and the @opcode argument of nvme_mi_control().
 */
enum nvme_mi_control_opcode {
nvme_mi_control_opcode_pause = 0x00,
nvme_mi_control_opcode_resume = 0x01,
nvme_mi_control_opcode_abort = 0x02,
nvme_mi_control_opcode_get_state = 0x03,
nvme_mi_control_opcode_replay = 0x04,
};

/**
 * struct nvme_mi_control_req - The Control Primitive request.
 * @hdr: Generic MI message header
 * @opcode: Control Primitive Opcodes (using &enum nvme_mi_control_opcode)
 * @tag: Opaque value passed from request to response
 * @cpsp: Control Primitive Specific Parameter (little-endian)
 *
 * The structure is packed so that its in-memory layout has no padding
 * between fields.
 */
struct nvme_mi_control_req {
	struct nvme_mi_msg_hdr hdr;
	__u8	opcode;
	__u8	tag;
	__le16	cpsp;
} __attribute__((packed));

/**
 * struct nvme_mi_control_resp - The Control Primitive response.
 * @hdr: Generic MI message header
 * @status: Generic response code, non-zero on failure
 * @tag: Opaque value passed from request to response
 * @cpsr: Control Primitive Specific Response (little-endian)
 *
 * The structure is packed so that its in-memory layout has no padding
 * between fields.
 */
struct nvme_mi_control_resp {
	struct nvme_mi_msg_hdr hdr;
	__u8	status;
	__u8	tag;
	__le16	cpsr;
} __attribute__((packed));

/**
* nvme_mi_status_to_string() - return a string representation of the MI
* status.
Expand Down Expand Up @@ -1075,6 +1121,24 @@ static inline int nvme_mi_admin_identify(nvme_mi_ctrl_t ctrl,
0, NVME_IDENTIFY_DATA_SIZE);
}

/**
 * nvme_mi_control() - Perform a Control Primitive command
 * @ep: endpoint for MI communication
 * @opcode: Control Primitive opcode (using &enum nvme_mi_control_opcode)
 * @cpsp: Control Primitive Specific Parameter
 * @result_cpsr: Optional field to return the result from the CPSR field;
 *               may be NULL if the caller does not need the value
 *
 * Perform a Control Primitive command, using the opcode specified in @opcode.
 * Stores the result from the CPSR field in @result_cpsr if set. The value is
 * only stored when the command was submitted successfully and the response
 * reports a zero status.
 *
 * Return: 0 on success, non-zero on failure
 *
 * See: &enum nvme_mi_control_opcode
 *
 */
int nvme_mi_control(nvme_mi_ep_t ep, __u8 opcode,
__u16 cpsp, __u16 *result_cpsr);

/**
* nvme_mi_admin_identify_cns_nsid() - Perform an Admin identify command using
* specific CNS/NSID parameters.
Expand Down

0 comments on commit d7fa27c

Please sign in to comment.