Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

performance optimization: split all the members of dpvs connection into two groups for its initialization. #598

Open
wants to merge 2 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 94 additions & 75 deletions include/ipvs/conn.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,27 +51,26 @@ enum {
#define DPVS_CONN_F_NOFASTXMIT IP_VS_CONN_F_NOFASTXMIT

struct dp_vs_conn_param {
int af;
uint16_t proto;
const union inet_addr *caddr;
const union inet_addr *vaddr;
uint16_t cport;
uint16_t vport;
uint16_t ct_dport; /* RS port for template connection */
bool outwall;
uint8_t af;
bool outwall;
uint16_t proto;
uint16_t cport;
uint16_t vport;
const union inet_addr *caddr;
const union inet_addr *vaddr;
uint16_t ct_dport; /* RS port for template connection */
ywc689 marked this conversation as resolved.
Show resolved Hide resolved
};

struct conn_tuple_hash {
struct list_head list;
int direct; /* inbound/outbound */

/* tuple info */
int af;
uint16_t proto;
union inet_addr saddr; /* pkt's source addr */
union inet_addr daddr; /* pkt's dest addr */
uint8_t af;
uint8_t proto;
uint8_t direct; /* inbound/outbound */
uint8_t pad;
uint16_t sport;
uint16_t dport;
union inet_addr saddr; /* pkt's source addr */
union inet_addr daddr; /* pkt's dest addr */
} __rte_cache_aligned;

struct dp_vs_conn_stats {
Expand All @@ -83,89 +82,109 @@ struct dp_vs_conn_stats {

struct dp_vs_proto;

/*
* All the members of dp_vs_conn are classified into two groups, A and B.
* And a new member must be added to either of them.
*/
struct dp_vs_conn {
int af;
RTE_MARKER cacheline0;
/*
* Group A: the below members are initialized in dp_vs_conn_new().
*/
struct conn_tuple_hash tuplehash[DPVS_CONN_DIR_MAX];

RTE_MARKER cacheline2 __rte_cache_min_aligned;
union inet_addr caddr; /* Client address */
union inet_addr vaddr; /* Virtual address */
union inet_addr laddr; /* director Local address */
union inet_addr daddr; /* Destination (RS) address */

RTE_MARKER cacheline3 __rte_cache_min_aligned;
uint8_t af;
uint8_t proto;
union inet_addr caddr; /* Client address */
union inet_addr vaddr; /* Virtual address */
union inet_addr laddr; /* director Local address */
union inet_addr daddr; /* Destination (RS) address */
lcoreid_t lcore;
bool outwall; /* flag for gfwip */
rte_atomic32_t refcnt;

uint16_t cport;
uint16_t vport;
uint16_t lport;
uint16_t dport;

struct rte_mempool *connpool;
struct conn_tuple_hash tuplehash[DPVS_CONN_DIR_MAX];
rte_atomic32_t refcnt;
struct dpvs_timer timer;
struct rte_mempool *connpool;
struct dp_vs_dest *dest; /* real server */
struct timeval timeout;
lcoreid_t lcore;
struct dp_vs_dest *dest; /* real server */
void *prot_data; /* protocol specific data */

/* for FNAT */
struct dp_vs_laddr *local; /* local address */
struct dp_vs_seq fnat_seq;

/* save last SEQ/ACK from RS for RST when conn expire*/
uint32_t rs_end_seq;
uint32_t rs_end_ack;

int (*packet_xmit)(struct dp_vs_proto *prot,
struct dp_vs_conn *conn,
struct rte_mbuf *mbuf);
struct dp_vs_conn *conn, struct rte_mbuf *mbuf);
int (*packet_out_xmit)(struct dp_vs_proto *prot,
struct dp_vs_conn *conn,
struct rte_mbuf *mbuf);
struct dp_vs_conn *conn, struct rte_mbuf *mbuf);

RTE_MARKER cacheline4 __rte_cache_min_aligned;
struct dp_vs_laddr *local; /* local address in fnat mode */
volatile uint32_t flags;

/*
* Group B: the below members are initialized in dp_vs_conn_pre_init().
*/
/* state transition */
uint16_t pad1;
volatile uint8_t state;
volatile uint8_t old_state; /* used for state transition
triggered synchronization */
/* route for neigbour */
struct netif_port *in_dev; /* inside to rs */
struct netif_port *out_dev; /* outside to client */

union inet_addr in_nexthop; /* to rs*/
union inet_addr out_nexthop; /* to client*/

RTE_MARKER cacheline5 __rte_cache_min_aligned;
/* L2 fast xmit */
struct rte_ether_addr in_smac;
struct rte_ether_addr in_dmac;
struct rte_ether_addr out_smac;
struct rte_ether_addr out_dmac;

/* route for neigbour */
struct netif_port *in_dev; /* inside to rs*/
struct netif_port *out_dev; /* outside to client*/
union inet_addr in_nexthop; /* to rs*/
union inet_addr out_nexthop; /* to client*/

#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG
/* statistics */
struct dp_vs_conn_stats stats;
#endif
/* save last SEQ/ACK from RS for RST when conn expire */
uint32_t rs_end_seq;
uint32_t rs_end_ack;

/* synproxy related members */
struct dp_vs_seq syn_proxy_seq; /* seq used in synproxy */
struct list_head ack_mbuf; /* ack mbuf saved in step2 */
uint32_t ack_num; /* ack mbuf number stored */
struct rte_mbuf *syn_mbuf; /* saved rs syn packet for retransmition */
rte_atomic32_t syn_retry_max; /* syn retransmition max packets */
/* for FNAT */
struct dp_vs_seq fnat_seq;

/* add for stopping ack storm */
uint32_t last_seq; /* seq of the last ack packet */
uint32_t last_ack_seq; /* ack seq of the last ack packet */
rte_atomic32_t dup_ack_cnt; /* count of repeated ack packets */

/* flags and state transition */
volatile uint16_t flags;
volatile uint16_t state;
volatile uint16_t old_state; /* old state, to be used for state transition
triggered synchronization */
/* controll members */
struct dp_vs_conn *control; /* master who controlls me */
rte_atomic32_t n_control; /* number of connections controlled by me*/
#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG
uint64_t ctime; /* create time */
#endif
uint32_t last_seq; /* seq of the last ack packet */
uint32_t last_ack_seq; /* ack seq of the last ack packet */
rte_atomic32_t dup_ack_cnt; /* count of repeated ack packets */
uint32_t pad2;

/* connection redirect in fnat/snat/nat modes */
struct dp_vs_redirect *redirect;
RTE_MARKER cacheline6 __rte_cache_min_aligned;
/* synproxy related members */
struct list_head ack_mbuf; /* ack mbuf saved in step2 */
struct dp_vs_seq syn_proxy_seq; /* seq used in synproxy */
struct rte_mbuf *syn_mbuf; /* saved rs syn packet for retransmition */
uint32_t ack_num; /* ack mbuf number stored */
rte_atomic32_t syn_retry_max; /* syn retransmition max packets */

/* control members */
RTE_MARKER cacheline7 __rte_cache_min_aligned;
struct dpvs_timer timer;
struct dp_vs_conn *control; /* master who controlls me */
void *prot_data; /* protocol specific data */

/* flag for gfwip */
bool outwall;
rte_atomic32_t n_control; /* number of connections controlled
by me */
uint32_t pad3;

RTE_MARKER cacheline8 __rte_cache_min_aligned;
#ifdef CONFIG_DPVS_IPVS_STATS_DEBUG
uint64_t ctime; /* create time */
struct dp_vs_conn_stats stats; /* statistics */
#endif
/*
* the below member is initialized in dp_vs_conn_alloc().
*/
struct dp_vs_redirect *redirect; /* used in fnat/snat/nat modes */
} __rte_cache_aligned;

/* for syn-proxy to save all ack packet in conn before rs's syn-ack arrives */
Expand Down
21 changes: 17 additions & 4 deletions src/ipvs/ip_vs_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,13 @@ static struct dp_vs_conn *dp_vs_conn_alloc(enum dpvs_fwd_mode fwdmode,
return NULL;
}

memset(conn, 0, sizeof(struct dp_vs_conn));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it safe to use uninitialized dp_vs_conn? Did you examine every field of dp_vs_conn and check whether it may be used uninitialized?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It has been tested and no issue is found.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think uninitialized dp_vs_conn is good idea. Someday add some field in dp_vs_conn, it may lead problem.

For uninitialized dp_vs_conn maybe we can do something in dp_vs_conn_free before invoke rte_mempool_put?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fnat_seq needs to be initialized as 0, otherwise it brings the fault to tcp connections. Additionally, double assignment for the same field of dpvs connection is not good to CPS performance.

conn->connpool = this_conn_cache;
this_conn_count++;

/* no need to create redirect for the global template connection */
if (likely((flags & DPVS_CONN_F_TEMPLATE) == 0))
r = dp_vs_redirect_alloc(fwdmode);

conn->redirect = r;
conn->redirect = r;

return conn;
}
Expand Down Expand Up @@ -801,6 +799,18 @@ static void conn_flush(void)
#endif
}

static inline void dp_vs_conn_pre_init(struct dp_vs_conn *new)
{
size_t len;

len = offsetof(struct dp_vs_conn, redirect)
- offsetof(struct dp_vs_conn, pad1);

andrewhit marked this conversation as resolved.
Show resolved Hide resolved
memset(&new->pad1, 0, len);

INIT_LIST_HEAD(&new->ack_mbuf);
}

struct dp_vs_conn *dp_vs_conn_new(struct rte_mbuf *mbuf,
const struct dp_vs_iphdr *iph,
struct dp_vs_conn_param *param,
Expand All @@ -818,7 +828,10 @@ struct dp_vs_conn *dp_vs_conn_new(struct rte_mbuf *mbuf,
if (unlikely(!new))
return NULL;

new->flags = flags;
dp_vs_conn_pre_init(new);

new->connpool = this_conn_cache;
new->flags = flags;

/* set proper RS port */
if (dp_vs_conn_is_template(new) || param->ct_dport != 0)
Expand Down