Increase default mbuf size and code simplification/cleanup 09/56709/3
authorXavier Simonart <xavier.simonart@intel.com>
Wed, 25 Apr 2018 23:03:05 +0000 (01:03 +0200)
committerDeepak S <deepak.s@linux.intel.com>
Fri, 25 May 2018 09:05:41 +0000 (14:35 +0530)
mbuf size was setup to achieve the best performance i.e.
using the smallest mbuf and not segmenting packets.
However this resulted in complex code, much dependent of the way
the pmd are working e.g. a change(fix) in recent dpdk i40e
implementation caused a 1782 (=1518+8+256) bytes mbuf to be too
small to hold a 1518 bytes packet.
Hence this change simplifies the mbuf size selection at the price
of a potential decrease in performance - as more memory is now used.
Unless jumbo frames are used, the mbuf size will now be the same
for all modes. The packets will not be segmented unless jumbo
frames are enabled.
If jumbo frames are enabled, packets are by default segmented, unless
the mbuf size is configured big enough in the config file.

Change-Id: I222fcac7a65c0d221d5d422f419deb9c0f864172
Signed-off-by: Xavier Simonart <xavier.simonart@intel.com>
Signed-off-by: Deepak S <deepak.s@linux.intel.com>
18 files changed:
VNFs/DPPD-PROX/defaults.c
VNFs/DPPD-PROX/defaults.h
VNFs/DPPD-PROX/handle_cgnat.c
VNFs/DPPD-PROX/handle_esp.c
VNFs/DPPD-PROX/handle_gen.c
VNFs/DPPD-PROX/handle_genl4.c
VNFs/DPPD-PROX/handle_l2fwd.c
VNFs/DPPD-PROX/handle_mirror.c
VNFs/DPPD-PROX/handle_nat.c
VNFs/DPPD-PROX/handle_nop.c
VNFs/DPPD-PROX/handle_swap.c
VNFs/DPPD-PROX/handle_tsc.c
VNFs/DPPD-PROX/main.c
VNFs/DPPD-PROX/prox_args.c
VNFs/DPPD-PROX/prox_port_cfg.c
VNFs/DPPD-PROX/prox_port_cfg.h
VNFs/DPPD-PROX/task_base.h
VNFs/DPPD-PROX/task_init.h

index 6688e8c..a6be0d7 100644 (file)
@@ -54,7 +54,8 @@ static const struct rte_eth_conf default_port_conf = {
                .jumbo_frame    = 0, /* Jumbo frame support disabled */
                .hw_strip_crc   = 1, /* CRC stripped by hardware --- always set to 1 in VF */
                .hw_vlan_extend = 0,
-               .mq_mode        = 0
+               .mq_mode        = 0,
+               .max_rx_pkt_len = PROX_MTU + ETHER_HDR_LEN + ETHER_CRC_LEN
        },
        .rx_adv_conf = {
                .rss_conf = {
@@ -164,7 +165,6 @@ void set_task_defaults(struct prox_cfg* prox_cfg, struct lcore_cfg* lcore_cfg_in
                        targ->tunnel_hop_limit = 3;
                        targ->ctrl_freq = 1000;
                        targ->lb_friend_core = 0xFF;
-                       targ->mbuf_size = MBUF_SIZE;
                        targ->n_pkts = 1024*64;
                        targ->runtime_flags |= TASK_TX_CRC;
                        targ->accuracy_limit_nsec = 5000;
index 4cf3cf7..8f850d0 100644 (file)
@@ -41,7 +41,24 @@ void set_port_defaults(void);
 #define MAX_RSS_QUEUE_BITS      9
 
 #define PROX_VLAN_TAG_SIZE     4
-#define MBUF_SIZE (ETHER_MAX_LEN + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM +  2 * PROX_VLAN_TAG_SIZE)
+
+/* MBUF_SIZE can be configured based on the following:
+   - If only one segment is used ETH_TXQ_FLAGS_NOMULTSEGS can be used resulting
+     in vector mode used for transmission hence higher performance
+   - Only one segment is used by the rx function if the mbuf size is big enough
+   - Bigger mbufs result in more memory used, hence slightly lower performance (DTLB misses)
+   - Selecting the smaller mbuf is not obvious as pmds might behave slightly differently:
+     - on ixgbe a 1526 + 256 mbuf size will cause any packets bigger than 1024 bytes to be segmented
+     - on i40e a 1526 + 256 mbuf size will cause any packets bigger than 1408 bytes to be segmented
+     - other pmds might have additional requirements
+   As the performance decrease due to the usage of bigger mbufs is not very important, we prefer
+   here to use the same, bigger, mbuf size for all pmds, making the code easier to support.
+   An mbuf size of 2048 + 128 + 128 + 8 can hold a 2048 bytes packet, and only one segment will be used
+   except if jumbo frames are enabled. +8 (VLAN) is needed for i40e (and maybe other pmds).
+   TX_MBUF_SIZE is used when transmitting only: in this case the mbuf size can be smaller.
+*/
+#define MBUF_SIZE (2048 + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM + 2 * PROX_VLAN_TAG_SIZE)
+#define TX_MBUF_SIZE (ETHER_MAX_LEN + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM +  2 * PROX_VLAN_TAG_SIZE)
 
 #define PROX_MTU   ETHER_MAX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN
 
index 6f176c0..d79a6d5 100644 (file)
@@ -973,12 +973,11 @@ static struct task_init task_init_nat = {
        .init = init_task_nat,
        .handle = handle_nat_bulk,
 #ifdef SOFT_CRC
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_ROUTING|TASK_FEATURE_ZERO_RX,
+       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_ROUTING|TASK_FEATURE_ZERO_RX,
 #else
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_ROUTING|TASK_FEATURE_ZERO_RX,
+       .flag_features = TASK_FEATURE_ROUTING|TASK_FEATURE_ZERO_RX,
 #endif
        .size = sizeof(struct task_nat),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 __attribute__((constructor)) static void reg_task_nat(void)
index b609fce..0039c9a 100644 (file)
@@ -691,21 +691,19 @@ static int handle_esp_dec_bulk(struct task_base *tbase, struct rte_mbuf **mbufs,
 }
 
 struct task_init task_init_esp_enc = {
-       .mode = ESP_ENC,
-       .mode_str = "esp_enc",
-       .init = init_task_esp_enc,
-       .handle = handle_esp_enc_bulk,
-       .size = sizeof(struct task_esp_enc),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM
+        .mode = ESP_ENC,
+        .mode_str = "esp_enc",
+        .init = init_task_esp_enc,
+        .handle = handle_esp_enc_bulk,
+        .size = sizeof(struct task_esp_enc),
 };
 
 struct task_init task_init_esp_dec = {
-       .mode = ESP_ENC,
-       .mode_str = "esp_dec",
-       .init = init_task_esp_dec,
-       .handle = handle_esp_dec_bulk,
-       .size = sizeof(struct task_esp_dec),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM
+        .mode = ESP_ENC,
+        .mode_str = "esp_dec",
+        .init = init_task_esp_dec,
+        .handle = handle_esp_dec_bulk,
+        .size = sizeof(struct task_esp_dec),
 };
 
 __attribute__((constructor)) static void reg_task_esp_enc(void)
index 0e5164b..ffdbb1b 100644 (file)
@@ -1005,15 +1005,20 @@ static struct rte_mempool *task_gen_create_mempool(struct task_args *targ, uint1
        const int sock_id = rte_lcore_to_socket_id(targ->lconf->id);
 
        name[0]++;
-       uint32_t mbuf_size = MBUF_SIZE;
+       uint32_t mbuf_size = TX_MBUF_SIZE;
        if (max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > mbuf_size)
                mbuf_size = max_frame_size + (unsigned)sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
+       plog_info("\t\tCreating mempool with name '%s'\n", name);
        ret = rte_mempool_create(name, targ->nb_mbuf - 1, mbuf_size,
                                 targ->nb_cache_mbuf, sizeof(struct rte_pktmbuf_pool_private),
                                 rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
                                 sock_id, 0);
        PROX_PANIC(ret == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
                   sock_id, targ->nb_mbuf - 1);
+
+        plog_info("\t\tMempool %p size = %u * %u cache %u, socket %d\n", ret,
+                  targ->nb_mbuf - 1, mbuf_size, targ->nb_cache_mbuf, sock_id);
+
        return ret;
 }
 
@@ -1318,7 +1323,7 @@ static struct task_init task_init_gen = {
 #ifdef SOFT_CRC
        // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
        // vector mode is used by DPDK, resulting (theoretically) in higher performance.
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
        .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
@@ -1335,7 +1340,7 @@ static struct task_init task_init_gen_l3 = {
 #ifdef SOFT_CRC
        // For SOFT_CRC, no offload is needed. If both NOOFFLOADS and NOMULTSEGS flags are set the
        // vector mode is used by DPDK, resulting (theoretically) in higher performance.
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
        .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
@@ -1350,7 +1355,7 @@ static struct task_init task_init_gen_pcap = {
        .start = start_pcap,
        .early_init = init_task_gen_early,
 #ifdef SOFT_CRC
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX | TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
        .flag_features = TASK_FEATURE_NEVER_DISCARDS | TASK_FEATURE_NO_RX,
 #endif
index 4c62c64..056bd83 100644 (file)
@@ -845,7 +845,7 @@ static void init_task_gen(struct task_base *tbase, struct task_args *targ)
        static char name[] = "server_mempool";
        name[0]++;
        task->mempool = rte_mempool_create(name,
-                                          4*1024 - 1, MBUF_SIZE,
+                                          4*1024 - 1, TX_MBUF_SIZE,
                                           targ->nb_cache_mbuf,
                                           sizeof(struct rte_pktmbuf_pool_private),
                                           rte_pktmbuf_pool_init, NULL,
@@ -959,7 +959,7 @@ static void init_task_gen_client(struct task_base *tbase, struct task_args *targ
        const uint32_t socket = rte_lcore_to_socket_id(targ->lconf->id);
        name[0]++;
        task->mempool = rte_mempool_create(name,
-                                          4*1024 - 1, MBUF_SIZE,
+                                          4*1024 - 1, TX_MBUF_SIZE,
                                           targ->nb_cache_mbuf,
                                           sizeof(struct rte_pktmbuf_pool_private),
                                           rte_pktmbuf_pool_init, NULL,
@@ -1118,7 +1118,6 @@ static struct task_init task_init_gen1 = {
        .stop = stop_task_gen_server,
        .flag_features = TASK_FEATURE_ZERO_RX,
        .size = sizeof(struct task_gen_server),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 static struct task_init task_init_gen2 = {
@@ -1129,7 +1128,6 @@ static struct task_init task_init_gen2 = {
        .stop = stop_task_gen_client,
        .flag_features = TASK_FEATURE_ZERO_RX,
        .size = sizeof(struct task_gen_client),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 __attribute__((constructor)) static void reg_task_gen(void)
index faebe6f..e5a8c33 100644 (file)
@@ -112,9 +112,8 @@ static struct task_init task_init_l2fwd = {
        .mode_str = "l2fwd",
        .init = init_task_l2fwd,
        .handle = handle_l2fwd_bulk,
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS|TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS|TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
        .size = sizeof(struct task_l2fwd),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 __attribute__((constructor)) static void reg_task_l2fwd(void)
index 0d764b4..894ea79 100644 (file)
@@ -139,9 +139,8 @@ static struct task_init task_init_mirror = {
        .mode_str = "mirror",
        .init = init_task_mirror,
        .handle = handle_mirror_bulk,
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS | TASK_FEATURE_TXQ_FLAGS_REFCOUNT,
+       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_REFCOUNT,
        .size = sizeof(struct task_mirror),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 static struct task_init task_init_mirror2 = {
@@ -149,9 +148,8 @@ static struct task_init task_init_mirror2 = {
        .sub_mode_str = "copy",
        .init = init_task_mirror_copy,
        .handle = handle_mirror_bulk_copy,
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS | TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
        .size = sizeof(struct task_mirror),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 __attribute__((constructor)) static void reg_task_mirror(void)
index 23d7ad8..dff53d6 100644 (file)
@@ -182,12 +182,11 @@ static struct task_init task_init_nat = {
        .init = init_task_nat,
        .handle = handle_nat_bulk,
 #ifdef SOFT_CRC
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
 #else
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = 0,
 #endif
        .size = sizeof(struct task_nat),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 __attribute__((constructor)) static void reg_task_nat(void)
index b3eef54..4d10a36 100644 (file)
@@ -22,9 +22,8 @@ static struct task_init task_init_nop_thrpt_opt = {
        .init = NULL,
        .handle = handle_nop_bulk,
        .thread_x = thread_nop,
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS|TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_THROUGHPUT_OPT|TASK_FEATURE_MULTI_RX,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS|TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_THROUGHPUT_OPT|TASK_FEATURE_MULTI_RX,
        .size = sizeof(struct task_nop),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 static struct task_init task_init_nop_lat_opt = {
@@ -33,9 +32,8 @@ static struct task_init task_init_nop_lat_opt = {
        .init = NULL,
        .handle = handle_nop_bulk,
        .thread_x = thread_nop,
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS|TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_MULTI_RX,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS|TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_MULTI_RX,
        .size = sizeof(struct task_nop),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 static struct task_init task_init_none;
index 68dfe2b..63c4dbd 100644 (file)
@@ -246,9 +246,8 @@ static struct task_init task_init_swap = {
        .mode_str = "swap",
        .init = init_task_swap,
        .handle = handle_swap_bulk,
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
        .size = sizeof(struct task_swap),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 static struct task_init task_init_swap_arp = {
@@ -256,9 +255,8 @@ static struct task_init task_init_swap_arp = {
        .sub_mode_str = "l3",
        .init = init_task_swap,
        .handle = handle_swap_bulk,
-       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS,
+       .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS,
        .size = sizeof(struct task_swap),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 __attribute__((constructor)) static void reg_task_swap(void)
index e686aaa..245fe7a 100644 (file)
@@ -40,9 +40,8 @@ static struct task_init task_init = {
        .mode_str = "tsc",
        .init = NULL,
        .handle = handle_bulk_tsc,
-       .flag_features = TASK_FEATURE_NEVER_DISCARDS|TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_THROUGHPUT_OPT,
+       .flag_features = TASK_FEATURE_NEVER_DISCARDS|TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_THROUGHPUT_OPT,
        .size = sizeof(struct task_tsc),
-       .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
 };
 
 __attribute__((constructor)) static void reg_task_nop(void)
index 59a169d..2e1616a 100644 (file)
@@ -252,10 +252,6 @@ static void configure_if_tx_queues(struct task_args *targ, uint8_t socket)
                   use refcnt. */
                if (!chain_flag_state(targ, TASK_FEATURE_TXQ_FLAGS_REFCOUNT, 1)) {
                        prox_port_cfg[if_port].tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOREFCOUNT;
-                       plog_info("\t\tEnabling No refcnt on port %d\n", if_port);
-               }
-               else {
-                       plog_info("\t\tRefcnt used on port %d\n", if_port);
                }
 
                /* By default OFFLOAD is enabled, but if the whole
@@ -264,22 +260,8 @@ static void configure_if_tx_queues(struct task_args *targ, uint8_t socket)
                   disabled for the destination port. */
                if (!chain_flag_state(targ, TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS, 0)) {
                        prox_port_cfg[if_port].tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
-                       plog_info("\t\tDisabling TX offloads on port %d\n", if_port);
-               } else {
-                       plog_info("\t\tEnabling TX offloads on port %d\n", if_port);
                }
 
-               /* By default NOMULTSEGS is disabled, as drivers/NIC might split packets on RX
-                  It should only be enabled when we know for sure that the RX does not split packets.
-                  Set the ETH_TXQ_FLAGS_NOMULTSEGS flag if all of the tasks up to the task
-                  transmitting to the port use no_multsegs. */
-               if (!chain_flag_state(targ, TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS, 0)) {
-                       prox_port_cfg[if_port].tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
-                       plog_info("\t\tEnabling No MultiSegs on port %d\n", if_port);
-               }
-               else {
-                       plog_info("\t\tMultiSegs used on port %d\n", if_port);
-               }
        }
 }
 
@@ -293,32 +275,50 @@ static void configure_if_rx_queues(struct task_args *targ, uint8_t socket)
                        return;
                }
 
-               PROX_PANIC(!prox_port_cfg[if_port].active, "Port %u not used, aborting...\n", if_port);
                port = &prox_port_cfg[if_port];
+               PROX_PANIC(!port->active, "Port %u not used, aborting...\n", if_port);
 
                if(port->rx_ring[0] != '\0') {
                        port->n_rxq = 0;
                }
 
-               // Force multi segment support if mbuf size is not big enough.
+               // If the mbuf size (of the rx task) is not big enough, we might receive multiple segments
                // This is usually the case when setting a big mtu size i.e. enabling jumbo frames.
+               // If the packets get transmitted, then multi segments will have to be enabled on the TX port
                uint16_t max_frame_size = port->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + 2 * PROX_VLAN_TAG_SIZE;
                if (max_frame_size + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM > targ->mbuf_size) {
-                       targ->task_init->flag_features &= ~TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS;
-                       plog_info("\t\tDisabling No MultSegs on port %u as %lu > %u\n", if_port, max_frame_size + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM, targ->mbuf_size);
+                       targ->task_init->flag_features |= TASK_FEATURE_TXQ_FLAGS_MULTSEGS;
                }
-               targ->rx_port_queue[i].queue = prox_port_cfg[if_port].n_rxq;
+               targ->rx_port_queue[i].queue = port->n_rxq;
                port->pool[targ->rx_port_queue[i].queue] = targ->pool;
                port->pool_size[targ->rx_port_queue[i].queue] = targ->nb_mbuf - 1;
                port->n_rxq++;
 
-               int dsocket = prox_port_cfg[if_port].socket;
+               int dsocket = port->socket;
                if (dsocket != -1 && dsocket != socket) {
                        plog_warn("RX core on socket %d while device on socket %d\n", socket, dsocket);
                }
        }
 }
 
+static void configure_multi_segments(void)
+{
+       struct lcore_cfg *lconf = NULL;
+       struct task_args *targ;
+       uint8_t if_port;
+
+       while (core_targ_next(&lconf, &targ, 0) == 0) {
+               for (uint8_t i = 0; i < targ->nb_txports; ++i) {
+                       if_port = targ->tx_port_queue[i].port;
+                       // Multi segment is disabled for most tasks. It is only enabled for tasks requiring big packets.
+                       // We can only enable "no multi segment" if no such task exists in the chain of tasks.
+                       if (!chain_flag_state(targ, TASK_FEATURE_TXQ_FLAGS_MULTSEGS, 1)) {
+                               prox_port_cfg[if_port].tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
+                       }
+               }
+       }
+}
+
 static void configure_if_queues(void)
 {
        struct lcore_cfg *lconf = NULL;
@@ -591,20 +591,16 @@ static void set_mbuf_size(struct task_args *targ)
 {
        /* mbuf size can be set
         *  - from config file (highest priority, overwriting any other config) - should only be used as workaround
-        *  - through each 'mode', overwriting the default mbuf_size
-        *  - defaulted to MBUF_SIZE i.e. 1518 Bytes
+        *  - defaulted to MBUF_SIZE.
         * Except if set explicitely, ensure that size is big enough for vmxnet3 driver
         */
-       if (targ->mbuf_size_set_explicitely)
+       if (targ->mbuf_size)
                return;
 
-       if (targ->task_init->mbuf_size != 0) {
-               /* mbuf_size not set through config file but set through mode */
-               targ->mbuf_size = targ->task_init->mbuf_size;
-       }
-
+       targ->mbuf_size = MBUF_SIZE;
        struct prox_port_cfg *port;
-       uint16_t max_frame_size = 0;
+       uint16_t max_frame_size = 0, min_buffer_size = 0;
+       int i40e = 0;
        for (int i = 0; i < targ->nb_rxports; i++) {
                uint8_t if_port = targ->rx_port_queue[i].port;
 
@@ -614,22 +610,24 @@ static void set_mbuf_size(struct task_args *targ)
                port = &prox_port_cfg[if_port];
                if (max_frame_size < port->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + 2 * PROX_VLAN_TAG_SIZE)
                        max_frame_size = port->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + 2 * PROX_VLAN_TAG_SIZE;
+               if (min_buffer_size < port->min_rx_bufsize)
+                       min_buffer_size = port->min_rx_bufsize;
 
-               if (strcmp(port->short_name, "vmxnet3") == 0) {
-                       if (targ->mbuf_size < MBUF_SIZE + RTE_PKTMBUF_HEADROOM)
-                               targ->mbuf_size = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;
-                       if (targ->mbuf_size < max_frame_size)
-                               targ->mbuf_size = max_frame_size + RTE_PKTMBUF_HEADROOM;
-               }
+               // Check whether we receive from i40e. This driver have extra mbuf size requirements
+               if (strcmp(port->short_name, "i40e") == 0)
+                       i40e = 1;
        }
-       if (max_frame_size) {
+       if (i40e) {
                // i40e supports a maximum of 5 descriptors chained
                uint16_t required_mbuf_size = RTE_ALIGN(max_frame_size / 5, 128) + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
                if (required_mbuf_size > targ->mbuf_size) {
                        targ->mbuf_size = required_mbuf_size;
-                       plog_info("\t\tSetting mbuf_size to %u to support frame_size %u (mtu %u)\n", targ->mbuf_size, max_frame_size, port->mtu);
+                       plog_info("\t\tSetting mbuf_size to %u to support frame_size %u\n", targ->mbuf_size, max_frame_size);
                }
        }
+       if (min_buffer_size > targ->mbuf_size) {
+               plog_warn("Mbuf size might be too small. This might result in packet segmentation and memory leak\n");
+       }
 
 }
 
@@ -915,6 +913,8 @@ static void init_lcores(void)
        plog_info("=== Initializing queue numbers on cores ===\n");
        configure_if_queues();
 
+       configure_multi_segments();
+
        plog_info("=== Initializing rings on cores ===\n");
        init_rings();
 
index aa7ff51..fb88a65 100644 (file)
@@ -553,8 +553,11 @@ static int get_port_cfg(unsigned sindex, char *str, void *data)
                }
                if (val) {
                        cfg->mtu = val;
-                       if (cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > ETHER_MAX_LEN) {
-                               cfg->port_conf.rxmode.max_rx_pkt_len = cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + 2 * PROX_VLAN_TAG_SIZE;
+                       // A frame of 1526 bytes (1500 bytes mtu, 14 bytes hdr, 4 bytes crc and 8 bytes vlan)
+                       // should not be considered as a jumbo frame. However rte_ethdev.c considers that
+                       // the max_rx_pkt_len for a non jumbo frame is 1518
+                       cfg->port_conf.rxmode.max_rx_pkt_len = cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+                       if (cfg->port_conf.rxmode.max_rx_pkt_len > ETHER_MAX_LEN) {
                                cfg->port_conf.rxmode.jumbo_frame = 1;
                        }
                }
@@ -1219,7 +1222,6 @@ static int get_core_cfg(unsigned sindex, char *str, void *data)
        }
 
        else if (STR_EQ(str, "mbuf size")) {
-               targ->mbuf_size_set_explicitely = 1;
                return parse_int(&targ->mbuf_size, pkey);
        }
        if (STR_EQ(str, "memcache size")) {
index 269b1c6..cea69a3 100644 (file)
@@ -182,6 +182,8 @@ void init_rte_dev(int use_dummy_devices)
 
                port_cfg->max_txq = dev_info.max_tx_queues;
                port_cfg->max_rxq = dev_info.max_rx_queues;
+               port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
+               port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
 
                if (!dev_info.pci_dev)
                        continue;
@@ -189,7 +191,7 @@ void init_rte_dev(int use_dummy_devices)
                snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
                         "%04x:%02x:%02x.%1x", dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
                strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
-               plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
+               plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d, max_rx_pktlen = %d, min_rx_bufsize = %d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq, port_cfg->max_rx_pkt_len, port_cfg->min_rx_bufsize);
 
                if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
                        strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
@@ -278,10 +280,7 @@ static void init_port(struct prox_port_cfg *port_cfg)
                /* not receiving on this port */
                plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
                port_cfg->n_rxq = 1;
-               uint32_t mbuf_size = MBUF_SIZE;
-               if (strcmp(port_cfg->short_name, "vmxnet3") == 0) {
-                       mbuf_size = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;
-               }
+               uint32_t mbuf_size = TX_MBUF_SIZE;
                plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
                          port_cfg->socket, port_cfg->n_rxd, mbuf_size);
                port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
@@ -295,9 +294,14 @@ static void init_port(struct prox_port_cfg *port_cfg)
                dummy_pool_name[0]++;
        } else {
                // Most pmd should now support setting mtu
+               if (port_cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
+                       plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
+                       port_cfg->mtu = port_cfg->max_rx_pkt_len;
+               }
                plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
                ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
-               PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
+               if (ret)
+                       plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
 
                if (port_cfg->n_txq == 0) {
                        /* not sending on this port */
@@ -317,6 +321,20 @@ static void init_port(struct prox_port_cfg *port_cfg)
                port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
 #endif
        }
+       if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
+               plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
+       else
+               plog_info("\t\tRefcnt enabled on port %d\n", port_id);
+
+       if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
+               plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
+       else
+               plog_info("\t\tTX offloads enabled on port %d\n", port_id);
+
+       if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
+               plog_info("\t\tEnabling No TX MultiSegs on port %d\n", port_id);
+       else
+               plog_info("\t\tTX Multi segments enabled on port %d\n", port_id);
 
        plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
                  port_id, port_cfg->n_rxq, port_cfg->n_txq);
index 455e2b2..0c804c6 100644 (file)
@@ -64,6 +64,8 @@ struct prox_port_cfg {
        struct {
                int tx_offload_cksum;
        } capabilities;
+       uint32_t max_rx_pkt_len;
+       uint32_t min_rx_bufsize;
 };
 
 extern rte_atomic32_t lsc;
index 95c50ba..1327a6c 100644 (file)
@@ -44,7 +44,7 @@
 #define TASK_FEATURE_NEVER_DISCARDS            0x0008
 #define TASK_FEATURE_NO_RX                     0x0010
 #define TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS      0x0020
-#define TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS      0x0040
+#define TASK_FEATURE_TXQ_FLAGS_MULTSEGS        0x0040
 #define TASK_FEATURE_ZERO_RX                   0x0080
 #define TASK_FEATURE_TXQ_FLAGS_REFCOUNT        0x0100
 #define TASK_FEATURE_TSC_RX                    0x0200
index a8ac14c..5186826 100644 (file)
@@ -80,7 +80,6 @@ struct task_init {
        size_t size;
        uint16_t     flag_req_data; /* flags from prox_shared.h */
        uint64_t     flag_features;
-       uint32_t mbuf_size;
        LIST_ENTRY(task_init) entries;
 };
 
@@ -106,7 +105,6 @@ struct task_args {
        struct lcore_cfg       *lconf;
        uint32_t               nb_mbuf;
        uint32_t               mbuf_size;
-       uint8_t                mbuf_size_set_explicitely;
        uint32_t               nb_cache_mbuf;
        uint8_t                nb_slave_threads;
        uint8_t                nb_worker_threads;