2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
43 #include "prox_compat.h"
44 #include "rte_ethdev.h"
46 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
49 int prox_nb_active_ports(void)
52 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
53 ret += prox_port_cfg[i].active;
58 int prox_last_port_active(void)
61 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
62 if (prox_port_cfg[i].active) {
69 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
70 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
71 __attribute__((unused)) void *ret_param)
73 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
74 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
75 __attribute__((unused)) void *ret_param)
77 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
81 if (RTE_ETH_EVENT_INTR_LSC != type) {
82 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
89 rte_atomic32_inc(&lsc);
91 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Argument bundle handed to prox_pktmbuf_reinit() through its opaque `arg`. */
struct prox_pktmbuf_reinit_args {
	struct rte_mempool *mp;   /* pool whose objects are re-initialised */
	struct lcore_cfg *lconf;  /* forwarded as the opaque argument of prox_pktmbuf_init() */
};
101 /* standard mbuf initialization procedure */
102 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
104 struct rte_mbuf *mbuf = _m;
106 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
107 mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
109 mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
110 mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
113 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
116 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
118 struct prox_pktmbuf_reinit_args *init_args = arg;
122 obj += init_args->mp->header_size;
123 m = (struct rte_mbuf*)obj;
125 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
/*
 * Resolve one requested TX offload `flag` for the `port_cfg` in the caller's
 * scope. A requested flag is, in order of precedence:
 *   - dropped when listed in disabled_tx_offload,
 *   - enabled port-wide when the device advertises it in tx_offload_capa,
 *   - enabled per queue when advertised in tx_queue_offload_capa,
 *   - dropped otherwise.
 * A flag that was not requested is only logged as disabled.
 * Wrapped in do { } while (0) so the macro behaves as a single statement.
 */
#define CONFIGURE_TX_OFFLOAD(flag) \
	do {\
		if (port_cfg->requested_tx_offload & (flag)) {\
			if (port_cfg->disabled_tx_offload & (flag)) {\
				plog_info("\t\t%s disabled by configuration\n", #flag);\
				port_cfg->requested_tx_offload &= ~(flag);\
			} else if (port_cfg->dev_info.tx_offload_capa & (flag)) {\
				port_cfg->port_conf.txmode.offloads |= (flag);\
				plog_info("\t\t%s enabled on port\n", #flag);\
			} else if (port_cfg->dev_info.tx_queue_offload_capa & (flag)) {\
				port_cfg->tx_conf.offloads |= (flag);\
				plog_info("\t\t%s enabled on queue\n", #flag);\
			} else {\
				port_cfg->requested_tx_offload &= ~(flag);\
				plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
			}\
		} else {\
			plog_info("\t\t%s disabled\n", #flag);\
		}\
	} while (0)
/*
 * Resolve one requested RX offload `flag` for the `port_cfg` in the caller's
 * scope. A requested flag is enabled port-wide when advertised in
 * rx_offload_capa, else per queue when advertised in rx_queue_offload_capa,
 * else removed from the request. A flag that was not requested is only
 * logged as disabled.
 * Wrapped in do { } while (0) so the macro behaves as a single statement.
 */
#define CONFIGURE_RX_OFFLOAD(flag) \
	do {\
		if (port_cfg->requested_rx_offload & (flag)) {\
			if (port_cfg->dev_info.rx_offload_capa & (flag)) {\
				port_cfg->port_conf.rxmode.offloads |= (flag);\
				plog_info("\t\t%s enabled on port\n", #flag);\
			} else if (port_cfg->dev_info.rx_queue_offload_capa & (flag)) {\
				port_cfg->rx_conf.offloads |= (flag);\
				plog_info("\t\t%s enabled on queue\n", #flag);\
			} else {\
				port_cfg->requested_rx_offload &= ~(flag);\
				plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
			}\
		} else {\
			plog_info("\t\t%s disabled\n", #flag);\
		}\
	} while (0)
164 /* initialize rte devices and check the number of available ports */
165 void init_rte_dev(int use_dummy_devices)
167 uint8_t nb_ports, port_id_max;
169 struct rte_eth_dev_info dev_info;
170 const struct rte_pci_device *pci_dev;
172 nb_ports = rte_eth_dev_count();
173 /* get available ports configuration */
174 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
176 if (use_dummy_devices) {
177 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
178 nb_ports = prox_last_port_active() + 1;
179 plog_info("Creating %u dummy devices\n", nb_ports);
181 char port_name[32] = "0dummy_dev";
182 for (uint32_t i = 0; i < nb_ports; ++i) {
183 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
184 rte_vdev_init(port_name, "size=ETHER_MIN_LEN,copy=0");
186 eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
191 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
194 else if (prox_last_port_active() != -1) {
195 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
196 plog_info("\tDPDK has found %u ports\n", nb_ports);
199 if (nb_ports > PROX_MAX_PORTS) {
200 plog_warn("\tWarning: I can deal with at most %u ports."
201 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
203 nb_ports = PROX_MAX_PORTS;
205 port_id_max = nb_ports - 1;
206 port_id_last = prox_last_port_active();
207 PROX_PANIC(port_id_last > port_id_max,
208 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
209 port_id_last, port_id_max);
211 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
212 for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
213 /* skip ports that are not enabled */
214 if (!prox_port_cfg[port_id].active) {
217 plog_info("\tGetting info for rte dev %u\n", port_id);
218 rte_eth_dev_info_get(port_id, &dev_info);
219 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
220 port_cfg->socket = -1;
222 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
223 port_cfg->max_txq = dev_info.max_tx_queues;
224 port_cfg->max_rxq = dev_info.max_rx_queues;
225 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
226 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
228 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
229 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
231 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
232 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
233 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
234 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
236 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
239 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
243 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
244 pci_dev = dev_info.pci_dev;
246 if (!dev_info.device)
248 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
253 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
254 "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
255 /* Try to find the device's numa node */
257 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
258 FILE* numa_node_fd = fopen(buf, "r");
260 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
261 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
263 port_cfg->socket = strtol(buf, 0, 0);
264 if (port_cfg->socket == -1) {
265 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
267 fclose(numa_node_fd);
270 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
271 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
272 plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
273 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
275 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
276 plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
277 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
282 /* Create rte ring-backed devices */
283 uint8_t init_rte_ring_dev(void)
285 uint8_t nb_ring_dev = 0;
287 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
288 /* skip ports that are not enabled */
289 if (!prox_port_cfg[port_id].active) {
292 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
293 if (port_cfg->rx_ring[0] != '\0') {
294 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
296 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
297 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
298 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
299 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
301 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
302 PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
304 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
313 static void print_port_capa(struct prox_port_cfg *port_cfg)
317 port_id = port_cfg - prox_port_cfg;
318 plog_info("\t*** Initializing port %u ***\n", port_id);
319 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
320 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
321 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
322 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
323 plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
325 if (port_cfg->max_link_speed != UINT32_MAX) {
326 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
329 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
330 plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
331 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
332 plog_info("VLAN STRIP | ");
333 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
334 plog_info("IPV4 CKSUM | ");
335 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
336 plog_info("UDP CKSUM | ");
337 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
338 plog_info("TCP CKSUM | ");
339 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
340 plog_info("TCP LRO | ");
341 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
342 plog_info("QINQ STRIP | ");
343 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
344 plog_info("OUTER_IPV4_CKSUM | ");
345 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
346 plog_info("MACSEC STRIP | ");
347 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
348 plog_info("HEADER SPLIT | ");
349 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
350 plog_info("VLAN FILTER | ");
351 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
352 plog_info("VLAN EXTEND | ");
353 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
354 plog_info("JUMBO FRAME | ");
355 #if defined(DEV_RX_OFFLOAD_CRC_STRIP)
356 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
357 plog_info("CRC STRIP | ");
359 #if defined(DEV_RX_OFFLOAD_KEEP_CRC)
360 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)
361 plog_info("KEEP CRC | ");
363 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
364 plog_info("SCATTER | ");
365 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
366 plog_info("TIMESTAMP | ");
367 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
368 plog_info("SECURITY ");
371 plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
372 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
373 plog_info("VLAN INSERT | ");
374 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
375 plog_info("IPV4 CKSUM | ");
376 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
377 plog_info("UDP CKSUM | ");
378 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
379 plog_info("TCP CKSUM | ");
380 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
381 plog_info("SCTP CKSUM | ");
382 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
383 plog_info("TCP TS0 | ");
384 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
385 plog_info("UDP TSO | ");
386 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
387 plog_info("OUTER IPV4 CKSUM | ");
388 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
389 plog_info("QINQ INSERT | ");
390 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
391 plog_info("VLAN TNL TSO | ");
392 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
393 plog_info("GRE TNL TSO | ");
394 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
395 plog_info("IPIP TNL TSO | ");
396 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
397 plog_info("GENEVE TNL TSO | ");
398 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
399 plog_info("MACSEC INSERT | ");
400 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
401 plog_info("MT LOCKFREE | ");
402 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
403 plog_info("MULTI SEG | ");
404 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
405 plog_info("SECURITY | ");
406 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
407 plog_info("UDP TNL TSO | ");
408 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
409 plog_info("IP TNL TSO | ");
412 plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
413 plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
414 plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
415 plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
416 plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
420 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
422 port_cfg->max_link_speed = UINT32_MAX;
424 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
425 // virtio and vmxnet3 reports fake max_link_speed
426 if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
427 // Get link_speed from highest capability from the port
428 // This will be used by gen and lat for extrapolation purposes
429 // The negotiated link_speed (as reported by rte_eth_link_get
430 // or rte_eth_link_get_nowait) might be reported too late
431 // and might result in wrong exrapolation, and hence should not be used
432 // for extrapolation purposes
433 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
434 port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
435 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
436 port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
437 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
438 port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
439 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
440 port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
441 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
442 port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
443 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
444 port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
445 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
446 port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
447 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
448 port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
449 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
450 port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
451 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
452 port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
453 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
454 port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
455 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
456 port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
462 static void init_port(struct prox_port_cfg *port_cfg)
464 static char dummy_pool_name[] = "0_dummy";
465 struct rte_eth_link link;
469 get_max_link_speed(port_cfg);
470 print_port_capa(port_cfg);
471 port_id = port_cfg - prox_port_cfg;
472 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
473 "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
475 if (port_cfg->n_rxq == 0) {
476 /* not receiving on this port */
477 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
479 uint32_t mbuf_size = TX_MBUF_SIZE;
480 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
481 mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
483 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
484 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
485 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
487 sizeof(struct rte_pktmbuf_pool_private),
488 rte_pktmbuf_pool_init, NULL,
489 prox_pktmbuf_init, 0,
490 port_cfg->socket, 0);
491 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
492 port_cfg->socket, port_cfg->n_rxd);
493 dummy_pool_name[0]++;
495 // Most pmd should now support setting mtu
496 if (port_cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
497 plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
498 port_cfg->mtu = port_cfg->max_rx_pkt_len;
500 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
501 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
503 plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
505 if (port_cfg->n_txq == 0) {
506 /* not sending on this port */
507 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
512 if (port_cfg->n_rxq > 1) {
513 // Enable RSS if multiple receive queues
514 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
515 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
516 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
517 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
518 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP|ETH_RSS_UDP;
520 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
524 // Make sure that the requested RSS offload is supported by the PMD
525 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
526 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
528 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
530 // rxmode such as hw src strip
531 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
532 #if defined (DEV_RX_OFFLOAD_CRC_STRIP)
533 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
535 #if defined (DEV_RX_OFFLOAD_KEEP_CRC)
536 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_KEEP_CRC);
538 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
539 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
541 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
542 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
544 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
545 port_cfg->port_conf.rxmode.jumbo_frame = 1;
549 // IPV4, UDP, SCTP Checksums
550 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
551 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
552 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
553 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
555 if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
556 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
557 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
559 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
560 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
561 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
565 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
566 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
568 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
569 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
570 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
571 } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
572 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
574 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
576 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
577 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
579 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
583 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
584 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
586 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
587 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
589 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
592 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
593 port_id, port_cfg->n_rxq, port_cfg->n_txq);
595 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
596 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
598 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
599 !strcmp(port_cfg->short_name, "virtio") ||
600 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
601 !strcmp(port_cfg->short_name, "i40e") ||
603 !strcmp(port_cfg->short_name, "i40e_vf") ||
604 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
605 !strcmp(port_cfg->driver_name, "") || /* NULL device */
606 !strcmp(port_cfg->short_name, "vmxnet3")) {
607 port_cfg->port_conf.intr_conf.lsc = 0;
608 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
611 if (port_cfg->lsc_set_explicitely) {
612 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
613 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
615 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
616 if (port_cfg->n_txd < 512) {
617 // Vmxnet3 driver requires minimum 512 tx descriptors
618 plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
619 port_cfg->n_txd = 512;
623 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
624 port_cfg->n_txq, &port_cfg->port_conf);
625 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
627 if (port_cfg->port_conf.intr_conf.lsc) {
628 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
631 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
633 /* initialize TX queues first */
634 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
635 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
636 queue_id, port_cfg->socket, port_cfg->n_txd);
637 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
638 port_cfg->socket, &port_cfg->tx_conf);
639 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
642 /* initialize RX queues */
643 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
644 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
645 queue_id, port_id, port_cfg->socket,
646 port_cfg->n_rxd, port_cfg->pool[queue_id]);
647 ret = rte_eth_rx_queue_setup(port_id, queue_id,
649 port_cfg->socket, &port_cfg->rx_conf,
650 port_cfg->pool[queue_id]);
651 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
654 plog_info("\t\tStarting up port %u ...", port_id);
655 ret = rte_eth_dev_start(port_id);
657 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
658 plog_info(" done: ");
660 /* Getting link status can be done without waiting if Link
661 State Interrupt is enabled since in that case, if the link
662 is recognized as being down, an interrupt will notify that
664 if (port_cfg->port_conf.intr_conf.lsc)
665 rte_eth_link_get_nowait(port_id, &link);
667 rte_eth_link_get(port_id, &link);
669 port_cfg->link_up = link.link_status;
670 port_cfg->link_speed = link.link_speed;
672 if (link.link_status) {
673 plog_info("Link Up - speed %'u Mbps - %s\n",
675 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
676 "full-duplex" : "half-duplex");
679 plog_info("Link Down\n");
682 if (port_cfg->promiscuous) {
683 rte_eth_promiscuous_enable(port_id);
684 plog_info("\t\tport %u in promiscuous mode\n", port_id);
687 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
688 strcmp(port_cfg->short_name, "i40e") &&
689 strcmp(port_cfg->short_name, "i40e_vf") &&
690 strcmp(port_cfg->short_name, "vmxnet3")) {
691 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
692 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
694 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
697 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
698 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
700 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
704 if (port_cfg->nb_mc_addr) {
705 rte_eth_allmulticast_enable(port_id);
706 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
707 plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
708 port_cfg->nb_mc_addr = 0;
709 rte_eth_allmulticast_disable(port_id);
710 plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
712 plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
713 plog_info("\t\tport %u in multicast mode\n", port_id);
718 void init_port_all(void)
720 uint8_t max_port_idx = prox_last_port_active() + 1;
722 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
723 if (!prox_port_cfg[portid].active) {
726 init_port(&prox_port_cfg[portid]);
730 void close_ports_atexit(void)
732 uint8_t max_port_idx = prox_last_port_active() + 1;
734 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
735 if (!prox_port_cfg[portid].active) {
738 rte_eth_dev_close(portid);
742 void init_port_addr(void)
744 struct prox_port_cfg *port_cfg;
747 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
748 if (!prox_port_cfg[port_id].active) {
751 port_cfg = &prox_port_cfg[port_id];
753 switch (port_cfg->type) {
754 case PROX_PORT_MAC_HW:
755 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
757 case PROX_PORT_MAC_RAND:
758 eth_random_addr(port_cfg->eth_addr.addr_bytes);
760 case PROX_PORT_MAC_SET:
761 if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
762 plog_warn("port %u: failed to set mac address. Error = %d\n", port_id, rc);
768 int port_is_active(uint8_t port_id)
770 if (port_id > PROX_MAX_PORTS) {
771 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
775 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
776 if (!port_cfg->active) {
777 plog_info("Port %u is not active\n", port_id);