// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
43 #include "prox_compat.h"
45 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
48 int prox_nb_active_ports(void)
51 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
52 ret += prox_port_cfg[i].active;
57 int prox_last_port_active(void)
60 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
61 if (prox_port_cfg[i].active) {
68 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
69 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
70 __attribute__((unused)) void *ret_param)
72 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
73 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
74 __attribute__((unused)) void *ret_param)
76 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
80 if (RTE_ETH_EVENT_INTR_LSC != type) {
81 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
88 rte_atomic32_inc(&lsc);
90 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Argument bundle handed to prox_pktmbuf_reinit() through its 'arg' pointer. */
struct prox_pktmbuf_reinit_args {
	struct rte_mempool *mp;     /* pool the object belongs to */
	struct lcore_cfg   *lconf;  /* forwarded as the opaque init argument */
};
100 /* standard mbuf initialization procedure */
101 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
103 struct rte_mbuf *mbuf = _m;
105 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
106 mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
108 mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
109 mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
112 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
115 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
117 struct prox_pktmbuf_reinit_args *init_args = arg;
121 obj += init_args->mp->header_size;
122 m = (struct rte_mbuf*)obj;
124 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
/*
 * Resolve one requested TX offload for the port described by the local
 * variable 'port_cfg' (which must be in scope at the expansion site).
 *
 * If the offload is requested:
 *   - drop it when it is listed in disabled_tx_offload;
 *   - otherwise enable it at port level when the device advertises it in
 *     tx_offload_capa;
 *   - otherwise enable it at queue level when advertised in
 *     tx_queue_offload_capa;
 *   - otherwise drop the request.
 * Every outcome is logged with the stringified flag name.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else); 'flag' is parenthesised so that
 * compound expressions can be passed.
 */
#define CONFIGURE_TX_OFFLOAD(flag) \
	do {\
		if (port_cfg->requested_tx_offload & (flag)) {\
			if (port_cfg->disabled_tx_offload & (flag)) {\
				plog_info("\t\t%s disabled by configuration\n", #flag);\
				port_cfg->requested_tx_offload &= ~(flag);\
			} else if (port_cfg->dev_info.tx_offload_capa & (flag)) {\
				port_cfg->port_conf.txmode.offloads |= (flag);\
				plog_info("\t\t%s enabled on port\n", #flag);\
			} else if (port_cfg->dev_info.tx_queue_offload_capa & (flag)) {\
				port_cfg->tx_conf.offloads |= (flag);\
				plog_info("\t\t%s enabled on queue\n", #flag);\
			} else {\
				port_cfg->requested_tx_offload &= ~(flag);\
				plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
			}\
		} else {\
			plog_info("\t\t%s disabled\n", #flag);\
		}\
	} while (0)
/*
 * Resolve one requested RX offload for the port described by the local
 * variable 'port_cfg' (which must be in scope at the expansion site).
 *
 * If the offload is requested it is enabled at port level when advertised
 * in rx_offload_capa, else at queue level when advertised in
 * rx_queue_offload_capa, else the request is dropped. Every outcome is
 * logged with the stringified flag name.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe inside an unbraced if/else); 'flag' is parenthesised so that
 * compound expressions can be passed.
 */
#define CONFIGURE_RX_OFFLOAD(flag) \
	do {\
		if (port_cfg->requested_rx_offload & (flag)) {\
			if (port_cfg->dev_info.rx_offload_capa & (flag)) {\
				port_cfg->port_conf.rxmode.offloads |= (flag);\
				plog_info("\t\t%s enabled on port\n", #flag);\
			} else if (port_cfg->dev_info.rx_queue_offload_capa & (flag)) {\
				port_cfg->rx_conf.offloads |= (flag);\
				plog_info("\t\t%s enabled on queue\n", #flag);\
			} else {\
				port_cfg->requested_rx_offload &= ~(flag);\
				plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
			}\
		} else {\
			plog_info("\t\t%s disabled\n", #flag);\
		}\
	} while (0)
163 /* initialize rte devices and check the number of available ports */
164 void init_rte_dev(int use_dummy_devices)
166 uint8_t nb_ports, port_id_max;
168 struct rte_eth_dev_info dev_info;
169 const struct rte_pci_device *pci_dev;
171 nb_ports = rte_eth_dev_count();
172 /* get available ports configuration */
173 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
175 if (use_dummy_devices) {
176 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
177 nb_ports = prox_last_port_active() + 1;
178 plog_info("Creating %u dummy devices\n", nb_ports);
180 char port_name[32] = "0dummy_dev";
181 for (uint32_t i = 0; i < nb_ports; ++i) {
182 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
183 rte_vdev_init(port_name, "size=ETHER_MIN_LEN,copy=0");
185 eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
190 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
193 else if (prox_last_port_active() != -1) {
194 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
195 plog_info("\tDPDK has found %u ports\n", nb_ports);
198 if (nb_ports > PROX_MAX_PORTS) {
199 plog_warn("\tWarning: I can deal with at most %u ports."
200 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
202 nb_ports = PROX_MAX_PORTS;
204 port_id_max = nb_ports - 1;
205 port_id_last = prox_last_port_active();
206 PROX_PANIC(port_id_last > port_id_max,
207 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
208 port_id_last, port_id_max);
210 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
211 for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
212 /* skip ports that are not enabled */
213 if (!prox_port_cfg[port_id].active) {
216 plog_info("\tGetting info for rte dev %u\n", port_id);
217 rte_eth_dev_info_get(port_id, &dev_info);
218 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
219 port_cfg->socket = -1;
221 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
222 port_cfg->max_txq = dev_info.max_tx_queues;
223 port_cfg->max_rxq = dev_info.max_rx_queues;
224 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
225 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
227 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
228 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
230 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
231 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
232 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
233 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
235 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
238 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
242 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
243 pci_dev = dev_info.pci_dev;
245 if (!dev_info.device)
247 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
252 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
253 "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
254 /* Try to find the device's numa node */
256 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
257 FILE* numa_node_fd = fopen(buf, "r");
259 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
260 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
262 port_cfg->socket = strtol(buf, 0, 0);
263 if (port_cfg->socket == -1) {
264 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
266 fclose(numa_node_fd);
269 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
270 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
271 plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
272 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
274 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
275 plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
276 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
281 /* Create rte ring-backed devices */
282 uint8_t init_rte_ring_dev(void)
284 uint8_t nb_ring_dev = 0;
286 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
287 /* skip ports that are not enabled */
288 if (!prox_port_cfg[port_id].active) {
291 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
292 if (port_cfg->rx_ring[0] != '\0') {
293 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
295 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
296 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
297 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
298 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
300 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
301 PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
303 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
312 static void print_port_capa(struct prox_port_cfg *port_cfg)
316 port_id = port_cfg - prox_port_cfg;
317 plog_info("\t*** Initializing port %u ***\n", port_id);
318 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
319 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
320 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
321 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
322 plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
324 if (port_cfg->max_link_speed != UINT32_MAX) {
325 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
328 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
329 plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
330 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
331 plog_info("VLAN STRIP | ");
332 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
333 plog_info("IPV4 CKSUM | ");
334 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
335 plog_info("UDP CKSUM | ");
336 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
337 plog_info("TCP CKSUM | ");
338 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
339 plog_info("TCP LRO | ");
340 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
341 plog_info("QINQ STRIP | ");
342 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
343 plog_info("OUTER_IPV4_CKSUM | ");
344 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
345 plog_info("MACSEC STRIP | ");
346 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
347 plog_info("HEADER SPLIT | ");
348 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
349 plog_info("VLAN FILTER | ");
350 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
351 plog_info("VLAN EXTEND | ");
352 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
353 plog_info("JUMBO FRAME | ");
354 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
355 plog_info("CRC STRIP | ");
356 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
357 plog_info("SCATTER | ");
358 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
359 plog_info("TIMESTAMP | ");
360 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
361 plog_info("SECURITY ");
364 plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
365 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
366 plog_info("VLAN INSERT | ");
367 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
368 plog_info("IPV4 CKSUM | ");
369 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
370 plog_info("UDP CKSUM | ");
371 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
372 plog_info("TCP CKSUM | ");
373 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
374 plog_info("SCTP CKSUM | ");
375 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
376 plog_info("TCP TS0 | ");
377 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
378 plog_info("UDP TSO | ");
379 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
380 plog_info("OUTER IPV4 CKSUM | ");
381 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
382 plog_info("QINQ INSERT | ");
383 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
384 plog_info("VLAN TNL TSO | ");
385 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
386 plog_info("GRE TNL TSO | ");
387 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
388 plog_info("IPIP TNL TSO | ");
389 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
390 plog_info("GENEVE TNL TSO | ");
391 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
392 plog_info("MACSEC INSERT | ");
393 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
394 plog_info("MT LOCKFREE | ");
395 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
396 plog_info("MULTI SEG | ");
397 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
398 plog_info("SECURITY | ");
399 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
400 plog_info("UDP TNL TSO | ");
401 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
402 plog_info("IP TNL TSO | ");
405 plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
406 plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
407 plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
408 plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
409 plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
413 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
415 port_cfg->max_link_speed = UINT32_MAX;
417 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
418 // virtio and vmxnet3 reports fake max_link_speed
419 if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
420 // Get link_speed from highest capability from the port
421 // This will be used by gen and lat for extrapolation purposes
422 // The negotiated link_speed (as reported by rte_eth_link_get
423 // or rte_eth_link_get_nowait) might be reported too late
424 // and might result in wrong exrapolation, and hence should not be used
425 // for extrapolation purposes
426 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
427 port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
428 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
429 port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
430 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
431 port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
432 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
433 port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
434 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
435 port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
436 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
437 port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
438 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
439 port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
440 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
441 port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
442 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
443 port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
444 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
445 port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
446 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
447 port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
448 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
449 port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
455 static void init_port(struct prox_port_cfg *port_cfg)
457 static char dummy_pool_name[] = "0_dummy";
458 struct rte_eth_link link;
462 get_max_link_speed(port_cfg);
463 print_port_capa(port_cfg);
464 port_id = port_cfg - prox_port_cfg;
465 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
466 "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
468 if (port_cfg->n_rxq == 0) {
469 /* not receiving on this port */
470 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
472 uint32_t mbuf_size = TX_MBUF_SIZE;
473 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
474 mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
476 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
477 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
478 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
480 sizeof(struct rte_pktmbuf_pool_private),
481 rte_pktmbuf_pool_init, NULL,
482 prox_pktmbuf_init, 0,
483 port_cfg->socket, 0);
484 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
485 port_cfg->socket, port_cfg->n_rxd);
486 dummy_pool_name[0]++;
488 // Most pmd should now support setting mtu
489 if (port_cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
490 plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
491 port_cfg->mtu = port_cfg->max_rx_pkt_len;
493 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
494 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
496 plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
498 if (port_cfg->n_txq == 0) {
499 /* not sending on this port */
500 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
505 if (port_cfg->n_rxq > 1) {
506 // Enable RSS if multiple receive queues
507 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
508 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
509 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
510 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
511 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP|ETH_RSS_UDP;
513 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
517 // Make sure that the requested RSS offload is supported by the PMD
518 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
519 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
521 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP);
523 // rxmode such as hw src strip
524 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
525 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
526 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
527 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
529 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
530 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
532 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
533 port_cfg->port_conf.rxmode.jumbo_frame = 1;
537 // IPV4, UDP, SCTP Checksums
538 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
539 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
540 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
541 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
543 if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
544 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
545 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
547 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
548 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
549 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
553 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
554 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
556 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
557 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
558 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
559 } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
560 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
562 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
564 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
565 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
567 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
571 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
572 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
574 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
575 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
577 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
580 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
581 port_id, port_cfg->n_rxq, port_cfg->n_txq);
583 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
584 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
586 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
587 !strcmp(port_cfg->short_name, "virtio") ||
588 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
589 !strcmp(port_cfg->short_name, "i40e") ||
591 !strcmp(port_cfg->short_name, "i40e_vf") ||
592 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
593 !strcmp(port_cfg->driver_name, "") || /* NULL device */
594 !strcmp(port_cfg->short_name, "vmxnet3")) {
595 port_cfg->port_conf.intr_conf.lsc = 0;
596 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
599 if (port_cfg->lsc_set_explicitely) {
600 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
601 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
603 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
604 if (port_cfg->n_txd < 512) {
605 // Vmxnet3 driver requires minimum 512 tx descriptors
606 plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
607 port_cfg->n_txd = 512;
611 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
612 port_cfg->n_txq, &port_cfg->port_conf);
613 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
615 if (port_cfg->port_conf.intr_conf.lsc) {
616 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
619 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
621 /* initialize TX queues first */
622 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
623 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
624 queue_id, port_cfg->socket, port_cfg->n_txd);
625 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
626 port_cfg->socket, &port_cfg->tx_conf);
627 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
630 /* initialize RX queues */
631 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
632 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
633 queue_id, port_id, port_cfg->socket,
634 port_cfg->n_rxd, port_cfg->pool[queue_id]);
635 ret = rte_eth_rx_queue_setup(port_id, queue_id,
637 port_cfg->socket, &port_cfg->rx_conf,
638 port_cfg->pool[queue_id]);
639 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
642 plog_info("\t\tStarting up port %u ...", port_id);
643 ret = rte_eth_dev_start(port_id);
645 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
646 plog_info(" done: ");
648 /* Getting link status can be done without waiting if Link
649 State Interrupt is enabled since in that case, if the link
650 is recognized as being down, an interrupt will notify that
652 if (port_cfg->port_conf.intr_conf.lsc)
653 rte_eth_link_get_nowait(port_id, &link);
655 rte_eth_link_get(port_id, &link);
657 port_cfg->link_up = link.link_status;
658 port_cfg->link_speed = link.link_speed;
660 if (link.link_status) {
661 plog_info("Link Up - speed %'u Mbps - %s\n",
663 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
664 "full-duplex" : "half-duplex");
667 plog_info("Link Down\n");
670 if (port_cfg->promiscuous) {
671 rte_eth_promiscuous_enable(port_id);
672 plog_info("\t\tport %u in promiscuous mode\n", port_id);
675 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
676 strcmp(port_cfg->short_name, "i40e") &&
677 strcmp(port_cfg->short_name, "i40e_vf") &&
678 strcmp(port_cfg->short_name, "vmxnet3")) {
679 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
680 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
682 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
685 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
686 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
688 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
694 void init_port_all(void)
696 uint8_t max_port_idx = prox_last_port_active() + 1;
698 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
699 if (!prox_port_cfg[portid].active) {
702 init_port(&prox_port_cfg[portid]);
706 void close_ports_atexit(void)
708 uint8_t max_port_idx = prox_last_port_active() + 1;
710 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
711 if (!prox_port_cfg[portid].active) {
714 rte_eth_dev_close(portid);
718 void init_port_addr(void)
720 struct prox_port_cfg *port_cfg;
722 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
723 if (!prox_port_cfg[port_id].active) {
726 port_cfg = &prox_port_cfg[port_id];
728 switch (port_cfg->type) {
729 case PROX_PORT_MAC_HW:
730 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
732 case PROX_PORT_MAC_RAND:
733 eth_random_addr(port_cfg->eth_addr.addr_bytes);
735 case PROX_PORT_MAC_SET:
741 int port_is_active(uint8_t port_id)
743 if (port_id > PROX_MAX_PORTS) {
744 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
748 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
749 if (!port_cfg->active) {
750 plog_info("Port %u is not active\n", port_id);