2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
43 #include "prox_compat.h"
44 #include "rte_ethdev.h"
/* Global per-port configuration table, indexed by DPDK port id. */
46 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Count how many entries in prox_port_cfg are marked active.
 * NOTE(review): excerpt is partial — the accumulator declaration and the
 * final return statement are outside this view. */
49 int prox_nb_active_ports(void)
52 	for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
53 		ret += prox_port_cfg[i].active;
/* Return the index of the last (highest-numbered) active port.
 * NOTE(review): return paths are outside this excerpt; callers compare the
 * result against -1, so presumably -1 is returned when no port is active. */
58 int prox_last_port_active(void)
61 	for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
62 		if (prox_port_cfg[i].active) {
/* Link-state-change callback registered via rte_eth_dev_callback_register().
 * The prototype changed across DPDK releases (port id widened to uint16_t in
 * 17.11, return value and ret_param added in 17.08), hence the RTE_VERSION
 * ladder selecting one of three signatures.  Non-LSC events are filtered
 * out; on an LSC event the global 'lsc' counter is bumped atomically.
 * NOTE(review): excerpt is partial — the #else/#endif arms and the early
 * returns of the type filter are not all visible here. */
69 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
70 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
71 __attribute__((unused)) void *ret_param)
73 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
74 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
75 __attribute__((unused)) void *ret_param)
77 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
81 if (RTE_ETH_EVENT_INTR_LSC != type) {
82 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Record the link-state change; readers poll this atomic counter. */
89 rte_atomic32_inc(&lsc);
91 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Opaque argument bundle passed to prox_pktmbuf_reinit(): the mempool the
 * mbufs belong to and the owning lcore configuration. */
96 struct prox_pktmbuf_reinit_args {
97 struct rte_mempool *mp;
98 struct lcore_cfg *lconf;
101 /* standard mbuf initialization procedure */
/* Mempool element-init callback (rte_mempool_create obj_init signature).
 * Pre-loads the TX offload metadata with Ethernet/IPv4 header lengths so
 * per-packet paths don't have to, then defers to rte_pktmbuf_init() for the
 * standard mbuf field setup.  The #if selects the pre-1.8 mbuf layout
 * (pkt.vlan_macip) versus the modern tx_offload bitfield. */
102 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
104 struct rte_mbuf *mbuf = _m;
106 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
107 mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
109 mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
110 mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
113 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
/* Re-initialize one mempool element in place (rte_mempool walk callback
 * signature).  Skips past the mempool header to reach the mbuf, then runs
 * the same init as prox_pktmbuf_init(), using the lcore config as the
 * opaque argument.
 * NOTE(review): excerpt is partial — the declarations of 'obj' and 'm' are
 * not visible here. */
116 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
118 struct prox_pktmbuf_reinit_args *init_args = arg;
122 obj += init_args->mp->header_size;
123 m = (struct rte_mbuf*)obj;
125 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
/* For a requested TX offload 'flag': honor an explicit disable first, then
 * try to enable it at port level (tx_offload_capa), then at queue level
 * (tx_queue_offload_capa); if neither supports it, drop it from
 * requested_tx_offload.  Each outcome is logged.
 * NOTE(review): excerpt is partial — some backslash-continued lines of this
 * macro (closing braces / else arm) are not visible here. */
128 #define CONFIGURE_TX_OFFLOAD(flag) \
129 if (port_cfg->requested_tx_offload & flag) {\
130 if (port_cfg->disabled_tx_offload & flag) {\
131 plog_info("\t\t%s disabled by configuration\n", #flag);\
132 port_cfg->requested_tx_offload &= ~flag;\
133 } else if (port_cfg->dev_info.tx_offload_capa & flag) {\
134 port_cfg->port_conf.txmode.offloads |= flag;\
135 plog_info("\t\t%s enabled on port\n", #flag);\
136 } else if (port_cfg->dev_info.tx_queue_offload_capa & flag) {\
137 port_cfg->tx_conf.offloads |= flag;\
138 plog_info("\t\t%s enabled on queue\n", #flag);\
140 port_cfg->requested_tx_offload &= ~flag;\
141 plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
144 plog_info("\t\t%s disabled\n", #flag);\
/* RX counterpart of CONFIGURE_TX_OFFLOAD: enable a requested RX offload at
 * port level if rx_offload_capa allows it, else at queue level, else drop it
 * from requested_rx_offload.  There is no per-offload disable mask on the
 * RX side.
 * NOTE(review): excerpt is partial — some continuation lines of this macro
 * are not visible here. */
147 #define CONFIGURE_RX_OFFLOAD(flag) \
148 if (port_cfg->requested_rx_offload & flag) {\
149 if (port_cfg->dev_info.rx_offload_capa & flag) {\
150 port_cfg->port_conf.rxmode.offloads |= flag;\
151 plog_info("\t\t%s enabled on port\n", #flag);\
152 } else if (port_cfg->dev_info.rx_queue_offload_capa & flag) {\
153 port_cfg->rx_conf.offloads |= flag;\
154 plog_info("\t\t%s enabled on queue\n", #flag);\
156 port_cfg->requested_rx_offload &= ~flag;\
157 plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
160 plog_info("\t\t%s disabled\n", #flag);\
164 /* initialize rte devices and check the number of available ports */
/*
 * Probe DPDK ethdev ports (or create null vdevs when use_dummy_devices is
 * set), sanity-check the configured port range against what DPDK found,
 * then for every active port: copy the rte_eth_dev_info limits (queue
 * counts, descriptor limits, buffer sizes) into prox_port_cfg, derive a
 * short driver name, resolve the PCI address and its NUMA node via sysfs,
 * and pre-disable TX offloads known to be broken on vmxnet3/virtio.
 * NOTE(review): excerpt is partial — several closing braces, #else/#endif
 * arms and intermediate statements are not visible here.
 */
165 void init_rte_dev(int use_dummy_devices)
167 uint8_t nb_ports, port_id_max;
169 struct rte_eth_dev_info dev_info;
170 const struct rte_pci_device *pci_dev;
172 nb_ports = rte_eth_dev_count();
173 /* get available ports configuration */
174 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
176 if (use_dummy_devices) {
177 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
/* One dummy device per configured port index, up to the last active one. */
178 nb_ports = prox_last_port_active() + 1;
179 plog_info("Creating %u dummy devices\n", nb_ports);
/* Leading digit of the name is bumped per device to keep names unique
 * — presumably port_name[0] is incremented inside the loop (not visible). */
181 char port_name[32] = "0dummy_dev";
182 for (uint32_t i = 0; i < nb_ports; ++i) {
183 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
184 rte_vdev_init(port_name, "size=ETHER_MIN_LEN,copy=0");
186 eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
191 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
194 else if (prox_last_port_active() != -1) {
195 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
196 plog_info("\tDPDK has found %u ports\n", nb_ports);
199 if (nb_ports > PROX_MAX_PORTS) {
200 plog_warn("\tWarning: I can deal with at most %u ports."
201 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
203 nb_ports = PROX_MAX_PORTS;
/* Reject configurations that reference ports beyond what DPDK found. */
205 port_id_max = nb_ports - 1;
206 port_id_last = prox_last_port_active();
207 PROX_PANIC(port_id_last > port_id_max,
208 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
209 port_id_last, port_id_max);
211 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
212 for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
213 /* skip ports that are not enabled */
214 if (!prox_port_cfg[port_id].active) {
217 plog_info("\tGetting info for rte dev %u\n", port_id);
218 rte_eth_dev_info_get(port_id, &dev_info);
219 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
220 port_cfg->socket = -1;
/* Cache the device limits so later configuration can clamp against them. */
222 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
223 port_cfg->max_txq = dev_info.max_tx_queues;
224 port_cfg->max_rxq = dev_info.max_rx_queues;
225 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
226 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
227 port_cfg->min_tx_desc = dev_info.tx_desc_lim.nb_min;
228 port_cfg->max_tx_desc = dev_info.tx_desc_lim.nb_max;
229 port_cfg->min_rx_desc = dev_info.rx_desc_lim.nb_min;
230 port_cfg->max_rx_desc = dev_info.rx_desc_lim.nb_max;
232 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
233 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
234 plog_info("\tPort %u : %d<=nb_tx_desc<=%d %d<=nb_rx_desc<=%d\n", port_id, port_cfg->min_tx_desc, port_cfg->max_tx_desc, port_cfg->min_rx_desc, port_cfg->max_rx_desc);
/* Strip the "rte_"/"net_" driver-name prefix to get a short name used for
 * per-PMD quirk handling throughout this file. */
236 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
237 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
238 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
239 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
241 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
/* Also drop a trailing "_pmd" suffix from the short name. */
244 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
/* dev_info.pci_dev was removed in DPDK 18.05; derive it from the generic
 * device handle on newer versions. */
248 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
249 pci_dev = dev_info.pci_dev;
251 if (!dev_info.device)
253 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
258 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
259 "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
260 /* Try to find the device's numa node */
262 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
263 FILE* numa_node_fd = fopen(buf, "r");
265 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
266 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
268 port_cfg->socket = strtol(buf, 0, 0);
269 if (port_cfg->socket == -1) {
270 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
272 fclose(numa_node_fd);
275 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
276 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
277 plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
278 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
280 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
281 plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
282 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
284 // Some OVS versions reports that they support UDP offload and no IPv4 offload, but fails when UDP offload is enabled
285 if ((!strcmp(port_cfg->short_name, "virtio")) &&
286 ((port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) == 0) &&
287 (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
288 plog_info("\t\tDisabling UDP cksum on virtio\n");
289 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
294 /* Create rte ring-backed devices */
/*
 * For every active port configured with a named RX ring, look up the RX and
 * TX rte_rings (panic if either is missing) and wrap them in an ethdev via
 * rte_eth_from_rings().  Returns the number of ring-backed devices created.
 * NOTE(review): excerpt is partial — the loop's continue path, the
 * nb_ring_dev increment and the final return are not visible here.
 */
295 uint8_t init_rte_ring_dev(void)
297 uint8_t nb_ring_dev = 0;
299 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
300 /* skip ports that are not enabled */
301 if (!prox_port_cfg[port_id].active) {
304 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
305 if (port_cfg->rx_ring[0] != '\0') {
306 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
308 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
309 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
310 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
311 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
313 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
314 PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
316 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/*
 * Log a human-readable summary of one port's capabilities: name, driver,
 * queue limits, speed capability mask, and a decode of the RX/TX offload
 * capability bitmasks into their flag names.  The port id is recovered from
 * the element's offset inside the global prox_port_cfg array.
 * NOTE(review): excerpt is partial — some #else/#endif arms and trailing
 * log lines are not visible here.
 */
325 static void print_port_capa(struct prox_port_cfg *port_cfg)
329 port_id = port_cfg - prox_port_cfg;
330 plog_info("\t*** Initializing port %u ***\n", port_id);
331 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
332 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
333 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
334 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
335 plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
/* UINT32_MAX is the "unknown" sentinel set by get_max_link_speed(). */
337 if (port_cfg->max_link_speed != UINT32_MAX) {
338 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
341 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
/* Decode RX offload capability bits into readable names. */
342 plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
343 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
344 plog_info("VLAN STRIP | ");
345 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
346 plog_info("IPV4 CKSUM | ");
347 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
348 plog_info("UDP CKSUM | ");
349 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
350 plog_info("TCP CKSUM | ");
351 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
352 plog_info("TCP LRO | ");
353 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
354 plog_info("QINQ STRIP | ");
355 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
356 plog_info("OUTER_IPV4_CKSUM | ");
357 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
358 plog_info("MACSEC STRIP | ");
359 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
360 plog_info("HEADER SPLIT | ");
361 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
362 plog_info("VLAN FILTER | ");
363 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
364 plog_info("VLAN EXTEND | ");
365 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
366 plog_info("JUMBO FRAME | ");
/* CRC_STRIP was replaced by KEEP_CRC in DPDK 18.08; guard both. */
367 #if defined(DEV_RX_OFFLOAD_CRC_STRIP)
368 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
369 plog_info("CRC STRIP | ");
371 #if defined(DEV_RX_OFFLOAD_KEEP_CRC)
372 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)
373 plog_info("KEEP CRC | ");
375 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
376 plog_info("SCATTER | ");
377 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
378 plog_info("TIMESTAMP | ");
379 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
380 plog_info("SECURITY ");
/* Decode TX offload capability bits into readable names. */
383 plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
384 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
385 plog_info("VLAN INSERT | ");
386 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
387 plog_info("IPV4 CKSUM | ");
388 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
389 plog_info("UDP CKSUM | ");
390 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
391 plog_info("TCP CKSUM | ");
392 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
393 plog_info("SCTP CKSUM | ");
394 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
/* NOTE(review): "TCP TS0" looks like a typo for "TCP TSO" in the log
 * output; runtime string left untouched here. */
395 plog_info("TCP TS0 | ");
396 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
397 plog_info("UDP TSO | ");
398 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
399 plog_info("OUTER IPV4 CKSUM | ");
400 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
401 plog_info("QINQ INSERT | ");
402 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
/* NOTE(review): "VLAN TNL TSO" is presumably meant to read "VXLAN TNL TSO"
 * to match the flag tested above; runtime string left untouched here. */
403 plog_info("VLAN TNL TSO | ");
404 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
405 plog_info("GRE TNL TSO | ");
406 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
407 plog_info("IPIP TNL TSO | ");
408 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
409 plog_info("GENEVE TNL TSO | ");
410 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
411 plog_info("MACSEC INSERT | ");
412 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
413 plog_info("MT LOCKFREE | ");
414 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
415 plog_info("MULTI SEG | ");
416 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
417 plog_info("SECURITY | ");
418 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
419 plog_info("UDP TNL TSO | ");
420 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
421 plog_info("IP TNL TSO | ");
424 plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
425 plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
426 plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
427 plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
428 plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
/*
 * Derive the port's highest advertised link speed (in Mbps) from the
 * dev_info.speed_capa bitmask, walking from fastest to slowest.  The result
 * feeds latency/generator extrapolation, which must not rely on the
 * negotiated speed (it may be reported too late).  Left at the UINT32_MAX
 * sentinel when the speed is unknown or the PMD (vmxnet3/virtio) reports a
 * fake capability mask.
 */
432 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
434 port_cfg->max_link_speed = UINT32_MAX;
436 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
437 // virtio and vmxnet3 reports fake max_link_speed
438 if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
439 // Get link_speed from highest capability from the port
440 // This will be used by gen and lat for extrapolation purposes
441 // The negotiated link_speed (as reported by rte_eth_link_get
442 // or rte_eth_link_get_nowait) might be reported too late
443 // and might result in wrong exrapolation, and hence should not be used
444 // for extrapolation purposes
445 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
446 port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
447 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
448 port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
449 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
450 port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
451 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
452 port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
453 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
454 port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
455 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
456 port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
457 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
458 port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
459 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
460 port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
461 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
462 port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
463 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
464 port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
465 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
466 port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
467 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
468 port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
/*
 * Fully bring up one port: log its capabilities, allocate a dummy mempool
 * when the port has no RX queues, clamp MTU and RX/TX descriptor counts to
 * device limits, configure RSS and RX/TX offloads (with DPDK-version
 * dependent paths: >=18.08 uses the offloads API via CONFIGURE_*_OFFLOAD,
 * older versions use rxmode bits and txq_flags), call
 * rte_eth_dev_configure(), set up all TX then RX queues, start the port,
 * read the link status into the config, and finally apply promiscuous mode,
 * per-queue stats mappings and the multicast address list.
 * NOTE(review): excerpt is partial — many closing braces, #else/#endif arms
 * and intermediate statements are not visible here.
 */
474 static void init_port(struct prox_port_cfg *port_cfg)
/* Leading digit of the pool name is bumped per port to keep names unique. */
476 static char dummy_pool_name[] = "0_dummy";
477 struct rte_eth_link link;
481 get_max_link_speed(port_cfg);
482 print_port_capa(port_cfg);
483 port_id = port_cfg - prox_port_cfg;
484 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
485 "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
487 if (port_cfg->n_rxq == 0) {
488 /* not receiving on this port */
489 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
/* The RX queue still needs a pool; size mbufs to the PMD's minimum. */
491 uint32_t mbuf_size = TX_MBUF_SIZE;
492 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
493 mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
495 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
496 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
497 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
499 sizeof(struct rte_pktmbuf_pool_private),
500 rte_pktmbuf_pool_init, NULL,
501 prox_pktmbuf_init, 0,
502 port_cfg->socket, 0);
503 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
504 port_cfg->socket, port_cfg->n_rxd);
505 dummy_pool_name[0]++;
507 // Most pmd should now support setting mtu
/* Clamp the MTU so frame + L2 header + CRC fits in the PMD's max RX length. */
508 if (port_cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
509 plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
510 port_cfg->mtu = port_cfg->max_rx_pkt_len;
512 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
513 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
515 plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
517 if (port_cfg->n_txq == 0) {
518 /* not sending on this port */
519 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
524 if (port_cfg->n_rxq > 1) {
525 // Enable RSS if multiple receive queues
526 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
527 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
528 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
529 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
530 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP|ETH_RSS_UDP;
532 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
536 // Make sure that the requested RSS offload is supported by the PMD
537 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
538 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
540 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
542 // rxmode such as hw src strip
543 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
544 #if defined (DEV_RX_OFFLOAD_CRC_STRIP)
545 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
547 #if defined (DEV_RX_OFFLOAD_KEEP_CRC)
548 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_KEEP_CRC);
550 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
551 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
/* Pre-18.08 path: map requested offloads onto legacy rxmode bit-fields. */
553 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
554 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
556 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
557 port_cfg->port_conf.rxmode.jumbo_frame = 1;
561 // IPV4, UDP, SCTP Checksums
562 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
563 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
564 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
565 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
/* Pre-18.08 path: legacy txq_flags instead of per-offload bits. */
567 if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
568 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
569 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
571 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
572 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
573 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
577 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
578 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
580 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
581 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
582 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
583 } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
584 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
586 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
588 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
589 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
591 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
595 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
596 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
598 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
599 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
601 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
604 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
605 port_id, port_cfg->n_rxq, port_cfg->n_txq);
607 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
608 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
/* These PMDs do not support link-state interrupts; force LSC off. */
610 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
611 !strcmp(port_cfg->short_name, "virtio") ||
612 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
613 !strcmp(port_cfg->short_name, "i40e") ||
615 !strcmp(port_cfg->short_name, "i40e_vf") ||
616 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
617 !strcmp(port_cfg->driver_name, "") || /* NULL device */
618 !strcmp(port_cfg->short_name, "vmxnet3")) {
619 port_cfg->port_conf.intr_conf.lsc = 0;
620 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
/* An explicit lsc setting in the config always wins over the defaults. */
623 if (port_cfg->lsc_set_explicitely) {
624 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
625 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
/* Clamp descriptor counts into the PMD's [min, max] range. */
627 if (port_cfg->n_txd < port_cfg->min_tx_desc) {
628 plog_info("\t\tNumber of TX descriptors is set to %d (minimum required for %s\n", port_cfg->min_tx_desc, port_cfg->short_name);
629 port_cfg->n_txd = port_cfg->min_tx_desc;
632 if (port_cfg->n_rxd < port_cfg->min_rx_desc) {
633 plog_info("\t\tNumber of RX descriptors is set to %d (minimum required for %s\n", port_cfg->min_rx_desc, port_cfg->short_name);
634 port_cfg->n_rxd = port_cfg->min_rx_desc;
637 if (port_cfg->n_txd > port_cfg->max_tx_desc) {
638 plog_info("\t\tNumber of TX descriptors is set to %d (maximum required for %s\n", port_cfg->max_tx_desc, port_cfg->short_name);
639 port_cfg->n_txd = port_cfg->max_tx_desc;
642 if (port_cfg->n_rxd > port_cfg->max_rx_desc) {
643 plog_info("\t\tNumber of RX descriptors is set to %d (maximum required for %s\n", port_cfg->max_rx_desc, port_cfg->short_name);
644 port_cfg->n_rxd = port_cfg->max_rx_desc;
647 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
648 port_cfg->n_txq, &port_cfg->port_conf);
649 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
651 if (port_cfg->port_conf.intr_conf.lsc) {
652 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
655 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
657 /* initialize TX queues first */
658 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
659 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
660 queue_id, port_cfg->socket, port_cfg->n_txd);
661 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
662 port_cfg->socket, &port_cfg->tx_conf);
663 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
666 /* initialize RX queues */
667 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
668 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
669 queue_id, port_id, port_cfg->socket,
670 port_cfg->n_rxd, port_cfg->pool[queue_id]);
671 ret = rte_eth_rx_queue_setup(port_id, queue_id,
673 port_cfg->socket, &port_cfg->rx_conf,
674 port_cfg->pool[queue_id]);
675 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
678 plog_info("\t\tStarting up port %u ...", port_id);
679 ret = rte_eth_dev_start(port_id);
681 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
682 plog_info(" done: ");
684 /* Getting link status can be done without waiting if Link
685 State Interrupt is enabled since in that case, if the link
686 is recognized as being down, an interrupt will notify that
688 if (port_cfg->port_conf.intr_conf.lsc)
689 rte_eth_link_get_nowait(port_id, &link);
691 rte_eth_link_get(port_id, &link);
/* Cache negotiated link status/speed for the stats and display code. */
693 port_cfg->link_up = link.link_status;
694 port_cfg->link_speed = link.link_speed;
696 if (link.link_status) {
697 plog_info("Link Up - speed %'u Mbps - %s\n",
699 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
700 "full-duplex" : "half-duplex");
703 plog_info("Link Down\n");
706 if (port_cfg->promiscuous) {
707 rte_eth_promiscuous_enable(port_id);
708 plog_info("\t\tport %u in promiscuous mode\n", port_id);
/* Queue-stats mapping is not supported by these PMDs; skip them. */
711 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
712 strcmp(port_cfg->short_name, "i40e") &&
713 strcmp(port_cfg->short_name, "i40e_vf") &&
714 strcmp(port_cfg->short_name, "vmxnet3")) {
715 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
716 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
718 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
721 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
722 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
724 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
/* Multicast: roll back allmulticast on failure to install the list. */
728 if (port_cfg->nb_mc_addr) {
729 rte_eth_allmulticast_enable(port_id);
730 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
731 plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
732 port_cfg->nb_mc_addr = 0;
733 rte_eth_allmulticast_disable(port_id);
734 plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
736 plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
737 plog_info("\t\tport %u in multicast mode\n", port_id);
/* Run init_port() on every active port up to the last active index. */
742 void init_port_all(void)
744 uint8_t max_port_idx = prox_last_port_active() + 1;
746 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
747 if (!prox_port_cfg[portid].active) {
750 init_port(&prox_port_cfg[portid]);
/* atexit-style cleanup: close every active ethdev port. */
754 void close_ports_atexit(void)
756 uint8_t max_port_idx = prox_last_port_active() + 1;
758 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
759 if (!prox_port_cfg[portid].active) {
762 rte_eth_dev_close(portid);
/*
 * Resolve each active port's MAC address according to its configured type:
 * read it from hardware (MAC_HW), generate a random one (MAC_RAND), or push
 * the user-provided address down to the device (MAC_SET, logging a warning
 * if the PMD rejects it).
 * NOTE(review): excerpt is partial — break statements and the closing of the
 * switch are not visible here.
 */
766 void init_port_addr(void)
768 struct prox_port_cfg *port_cfg;
771 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
772 if (!prox_port_cfg[port_id].active) {
775 port_cfg = &prox_port_cfg[port_id];
777 switch (port_cfg->type) {
778 case PROX_PORT_MAC_HW:
779 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
781 case PROX_PORT_MAC_RAND:
782 eth_random_addr(port_cfg->eth_addr.addr_bytes);
784 case PROX_PORT_MAC_SET:
785 if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
786 plog_warn("port %u: failed to set mac address. Error = %d\n", port_id, rc);
792 int port_is_active(uint8_t port_id)
794 if (port_id > PROX_MAX_PORTS) {
795 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
799 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
800 if (!port_cfg->active) {
801 plog_info("Port %u is not active\n", port_id);