2 // Copyright (c) 2010-2020 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include <sys/ioctl.h>
37 #include "prox_port_cfg.h"
38 #include "prox_globals.h"
44 #include "prox_cksum.h"
45 #include "stats_irq.h"
46 #include "prox_compat.h"
47 #include "rte_ethdev.h"
/* Global per-port configuration table, indexed by DPDK port id. */
49 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Count the ports flagged 'active' in the global prox_port_cfg table.
 * NOTE(review): this view of the file is incomplete — the declaration of
 * the accumulator and the return statement are not visible here. */
53 int prox_nb_active_ports(void)
56 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
57 ret += prox_port_cfg[i].active;
/* Scan all ports and report the active port with the highest index
 * (presumably returns -1 when no port is active — the tracking variable
 * and return statement are outside this view; TODO confirm). */
62 int prox_last_port_active(void)
65 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
66 if (prox_port_cfg[i].active) {
/* Link-state-change callback registered with rte_eth_dev_callback_register().
 * The signature differs per DPDK version: >= 17.11 uses uint16_t port ids,
 * >= 17.08 returns int and takes a ret_param, older versions return void.
 * Events other than RTE_ETH_EVENT_INTR_LSC are filtered out; an LSC event
 * increments the global atomic counter 'lsc' (consumed elsewhere).
 * NOTE(review): the matching #else/#endif lines and early-return bodies are
 * not visible in this truncated view. */
73 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
74 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
75 __attribute__((unused)) void *ret_param)
77 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
78 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
79 __attribute__((unused)) void *ret_param)
81 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
85 if (RTE_ETH_EVENT_INTR_LSC != type) {
86 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
93 rte_atomic32_inc(&lsc);
95 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Argument bundle passed (as 'opaque') through rte_mempool_obj_iter-style
 * iteration into prox_pktmbuf_reinit(): the mempool being walked and the
 * lcore configuration forwarded to prox_pktmbuf_init(). */
100 struct prox_pktmbuf_reinit_args {
101 struct rte_mempool *mp;
102 struct lcore_cfg *lconf;
105 /* standard mbuf initialization procedure */
/* Per-object mempool constructor: pre-fills the mbuf TX offload metadata
 * (L2 = Ethernet header length, L3 = IPv4 header length) and then delegates
 * to DPDK's stock rte_pktmbuf_init(). On pre-1.8 DPDK the equivalent fields
 * live in pkt.vlan_macip instead of the packed tx_offload word. */
106 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
108 struct rte_mbuf *mbuf = _m;
110 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
111 mbuf->tx_offload = CALC_TX_OL(sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr));
113 mbuf->pkt.vlan_macip.f.l2_len = sizeof(prox_rte_ether_hdr);
114 mbuf->pkt.vlan_macip.f.l3_len = sizeof(prox_rte_ipv4_hdr);
117 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
/* Re-initialize one mempool element in place: skip the mempool header to
 * reach the mbuf and re-run prox_pktmbuf_init() on it. 'arg' is a
 * struct prox_pktmbuf_reinit_args (mempool + lcore cfg).
 * NOTE(review): the declarations of 'obj'/'m' and their derivation from
 * 'start' are not visible in this truncated view. */
120 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
122 struct prox_pktmbuf_reinit_args *init_args = arg;
126 obj += init_args->mp->header_size;
127 m = (struct rte_mbuf*)obj;
129 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
/* Enable a requested TX offload 'flag' at the best available level:
 * - drop it if explicitly disabled in port_cfg->disabled_tx_offload;
 * - else enable it port-wide if the device capability supports it;
 * - else enable it per-queue if only the queue capability supports it;
 * - else clear it from requested_tx_offload and log why.
 * Expects 'port_cfg' in the caller's scope. (Comments cannot be inserted
 * inside the backslash-continued macro body without breaking it, and some
 * continuation lines are missing from this truncated view.) */
132 #define CONFIGURE_TX_OFFLOAD(flag) \
133 if (port_cfg->requested_tx_offload & flag) {\
134 if (port_cfg->disabled_tx_offload & flag) {\
135 plog_info("\t\t%s disabled by configuration\n", #flag);\
136 port_cfg->requested_tx_offload &= ~flag;\
137 } else if (port_cfg->dev_info.tx_offload_capa & flag) {\
138 port_cfg->port_conf.txmode.offloads |= flag;\
139 plog_info("\t\t%s enabled on port\n", #flag);\
140 } else if (port_cfg->dev_info.tx_queue_offload_capa & flag) {\
141 port_cfg->tx_conf.offloads |= flag;\
142 plog_info("\t\t%s enabled on queue\n", #flag);\
144 port_cfg->requested_tx_offload &= ~flag;\
145 plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
148 plog_info("\t\t%s disabled\n", #flag);\
/* RX counterpart of CONFIGURE_TX_OFFLOAD: enable a requested RX offload
 * port-wide when rx_offload_capa allows, else per-queue when
 * rx_queue_offload_capa allows, else clear it from requested_rx_offload.
 * Expects 'port_cfg' in the caller's scope. (Some continuation lines are
 * missing from this truncated view.) */
151 #define CONFIGURE_RX_OFFLOAD(flag) \
152 if (port_cfg->requested_rx_offload & flag) {\
153 if (port_cfg->dev_info.rx_offload_capa & flag) {\
154 port_cfg->port_conf.rxmode.offloads |= flag;\
155 plog_info("\t\t%s enabled on port\n", #flag);\
156 } else if (port_cfg->dev_info.rx_queue_offload_capa & flag) {\
157 port_cfg->rx_conf.offloads |= flag;\
158 plog_info("\t\t%s enabled on queue\n", #flag);\
160 port_cfg->requested_rx_offload &= ~flag;\
161 plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
164 plog_info("\t\t%s disabled\n", #flag);\
/* Assign IPv4 address '*ip' to kernel interface 'devname' via the
 * SIOCSIFADDR ioctl on an AF_INET datagram socket; panics on failure.
 * NOTE(review): strncpy with IFNAMSIZ may leave ifr_name unterminated for
 * max-length names, and no close(fd) is visible in this truncated view —
 * confirm against the full source. The %d in the panic message prints the
 * raw IP as an integer, not dotted-quad. */
168 static void set_ip_address (char *devname, uint32_t *ip)
171 struct sockaddr_in in_addr;
174 memset(&ifreq, 0, sizeof(struct ifreq));
175 memset(&in_addr, 0, sizeof(struct sockaddr_in));
177 in_addr.sin_family = AF_INET;
178 in_addr.sin_addr = *(struct in_addr *)ip;
179 fd = socket(in_addr.sin_family, SOCK_DGRAM, 0);
181 strncpy(ifreq.ifr_name, devname, IFNAMSIZ);
182 ifreq.ifr_addr = *(struct sockaddr *)&in_addr;
183 rc = ioctl(fd, SIOCSIFADDR, &ifreq);
184 PROX_PANIC(rc < 0, "Failed to set IP address %d on device %s: error = %d\n", *ip, devname, errno);
188 /* initialize rte devices and check the number of available ports */
/* Pass 1: create virtual devices (tap or null PMD, depending on DPDK
 * version) for every active port that declares a 'vdev', wiring up the
 * dpdk_mapping between the physical port and its companion vdev, copying
 * IP/MAC/type settings across, and optionally creating a kernel VLAN
 * sub-interface. Pass 2: enumerate the ports DPDK discovered, mark them
 * available, and cache rte_eth_dev_info (queue limits, descriptor limits,
 * driver name, PCI address, NUMA node) into prox_port_cfg. Also applies
 * per-PMD workarounds for broken offload capability reporting.
 * NOTE(review): many lines (continues, braces, #else/#endif, some
 * declarations such as 'command', 'id', 'name', 'ptr', 'buf') are missing
 * from this truncated view. */
189 void init_rte_dev(int use_dummy_devices)
191 uint8_t nb_ports, port_id_max;
192 int port_id_last, rc = 0;
193 struct rte_eth_dev_info dev_info;
194 const struct rte_pci_device *pci_dev;
/* --- Pass 1: instantiate companion vdevs for active ports --- */
196 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
197 if (!prox_port_cfg[port_id].active) {
200 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
201 if (port_cfg->vdev[0]) {
202 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
203 char name[MAX_NAME_SIZE], tap[MAX_NAME_SIZE];
204 snprintf(tap, MAX_NAME_SIZE, "net_tap%d", port_id);
205 snprintf(name, MAX_NAME_SIZE, "iface=%s", port_cfg->vdev);
206 rc = rte_vdev_init(tap, name);
208 rc = eth_dev_null_create(tap, name, PROX_RTE_ETHER_MIN_LEN, 0);
210 PROX_PANIC(rc != 0, "Unable to create device %s %s\n", "net tap", port_cfg->vdev);
/* Newly created vdev gets the last available DPDK port id. */
211 int vdev_port_id = prox_rte_eth_dev_count_avail() - 1;
212 PROX_PANIC(vdev_port_id >= PROX_MAX_PORTS, "Too many port defined %d >= %d\n", vdev_port_id, PROX_MAX_PORTS);
213 plog_info("\tCreating device %s, port %d\n", port_cfg->vdev, vdev_port_id);
214 prox_port_cfg[vdev_port_id].active = 1;
215 prox_port_cfg[vdev_port_id].dpdk_mapping = port_id;
216 prox_port_cfg[vdev_port_id].n_txq = 1;
218 if (prox_port_cfg[port_id].vlan_tag) {
220 snprintf(prox_port_cfg[vdev_port_id].name, MAX_NAME_SIZE, "%s_%d", port_cfg->vdev, prox_port_cfg[port_id].vlan_tag);
221 sprintf(command, "ip link add link %s name %s type vlan id %d", port_cfg->vdev, prox_port_cfg[vdev_port_id].name, prox_port_cfg[port_id].vlan_tag);
223 plog_info("Running %s\n", command);
224 plog_info("Using vlan tag %d - added device %s\n", prox_port_cfg[port_id].vlan_tag, prox_port_cfg[vdev_port_id].name);
226 strncpy(prox_port_cfg[vdev_port_id].name, port_cfg->vdev, MAX_NAME_SIZE);
228 prox_port_cfg[port_id].dpdk_mapping = vdev_port_id;
229 prox_port_cfg[vdev_port_id].ip = rte_be_to_cpu_32(prox_port_cfg[port_id].ip);
230 prox_port_cfg[port_id].ip = 0; // So only vdev has an IP associated
231 prox_port_cfg[vdev_port_id].type = prox_port_cfg[port_id].type;
232 if (prox_port_cfg[vdev_port_id].type == PROX_PORT_MAC_HW) {
233 // If DPDK port MAC set to HW, then make sure the vdev has the same MAC as DPDK port
234 prox_port_cfg[vdev_port_id].type = PROX_PORT_MAC_SET;
235 rte_eth_macaddr_get(port_id, &prox_port_cfg[vdev_port_id].eth_addr);
236 plog_info("\tDPDK port %d MAC address pre-configured to MAC from port %d: "MAC_BYTES_FMT"\n",
237 vdev_port_id, port_id, MAC_BYTES(prox_port_cfg[vdev_port_id].eth_addr.addr_bytes));
239 memcpy(&prox_port_cfg[vdev_port_id].eth_addr, &prox_port_cfg[port_id].eth_addr, sizeof(prox_port_cfg[port_id].eth_addr));
/* --- Pass 2: enumerate what DPDK actually found --- */
242 nb_ports = prox_rte_eth_dev_count_avail();
243 /* get available ports configuration */
244 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
246 if (use_dummy_devices) {
247 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
248 nb_ports = prox_last_port_active() + 1;
249 plog_info("Creating %u dummy devices\n", nb_ports);
/* Leading digit of the name is bumped per iteration to keep names unique. */
251 char port_name[32] = "0dummy_dev";
252 for (uint32_t i = 0; i < nb_ports; ++i) {
253 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
254 rte_vdev_init(port_name, "size=64,copy=0");
256 eth_dev_null_create(port_name, 0, PROX_RTE_ETHER_MIN_LEN, 0);
261 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
264 else if (prox_last_port_active() != -1) {
265 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
266 plog_info("\tDPDK has found %u ports\n", nb_ports);
269 if (nb_ports > PROX_MAX_PORTS) {
270 plog_warn("\tWarning: I can deal with at most %u ports."
271 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
273 nb_ports = PROX_MAX_PORTS;
277 RTE_ETH_FOREACH_DEV(id) {
279 rte_eth_dev_get_name_by_port(id, name);
280 plog_info("\tFound DPDK port id %u %s\n", id, name);
281 if (id >= PROX_MAX_PORTS) {
282 plog_warn("\tWarning: I can deal with at most %u ports."
283 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
285 prox_port_cfg[id].available = 1;
286 if (id > port_id_max)
290 port_id_last = prox_last_port_active();
291 PROX_PANIC(port_id_last > port_id_max,
292 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
293 port_id_last, port_id_max);
295 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
296 for (uint8_t port_id = 0; port_id <= port_id_last; ++port_id) {
297 /* skip ports that are not enabled */
298 if (!prox_port_cfg[port_id].active) {
300 } else if (prox_port_cfg[port_id].available == 0) {
301 PROX_PANIC(1, "port %u enabled but not available\n", port_id);
303 plog_info("\tGetting info for rte dev %u\n", port_id);
304 rte_eth_dev_info_get(port_id, &dev_info);
305 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
306 port_cfg->socket = -1;
/* Cache device limits so later configuration can clamp against them. */
308 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
309 port_cfg->max_txq = dev_info.max_tx_queues;
310 port_cfg->max_rxq = dev_info.max_rx_queues;
311 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
312 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
313 port_cfg->min_tx_desc = dev_info.tx_desc_lim.nb_min;
314 port_cfg->max_tx_desc = dev_info.tx_desc_lim.nb_max;
315 port_cfg->min_rx_desc = dev_info.rx_desc_lim.nb_min;
316 port_cfg->max_rx_desc = dev_info.rx_desc_lim.nb_max;
318 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
319 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
320 plog_info("\tPort %u : %d<=nb_tx_desc<=%d %d<=nb_rx_desc<=%d\n", port_id, port_cfg->min_tx_desc, port_cfg->max_tx_desc, port_cfg->min_rx_desc, port_cfg->max_rx_desc);
/* Derive a short driver name: strip the "rte_"/"net_" prefix and any
 * "_pmd" suffix so later per-PMD checks can use simple names. */
322 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
323 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
324 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
325 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
327 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
330 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
334 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
335 pci_dev = dev_info.pci_dev;
337 if (!dev_info.device)
339 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
344 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
345 "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
346 /* Try to find the device's numa node */
348 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
349 FILE* numa_node_fd = fopen(buf, "r");
351 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
352 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
354 port_cfg->socket = strtol(buf, 0, 0);
355 if (port_cfg->socket == -1) {
356 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
358 fclose(numa_node_fd);
361 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
362 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
363 plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
364 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
366 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
367 plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
368 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
370 // Some OVS versions reports that they support UDP offload and no IPv4 offload, but fails when UDP offload is enabled
371 if ((!strcmp(port_cfg->short_name, "virtio")) &&
372 ((port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) == 0) &&
373 (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
374 plog_info("\t\tDisabling UDP cksum on virtio\n");
375 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
380 /* Create rte ring-backed devices */
/* For every active port configured with an rx_ring name, look up the
 * named RX/TX rte_rings (they must already exist) and create an ethdev
 * backed by them via rte_eth_from_rings(). Returns the number of
 * ring-backed devices created (counter increment not visible in this
 * truncated view). */
381 uint8_t init_rte_ring_dev(void)
383 uint8_t nb_ring_dev = 0;
385 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
386 /* skip ports that are not enabled */
387 if (!prox_port_cfg[port_id].active) {
390 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
391 if (port_cfg->rx_ring[0] != '\0') {
392 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
394 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
395 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
396 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
397 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
399 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
400 PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
402 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/* Log a human-readable summary of a port's capabilities: name, queue
 * limits, driver, speed mask, and a decoded list of every RX/TX offload
 * capability flag the device reports (DPDK >= 18.08 layout).
 * Fix: the TX capability label for DEV_TX_OFFLOAD_TCP_TSO was misspelled
 * "TCP TS0" (digit zero); corrected to "TCP TSO" (TCP Segmentation
 * Offload). No other byte of the function is changed. */
411 static void print_port_capa(struct prox_port_cfg *port_cfg)
415 port_id = port_cfg - prox_port_cfg;
416 plog_info("\t*** Initializing port %u ***\n", port_id);
417 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
418 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
419 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
420 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
421 plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
423 if (port_cfg->max_link_speed != UINT32_MAX) {
424 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
427 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
428 plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
429 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
430 plog_info("VLAN STRIP | ");
431 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
432 plog_info("IPV4 CKSUM | ");
433 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
434 plog_info("UDP CKSUM | ");
435 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
436 plog_info("TCP CKSUM | ");
437 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
438 plog_info("TCP LRO | ");
439 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
440 plog_info("QINQ STRIP | ");
441 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
442 plog_info("OUTER_IPV4_CKSUM | ");
443 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
444 plog_info("MACSEC STRIP | ");
445 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
446 plog_info("HEADER SPLIT | ");
447 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
448 plog_info("VLAN FILTER | ");
449 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
450 plog_info("VLAN EXTEND | ");
451 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
452 plog_info("JUMBO FRAME | ");
453 #if defined(DEV_RX_OFFLOAD_CRC_STRIP)
454 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
455 plog_info("CRC STRIP | ");
457 #if defined(DEV_RX_OFFLOAD_KEEP_CRC)
458 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)
459 plog_info("KEEP CRC | ");
461 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
462 plog_info("SCATTER | ");
463 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
464 plog_info("TIMESTAMP | ");
465 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
466 plog_info("SECURITY ");
469 plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
470 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
471 plog_info("VLAN INSERT | ");
472 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
473 plog_info("IPV4 CKSUM | ");
474 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
475 plog_info("UDP CKSUM | ");
476 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
477 plog_info("TCP CKSUM | ");
478 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
479 plog_info("SCTP CKSUM | ");
480 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
481 plog_info("TCP TSO | ");
482 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
483 plog_info("UDP TSO | ");
484 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
485 plog_info("OUTER IPV4 CKSUM | ");
486 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
487 plog_info("QINQ INSERT | ");
488 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
489 plog_info("VLAN TNL TSO | ");
490 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
491 plog_info("GRE TNL TSO | ");
492 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
493 plog_info("IPIP TNL TSO | ");
494 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
495 plog_info("GENEVE TNL TSO | ");
496 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
497 plog_info("MACSEC INSERT | ");
498 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
499 plog_info("MT LOCKFREE | ");
500 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
501 plog_info("MULTI SEG | ");
502 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
503 plog_info("SECURITY | ");
504 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
505 plog_info("UDP TNL TSO | ");
506 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
507 plog_info("IP TNL TSO | ");
510 plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
511 plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
512 plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
513 plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
514 plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
/* Derive the port's highest advertised link speed (in Mbps) from the
 * dev_info.speed_capa bitmask and store it in port_cfg->max_link_speed;
 * UINT32_MAX means "unknown". The capability-based value, not the
 * negotiated one, is used by gen/lat for extrapolation because
 * rte_eth_link_get(_nowait) may report too late. vmxnet3 and virtio are
 * excluded as they report fake speed capabilities. */
518 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
520 port_cfg->max_link_speed = UINT32_MAX;
522 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
523 // virtio and vmxnet3 reports fake max_link_speed
524 if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
525 // Get link_speed from highest capability from the port
526 // This will be used by gen and lat for extrapolation purposes
527 // The negotiated link_speed (as reported by rte_eth_link_get
528 // or rte_eth_link_get_nowait) might be reported too late
529 // and might result in wrong exrapolation, and hence should not be used
530 // for extrapolation purposes
/* Walk from fastest to slowest; first matching capability bit wins. */
531 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
532 port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
533 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
534 port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
535 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
536 port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
537 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
538 port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
539 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
540 port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
541 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
542 port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
543 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
544 port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
545 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
546 port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
547 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
548 port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
549 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
550 port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
551 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
552 port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
553 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
554 port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
/* Fully bring up one DPDK port: log capabilities, fix up queue counts,
 * set MTU, configure RSS and RX/TX offloads (per DPDK-version API),
 * clamp descriptor counts to device limits, configure the device, set up
 * TX then RX queues, start the port, optionally assign an IP, read link
 * status, and apply promiscuous/stats-mapping/multicast settings.
 * Panics on any unrecoverable setup failure.
 * NOTE(review): many lines (declarations of port_id/ret, closing braces,
 * #else/#endif pairs) are missing from this truncated view, so code here
 * is kept byte-identical and only annotated. */
560 static void init_port(struct prox_port_cfg *port_cfg)
562 static char dummy_pool_name[] = "0_dummy";
563 struct rte_eth_link link;
567 get_max_link_speed(port_cfg);
568 print_port_capa(port_cfg);
569 port_id = port_cfg - prox_port_cfg;
570 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
571 "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
/* A port with no RX queues still needs one queue + a dummy pool to start. */
573 if (port_cfg->n_rxq == 0) {
574 /* not receiving on this port */
575 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
577 uint32_t mbuf_size = TX_MBUF_SIZE;
578 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
579 mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
581 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
582 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
583 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
585 sizeof(struct rte_pktmbuf_pool_private),
586 rte_pktmbuf_pool_init, NULL,
587 prox_pktmbuf_init, 0,
588 port_cfg->socket, 0);
589 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
590 port_cfg->socket, port_cfg->n_rxd);
/* Bump leading digit so the next dummy pool gets a unique name. */
591 dummy_pool_name[0]++;
593 // Most pmd should now support setting mtu
594 if (port_cfg->mtu + PROX_RTE_ETHER_HDR_LEN + PROX_RTE_ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
595 plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
596 port_cfg->mtu = port_cfg->max_rx_pkt_len;
598 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
599 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
601 plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
603 if (port_cfg->n_txq == 0) {
604 /* not sending on this port */
605 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
610 if (port_cfg->n_rxq > 1) {
611 // Enable RSS if multiple receive queues
612 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
613 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
614 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
615 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
616 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP|ETH_RSS_UDP;
618 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
622 // Make sure that the requested RSS offload is supported by the PMD
623 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
624 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
626 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
628 // rxmode such as hw src strip
/* RX offloads: >= 18.08 uses the offloads bitmask API, older DPDK uses
 * discrete rxmode bit-fields (hw_strip_crc / jumbo_frame). */
629 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
630 #if defined (DEV_RX_OFFLOAD_CRC_STRIP)
631 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
633 #if defined (DEV_RX_OFFLOAD_KEEP_CRC)
634 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_KEEP_CRC);
636 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
637 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
639 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
640 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
642 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
643 port_cfg->port_conf.rxmode.jumbo_frame = 1;
647 // IPV4, UDP, SCTP Checksums
648 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
649 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
650 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
651 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
653 if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
654 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
655 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
657 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
658 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
659 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
663 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
664 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
666 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
667 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
668 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
669 } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
670 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
672 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
674 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
675 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
677 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
681 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
682 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
684 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
685 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
687 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
690 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
691 port_id, port_cfg->n_rxq, port_cfg->n_txq);
693 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
694 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
/* These PMDs do not support link-state interrupts; force LSC off. */
696 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
697 !strcmp(port_cfg->short_name, "virtio") ||
698 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
699 !strcmp(port_cfg->short_name, "i40e") ||
701 !strcmp(port_cfg->short_name, "i40e_vf") ||
702 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
703 !strcmp(port_cfg->driver_name, "") || /* NULL device */
704 !strcmp(port_cfg->short_name, "vmxnet3")) {
705 port_cfg->port_conf.intr_conf.lsc = 0;
706 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
709 if (port_cfg->lsc_set_explicitely) {
710 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
711 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
/* Clamp descriptor counts into the device-reported [min, max] range. */
713 if (port_cfg->n_txd < port_cfg->min_tx_desc) {
714 plog_info("\t\tNumber of TX descriptors is set to %d (minimum required for %s\n", port_cfg->min_tx_desc, port_cfg->short_name);
715 port_cfg->n_txd = port_cfg->min_tx_desc;
718 if (port_cfg->n_rxd < port_cfg->min_rx_desc) {
719 plog_info("\t\tNumber of RX descriptors is set to %d (minimum required for %s\n", port_cfg->min_rx_desc, port_cfg->short_name);
720 port_cfg->n_rxd = port_cfg->min_rx_desc;
723 if (port_cfg->n_txd > port_cfg->max_tx_desc) {
724 plog_info("\t\tNumber of TX descriptors is set to %d (maximum required for %s\n", port_cfg->max_tx_desc, port_cfg->short_name);
725 port_cfg->n_txd = port_cfg->max_tx_desc;
728 if (port_cfg->n_rxd > port_cfg->max_rx_desc) {
729 plog_info("\t\tNumber of RX descriptors is set to %d (maximum required for %s\n", port_cfg->max_rx_desc, port_cfg->short_name);
730 port_cfg->n_rxd = port_cfg->max_rx_desc;
733 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
734 port_cfg->n_txq, &port_cfg->port_conf);
735 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
737 if (port_cfg->port_conf.intr_conf.lsc) {
738 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
741 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
743 /* initialize TX queues first */
744 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
745 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
746 queue_id, port_cfg->socket, port_cfg->n_txd);
747 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
748 port_cfg->socket, &port_cfg->tx_conf);
749 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
752 /* initialize RX queues */
753 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
754 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
755 queue_id, port_id, port_cfg->socket,
756 port_cfg->n_rxd, port_cfg->pool[queue_id]);
757 ret = rte_eth_rx_queue_setup(port_id, queue_id,
759 port_cfg->socket, &port_cfg->rx_conf,
760 port_cfg->pool[queue_id]);
761 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
764 plog_info("\t\tStarting up port %u ...", port_id);
765 ret = rte_eth_dev_start(port_id);
767 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
768 plog_info(" done: ");
770 if (prox_port_cfg[port_id].ip) {
771 set_ip_address(prox_port_cfg[port_id].name, &prox_port_cfg[port_id].ip);
773 /* Getting link status can be done without waiting if Link
774 State Interrupt is enabled since in that case, if the link
775 is recognized as being down, an interrupt will notify that
777 if (port_cfg->port_conf.intr_conf.lsc)
778 rte_eth_link_get_nowait(port_id, &link);
780 rte_eth_link_get(port_id, &link);
782 port_cfg->link_up = link.link_status;
783 port_cfg->link_speed = link.link_speed;
785 if (link.link_status) {
786 plog_info("Link Up - speed %'u Mbps - %s\n",
788 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
789 "full-duplex" : "half-duplex");
792 plog_info("Link Down\n");
795 if (port_cfg->promiscuous) {
796 rte_eth_promiscuous_enable(port_id);
797 plog_info("\t\tport %u in promiscuous mode\n", port_id);
/* Queue->counter stats mapping is unsupported on these PMDs; skip them. */
800 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
801 strcmp(port_cfg->short_name, "i40e") &&
802 strcmp(port_cfg->short_name, "i40e_vf") &&
803 strcmp(port_cfg->short_name, "vmxnet3")) {
804 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
805 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
807 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
810 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
811 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
813 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
/* Multicast: enable allmulticast and install the address list; roll back
 * on failure so the port state stays consistent. */
817 if (port_cfg->nb_mc_addr) {
818 rte_eth_allmulticast_enable(port_id);
819 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
820 plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
821 port_cfg->nb_mc_addr = 0;
822 rte_eth_allmulticast_disable(port_id);
823 plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
825 plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
826 plog_info("\t\tport %u in multicast mode\n", port_id);
/* Initialize every active port up to the last active index by calling
 * init_port(). Skipped entirely when running as a DPDK secondary process
 * (the primary owns port setup). */
831 void init_port_all(void)
833 enum rte_proc_type_t proc_type;
834 proc_type = rte_eal_process_type();
835 if (proc_type == RTE_PROC_SECONDARY) {
836 plog_info("\tSkipping port initialization as secondary process\n");
839 uint8_t max_port_idx = prox_last_port_active() + 1;
841 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
842 if (!prox_port_cfg[portid].active) {
845 init_port(&prox_port_cfg[portid]);
/* atexit-style teardown: close every active DPDK port. */
849 void close_ports_atexit(void)
851 uint8_t max_port_idx = prox_last_port_active() + 1;
853 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
854 if (!prox_port_cfg[portid].active) {
857 plog_info("Closing port %u\n", portid);
858 rte_eth_dev_close(portid);
/* Populate each active port's MAC address according to its configured
 * type: read it from hardware (MAC_HW), generate a random one (MAC_RAND),
 * or program the user-supplied address into the device (MAC_SET — primary
 * process only; secondary processes cannot change the MAC).
 * NOTE(review): the 'rc' declaration and break statements between cases
 * are not visible in this truncated view. */
862 void init_port_addr(void)
864 struct prox_port_cfg *port_cfg;
865 enum rte_proc_type_t proc_type;
868 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
869 if (!prox_port_cfg[port_id].active) {
872 port_cfg = &prox_port_cfg[port_id];
874 switch (port_cfg->type) {
875 case PROX_PORT_MAC_HW:
876 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
878 case PROX_PORT_MAC_RAND:
879 prox_rte_eth_random_addr(port_cfg->eth_addr.addr_bytes);
881 case PROX_PORT_MAC_SET:
882 proc_type = rte_eal_process_type();
883 if (proc_type == RTE_PROC_SECONDARY) {
884 plog_warn("\tport %u: unable to change port mac address as secondary process\n", port_id);
885 } else if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
886 plog_warn("\tport %u: failed to set mac address. Error = %d\n", port_id, rc);
888 plog_info("Setting MAC to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
894 int port_is_active(uint8_t port_id)
896 if (port_id > PROX_MAX_PORTS) {
897 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
901 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
902 if (!port_cfg->active) {
903 plog_info("Port %u is not active\n", port_id);