2 // Copyright (c) 2010-2020 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include <sys/ioctl.h>
37 #include "prox_port_cfg.h"
38 #include "prox_globals.h"
44 #include "prox_cksum.h"
45 #include "stats_irq.h"
46 #include "prox_compat.h"
47 #include "rte_ethdev.h"
/* Global per-port configuration table, indexed by PROX port id (0..PROX_MAX_PORTS-1). */
50 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Return the number of ports flagged 'active' in prox_port_cfg.
 * NOTE(review): braces, the accumulator declaration and the return statement
 * appear elided by the extraction (numbering gaps); code kept verbatim. */
54 int prox_nb_active_ports(void)
57 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
58 ret += prox_port_cfg[i].active;
/* Presumably returns the highest index of an active port, or -1 when none is
 * active (callers below use 'prox_last_port_active() + 1' as a port count and
 * compare against -1) — TODO confirm, body lines are elided in this extraction. */
63 int prox_last_port_active(void)
66 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
67 if (prox_port_cfg[i].active) {
/* Link-state-change (LSC) event callback registered with the ethdev layer
 * (see rte_eth_dev_callback_register call in init_port below).
 * Three signature variants are selected by DPDK version:
 *   - >= 17.11: uint16_t port id, returns int
 *   - >= 17.8:  uint8_t  port id, returns int
 *   - older:    uint8_t  port id, returns void
 * On an RTE_ETH_EVENT_INTR_LSC event it increments the 'lsc' atomic counter
 * (declared elsewhere in this file, not visible in this chunk).
 * NOTE(review): the #else/#endif and return lines appear elided by the
 * extraction; code kept verbatim. */
74 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
75 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
76 __attribute__((unused)) void *ret_param)
78 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
79 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
80 __attribute__((unused)) void *ret_param)
82 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
/* Ignore any event other than a link-state-change interrupt. */
86 if (RTE_ETH_EVENT_INTR_LSC != type) {
87 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
94 rte_atomic32_inc(&lsc);
96 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Argument bundle passed (as 'arg') to prox_pktmbuf_reinit when iterating
 * over a mempool's elements. */
101 struct prox_pktmbuf_reinit_args {
/* Mempool whose mbuf elements are being re-initialized. */
102 struct rte_mempool *mp;
/* Owning lcore configuration; forwarded as the opaque arg to prox_pktmbuf_init. */
103 struct lcore_cfg *lconf;
106 /* standard mbuf initialization procedure */
/* Mempool element init callback: pre-loads each mbuf's TX offload metadata
 * (L2 = Ethernet header length, L3 = IPv4 header length) before delegating
 * to DPDK's stock rte_pktmbuf_init. The field layout differs before/after
 * DPDK 1.8 (tx_offload bitfield vs. pkt.vlan_macip), hence the #if.
 * NOTE(review): the #else/#endif lines appear elided by the extraction. */
107 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
109 struct rte_mbuf *mbuf = _m;
111 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
112 mbuf->tx_offload = CALC_TX_OL(sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr));
114 mbuf->pkt.vlan_macip.f.l2_len = sizeof(prox_rte_ether_hdr);
115 mbuf->pkt.vlan_macip.f.l3_len = sizeof(prox_rte_ipv4_hdr);
118 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
/* Re-initialize one mempool element in place: skip the mempool header to get
 * at the mbuf, then run prox_pktmbuf_init on it. Intended as a callback for a
 * mempool memory-chunk walk (hence the start/end/idx signature).
 * NOTE(review): the declaration of 'obj'/'m' and some body lines appear
 * elided by the extraction; code kept verbatim. */
121 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
123 struct prox_pktmbuf_reinit_args *init_args = arg;
127 obj += init_args->mp->header_size;
128 m = (struct rte_mbuf*)obj;
130 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
/* Enable a requested TX offload 'flag', preferring port-level capability
 * (port_conf.txmode.offloads) over queue-level (tx_conf.offloads); if the
 * offload was administratively disabled, or neither port nor queue supports
 * it, clear it from requested_tx_offload and log why. Expects 'port_cfg' in
 * scope at the expansion site.
 * NOTE(review): closing braces / else lines of the macro appear elided by the
 * extraction; macro text kept verbatim. No comments are inserted inside the
 * macro because of the backslash line continuations. */
133 #define CONFIGURE_TX_OFFLOAD(flag) \
134 if (port_cfg->requested_tx_offload & flag) {\
135 if (port_cfg->disabled_tx_offload & flag) {\
136 plog_info("\t\t%s disabled by configuration\n", #flag);\
137 port_cfg->requested_tx_offload &= ~flag;\
138 } else if (port_cfg->dev_info.tx_offload_capa & flag) {\
139 port_cfg->port_conf.txmode.offloads |= flag;\
140 plog_info("\t\t%s enabled on port\n", #flag);\
141 } else if (port_cfg->dev_info.tx_queue_offload_capa & flag) {\
142 port_cfg->tx_conf.offloads |= flag;\
143 plog_info("\t\t%s enabled on queue\n", #flag);\
145 port_cfg->requested_tx_offload &= ~flag;\
146 plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
149 plog_info("\t\t%s disabled\n", #flag);\
/* RX counterpart of CONFIGURE_TX_OFFLOAD: enable a requested RX offload at
 * port level if the device supports it, else at queue level, else drop the
 * request and log. Expects 'port_cfg' in scope at the expansion site.
 * NOTE(review): closing braces / else lines appear elided by the extraction;
 * macro text kept verbatim (no comments inside due to line continuations). */
152 #define CONFIGURE_RX_OFFLOAD(flag) \
153 if (port_cfg->requested_rx_offload & flag) {\
154 if (port_cfg->dev_info.rx_offload_capa & flag) {\
155 port_cfg->port_conf.rxmode.offloads |= flag;\
156 plog_info("\t\t%s enabled on port\n", #flag);\
157 } else if (port_cfg->dev_info.rx_queue_offload_capa & flag) {\
158 port_cfg->rx_conf.offloads |= flag;\
159 plog_info("\t\t%s enabled on queue\n", #flag);\
161 port_cfg->requested_rx_offload &= ~flag;\
162 plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
165 plog_info("\t\t%s disabled\n", #flag);\
/* Build a big-endian IPv4 netmask from a prefix length.
 * The first return yields 0 (~(uint32_t)-1 == 0) — presumably the prefix==0
 * special case; its guard condition is elided in this extraction, TODO confirm.
 * The general case sets the top 'prefix' bits and byte-swaps to network order.
 * NOTE(review): '1 << (32 - prefix)' is a signed shift; for prefix==0 that
 * would be UB, which is presumably why the special case exists. */
168 static inline uint32_t get_netmask(uint8_t prefix)
171 return(~((uint32_t) -1));
173 return rte_cpu_to_be_32(~((1 << (32 - prefix)) - 1));
/* Assign an IPv4 address ('ip', network byte order assumed — TODO confirm
 * against callers) and a netmask derived from 'prefix' to the kernel network
 * interface named 'devname', via SIOCSIFADDR / SIOCSIFNETMASK ioctls on an
 * AF_INET datagram socket. Panics on ioctl failure. Used for the TAP vdevs
 * created in init_rte_dev.
 * NOTE(review): declarations of 'ifreq', 'fd', 'rc' and the close(fd) appear
 * elided by the extraction; code kept verbatim. Also note strncpy into
 * ifr_name does not guarantee NUL-termination if devname is IFNAMSIZ long. */
176 static void set_ip_address(char *devname, uint32_t *ip, uint8_t prefix)
179 struct sockaddr_in in_addr;
181 uint32_t netmask = get_netmask(prefix);
182 plog_info("Setting netmask to %x\n", netmask);
184 fd = socket(AF_INET, SOCK_DGRAM, 0);
186 memset(&ifreq, 0, sizeof(struct ifreq));
187 memset(&in_addr, 0, sizeof(struct sockaddr_in));
189 in_addr.sin_family = AF_INET;
190 in_addr.sin_addr = *(struct in_addr *)ip;
192 strncpy(ifreq.ifr_name, devname, IFNAMSIZ);
/* First ioctl: set the interface address. */
193 ifreq.ifr_addr = *(struct sockaddr *)&in_addr;
194 rc = ioctl(fd, SIOCSIFADDR, &ifreq);
195 PROX_PANIC(rc < 0, "Failed to set IP address %x on device %s: error = %d (%s)\n", *ip, devname, errno, strerror(errno));
/* Second ioctl: set the netmask, reusing the same sockaddr_in. */
197 in_addr.sin_addr = *(struct in_addr *)&netmask;
198 ifreq.ifr_netmask = *(struct sockaddr *)&in_addr;
199 rc = ioctl(fd, SIOCSIFNETMASK, &ifreq);
200 PROX_PANIC(rc < 0, "Failed to set netmask %x (prefix %d) on device %s: error = %d (%s)\n", netmask, prefix, devname, errno, strerror(errno));
204 /* initialize rte devices and check the number of available ports */
/* One-time device discovery/creation pass:
 *  1) For every active port with a 'vdev' name configured, create a TAP (or,
 *     pre-17.5, null) virtual device, cross-link it with its DPDK port via
 *     dpdk_mapping, optionally create a VLAN sub-interface, and copy
 *     IP/prefix/MAC settings over.
 *  2) Optionally create dummy (null/ring) devices when use_dummy_devices.
 *  3) Enumerate all DPDK ports, record availability, and fill each active
 *     port's prox_port_cfg from rte_eth_dev_info_get (queue limits,
 *     descriptor limits, driver name, PCI address, NUMA node).
 *  4) Apply driver-specific TX-offload quirks (vmxnet3, virtio).
 * NOTE(review): many lines (braces, #else/#endif, 'continue' statements,
 * some declarations) are elided by the extraction; code kept verbatim. */
205 void init_rte_dev(int use_dummy_devices)
207 uint8_t nb_ports, port_id_max;
208 int port_id_last, rc = 0;
209 struct rte_eth_dev_info dev_info;
210 const struct rte_pci_device *pci_dev;
/* --- Pass 1: create vdevs (TAP interfaces) for ports that request one. --- */
212 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
213 if (!prox_port_cfg[port_id].active) {
216 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
217 if (port_cfg->vdev[0]) {
218 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
219 char name[MAX_NAME_SIZE], tap[MAX_NAME_SIZE];
220 snprintf(tap, MAX_NAME_SIZE, "net_tap%d", port_id);
221 snprintf(name, MAX_NAME_SIZE, "iface=%s", port_cfg->vdev);
222 rc = rte_vdev_init(tap, name);
224 rc = eth_dev_null_create(tap, name, PROX_RTE_ETHER_MIN_LEN, 0);
226 PROX_PANIC(rc != 0, "Unable to create device %s %s\n", "net tap", port_cfg->vdev);
/* The newly created vdev is assumed to be the last available port. */
227 int vdev_port_id = prox_rte_eth_dev_count_avail() - 1;
228 PROX_PANIC(vdev_port_id >= PROX_MAX_PORTS, "Too many port defined %d >= %d\n", vdev_port_id, PROX_MAX_PORTS);
229 plog_info("\tCreating device %s, port %d\n", port_cfg->vdev, vdev_port_id);
230 prox_port_cfg[vdev_port_id].is_vdev = 1;
231 prox_port_cfg[vdev_port_id].active = 1;
/* Cross-link vdev and its "parent" DPDK port in both directions. */
232 prox_port_cfg[vdev_port_id].dpdk_mapping = port_id;
233 prox_port_cfg[vdev_port_id].n_txq = 1;
/* Optional VLAN sub-interface on top of the TAP device, created by shelling
 * out to 'ip link add'. NOTE(review): the system()/exec call for 'command'
 * is elided by the extraction. */
235 if (prox_port_cfg[port_id].vlan_tag) {
237 snprintf(prox_port_cfg[vdev_port_id].name, MAX_NAME_SIZE, "%s_%d", port_cfg->vdev, prox_port_cfg[port_id].vlan_tag);
238 sprintf(command, "ip link add link %s name %s type vlan id %d", port_cfg->vdev, prox_port_cfg[vdev_port_id].name, prox_port_cfg[port_id].vlan_tag);
240 plog_info("Running %s\n", command);
241 plog_info("Using vlan tag %d - added device %s\n", prox_port_cfg[port_id].vlan_tag, prox_port_cfg[vdev_port_id].name);
243 strncpy(prox_port_cfg[vdev_port_id].name, port_cfg->vdev, MAX_NAME_SIZE);
245 prox_port_cfg[port_id].dpdk_mapping = vdev_port_id;
246 prox_port_cfg[vdev_port_id].ip = rte_be_to_cpu_32(prox_port_cfg[port_id].ip);
247 prox_port_cfg[vdev_port_id].prefix = prox_port_cfg[port_id].prefix;
248 prox_port_cfg[vdev_port_id].type = prox_port_cfg[port_id].type;
249 if (prox_port_cfg[vdev_port_id].type == PROX_PORT_MAC_HW) {
250 // If DPDK port MAC set to HW, then make sure the vdev has the same MAC as DPDK port
251 prox_port_cfg[vdev_port_id].type = PROX_PORT_MAC_SET;
252 rte_eth_macaddr_get(port_id, &prox_port_cfg[vdev_port_id].eth_addr);
253 plog_info("\tDPDK port %d MAC address pre-configured to MAC from port %d: "MAC_BYTES_FMT"\n",
254 vdev_port_id, port_id, MAC_BYTES(prox_port_cfg[vdev_port_id].eth_addr.addr_bytes));
256 memcpy(&prox_port_cfg[vdev_port_id].eth_addr, &prox_port_cfg[port_id].eth_addr, sizeof(prox_port_cfg[port_id].eth_addr));
/* --- Pass 2: count ports and optionally create dummy devices. --- */
259 nb_ports = prox_rte_eth_dev_count_avail();
260 /* get available ports configuration */
261 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
263 if (use_dummy_devices) {
264 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
265 nb_ports = prox_last_port_active() + 1;
266 plog_info("Creating %u dummy devices\n", nb_ports);
/* The leading '0' of port_name is incremented per iteration to make names
 * unique — presumably; the increment line is elided. TODO confirm. */
268 char port_name[32] = "0dummy_dev";
269 for (uint32_t i = 0; i < nb_ports; ++i) {
270 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
271 rte_vdev_init(port_name, "size=64,copy=0");
273 eth_dev_null_create(port_name, 0, PROX_RTE_ETHER_MIN_LEN, 0);
278 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
281 else if (prox_last_port_active() != -1) {
282 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
283 plog_info("\tDPDK has found %u ports\n", nb_ports);
286 if (nb_ports > PROX_MAX_PORTS) {
287 plog_warn("\tWarning: I can deal with at most %u ports."
288 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
290 nb_ports = PROX_MAX_PORTS;
/* --- Pass 3: enumerate DPDK ports, mark availability, track max id. --- */
294 RTE_ETH_FOREACH_DEV(id) {
296 rte_eth_dev_get_name_by_port(id, name);
297 plog_info("\tFound DPDK port id %u %s\n", id, name);
298 if (id >= PROX_MAX_PORTS) {
299 plog_warn("\tWarning: I can deal with at most %u ports."
300 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
302 prox_port_cfg[id].available = 1;
303 if (id > port_id_max)
307 port_id_last = prox_last_port_active();
308 PROX_PANIC(port_id_last > port_id_max,
309 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
310 port_id_last, port_id_max);
312 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
313 for (uint8_t port_id = 0; port_id <= port_id_last; ++port_id) {
314 /* skip ports that are not enabled */
315 if (!prox_port_cfg[port_id].active) {
317 } else if (prox_port_cfg[port_id].available == 0) {
318 PROX_PANIC(1, "port %u enabled but not available\n", port_id);
320 plog_info("\tGetting info for rte dev %u\n", port_id);
321 rte_eth_dev_info_get(port_id, &dev_info);
322 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* -1 means "NUMA node unknown"; possibly refined from sysfs below. */
323 port_cfg->socket = -1;
325 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
326 port_cfg->max_txq = dev_info.max_tx_queues;
327 port_cfg->max_rxq = dev_info.max_rx_queues;
328 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
329 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
330 port_cfg->min_tx_desc = dev_info.tx_desc_lim.nb_min;
331 port_cfg->max_tx_desc = dev_info.tx_desc_lim.nb_max;
332 port_cfg->min_rx_desc = dev_info.rx_desc_lim.nb_min;
333 port_cfg->max_rx_desc = dev_info.rx_desc_lim.nb_max;
335 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
336 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
337 plog_info("\tPort %u : %d<=nb_tx_desc<=%d %d<=nb_rx_desc<=%d\n", port_id, port_cfg->min_tx_desc, port_cfg->max_tx_desc, port_cfg->min_rx_desc, port_cfg->max_rx_desc);
/* Derive short_name by stripping the "rte_"/"net_" prefix and, below, any
 * "_pmd" suffix — used throughout for driver-specific quirks. */
339 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
340 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
341 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
342 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
344 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
347 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
/* dev_info.pci_dev was removed in DPDK 18.05; newer code goes through
 * dev_info.device. */
351 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
352 pci_dev = dev_info.pci_dev;
354 if (!dev_info.device)
356 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
361 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
362 "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
363 /* Try to find the device's numa node */
365 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
366 FILE* numa_node_fd = fopen(buf, "r");
368 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
369 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
371 port_cfg->socket = strtol(buf, 0, 0);
372 if (port_cfg->socket == -1) {
373 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
375 fclose(numa_node_fd);
378 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
379 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
380 plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
381 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
383 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
384 plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
385 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
387 // Some OVS versions reports that they support UDP offload and no IPv4 offload, but fails when UDP offload is enabled
388 if ((!strcmp(port_cfg->short_name, "virtio")) &&
389 ((port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) == 0) &&
390 (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
391 plog_info("\t\tDisabling UDP cksum on virtio\n");
392 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
397 /* Create rte ring-backed devices */
/* For every active port configured with an rx_ring name, look up the named
 * RX and TX rte_rings (panicking if either is missing) and wrap them in an
 * ethdev via rte_eth_from_rings. Presumably returns the number of ring-backed
 * devices created (nb_ring_dev) — the return statement is elided, TODO confirm. */
398 uint8_t init_rte_ring_dev(void)
400 uint8_t nb_ring_dev = 0;
402 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
403 /* skip ports that are not enabled */
404 if (!prox_port_cfg[port_id].active) {
407 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
408 if (port_cfg->rx_ring[0] != '\0') {
409 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
411 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
412 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
413 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
414 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
416 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
417 PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
419 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/* Log (via plog_info) a human-readable dump of one port's capabilities:
 * name, queue limits, driver, speed capability mask, and a decoded list of
 * RX/TX offload capability flags (DPDK >= 18.08 only for the flag decode).
 * Purely informational; no state is modified.
 * NOTE(review): braces, #else/#endif lines and the 'port_id' declaration are
 * elided by the extraction; code kept verbatim. */
428 static void print_port_capa(struct prox_port_cfg *port_cfg)
/* Recover the port index from the element's position in the global table. */
432 port_id = port_cfg - prox_port_cfg;
433 plog_info("\t*** Initializing port %u ***\n", port_id);
434 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
435 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
436 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
437 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
438 plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
440 if (port_cfg->max_link_speed != UINT32_MAX) {
441 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
444 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
445 plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
446 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
447 plog_info("VLAN STRIP | ");
448 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
449 plog_info("IPV4 CKSUM | ");
450 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
451 plog_info("UDP CKSUM | ");
452 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
453 plog_info("TCP CKSUM | ");
454 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
455 plog_info("TCP LRO | ");
456 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
457 plog_info("QINQ STRIP | ");
458 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
459 plog_info("OUTER_IPV4_CKSUM | ");
460 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
461 plog_info("MACSEC STRIP | ");
462 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
463 plog_info("HEADER SPLIT | ");
464 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
465 plog_info("VLAN FILTER | ");
466 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
467 plog_info("VLAN EXTEND | ");
468 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
469 plog_info("JUMBO FRAME | ");
/* CRC_STRIP was replaced by KEEP_CRC around DPDK 18.08/18.11; guard both. */
470 #if defined(DEV_RX_OFFLOAD_CRC_STRIP)
471 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
472 plog_info("CRC STRIP | ");
474 #if defined(DEV_RX_OFFLOAD_KEEP_CRC)
475 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)
476 plog_info("KEEP CRC | ");
478 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
479 plog_info("SCATTER | ");
480 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
481 plog_info("TIMESTAMP | ");
482 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
483 plog_info("SECURITY ");
486 plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
487 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
488 plog_info("VLAN INSERT | ");
489 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
490 plog_info("IPV4 CKSUM | ");
491 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
492 plog_info("UDP CKSUM | ");
493 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
494 plog_info("TCP CKSUM | ");
495 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
496 plog_info("SCTP CKSUM | ");
/* NOTE(review): "TCP TS0" below looks like a typo for "TCP TSO" — it is a
 * runtime log string, so it is left untouched here; fix separately. */
497 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
498 plog_info("TCP TS0 | ");
499 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
500 plog_info("UDP TSO | ");
501 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
502 plog_info("OUTER IPV4 CKSUM | ");
503 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
504 plog_info("QINQ INSERT | ");
505 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
506 plog_info("VLAN TNL TSO | ");
507 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
508 plog_info("GRE TNL TSO | ");
509 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
510 plog_info("IPIP TNL TSO | ");
511 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
512 plog_info("GENEVE TNL TSO | ");
513 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
514 plog_info("MACSEC INSERT | ");
515 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
516 plog_info("MT LOCKFREE | ");
517 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
518 plog_info("MULTI SEG | ");
519 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
520 plog_info("SECURITY | ");
521 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
522 plog_info("UDP TNL TSO | ");
523 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
524 plog_info("IP TNL TSO | ");
527 plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
528 plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
529 plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
530 plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
531 plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
/* Fill port_cfg->max_link_speed (in Mbps) from the highest bit set in the
 * device's speed capability mask, falling back to UINT32_MAX ("unknown").
 * Skipped for vmxnet3/virtio which advertise bogus speed capabilities. */
535 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
537 port_cfg->max_link_speed = UINT32_MAX;
539 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
540 // virtio and vmxnet3 reports fake max_link_speed
541 if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
542 // Get link_speed from highest capability from the port
543 // This will be used by gen and lat for extrapolation purposes
544 // The negotiated link_speed (as reported by rte_eth_link_get
545 // or rte_eth_link_get_nowait) might be reported too late
546 // and might result in wrong exrapolation, and hence should not be used
547 // for extrapolation purposes
/* Cascade from fastest to slowest so the highest capability wins. */
548 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
549 port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
550 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
551 port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
552 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
553 port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
554 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
555 port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
556 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
557 port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
558 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
559 port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
560 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
561 port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
562 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
563 port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
564 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
565 port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
566 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
567 port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
568 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
569 port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
570 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
571 port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
/* Fully bring up one port: log capabilities, size queues and descriptors,
 * negotiate RX/TX offloads against device capabilities, configure MTU/RSS,
 * call rte_eth_dev_configure, set up all TX then RX queues, start the device,
 * optionally assign a kernel IP (vdev/TAP only), read link status, and apply
 * promiscuous/stats-mapping/multicast settings.
 * NOTE(review): many lines (braces, #else/#endif, else branches, some
 * declarations such as 'port_id'/'ret') are elided by the extraction; code
 * kept verbatim. */
577 static void init_port(struct prox_port_cfg *port_cfg)
/* First char of the pool name is bumped per allocation to keep names unique. */
579 static char dummy_pool_name[] = "0_dummy";
580 struct rte_eth_link link;
584 get_max_link_speed(port_cfg);
585 print_port_capa(port_cfg);
586 port_id = port_cfg - prox_port_cfg;
587 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
588 "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
/* A port with no RX queues still needs one (DPDK requires >=1); give it a
 * minimal dummy mempool sized to the device's minimum RX buffer. */
590 if (port_cfg->n_rxq == 0) {
591 /* not receiving on this port */
592 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
594 uint32_t mbuf_size = TX_MBUF_SIZE;
595 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
596 mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
598 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
599 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
600 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
602 sizeof(struct rte_pktmbuf_pool_private),
603 rte_pktmbuf_pool_init, NULL,
604 prox_pktmbuf_init, 0,
605 port_cfg->socket, 0);
606 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
607 port_cfg->socket, port_cfg->n_rxd);
608 dummy_pool_name[0]++;
610 // Most pmd should now support setting mtu
/* Clamp MTU so frame (MTU + L2 header + CRC) fits the device's max RX length. */
611 if (port_cfg->mtu + PROX_RTE_ETHER_HDR_LEN + PROX_RTE_ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
612 plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
613 port_cfg->mtu = port_cfg->max_rx_pkt_len;
615 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
616 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
618 plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
620 if (port_cfg->n_txq == 0) {
621 /* not sending on this port */
622 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
627 if (port_cfg->n_rxq > 1) {
628 // Enable RSS if multiple receive queues
629 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
630 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
631 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
632 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
633 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP|ETH_RSS_UDP;
635 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
639 // Make sure that the requested RSS offload is supported by the PMD
640 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
641 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
643 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
645 // rxmode such as hw src strip
/* RX offloads: new-style flag negotiation (>= 18.08) vs legacy rxmode bits. */
646 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
647 #if defined (DEV_RX_OFFLOAD_CRC_STRIP)
648 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
650 #if defined (DEV_RX_OFFLOAD_KEEP_CRC)
651 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_KEEP_CRC);
653 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
654 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
656 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
657 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
659 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
660 port_cfg->port_conf.rxmode.jumbo_frame = 1;
664 // IPV4, UDP, SCTP Checksums
665 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
666 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
667 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
668 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
/* Legacy (< 18.08) path uses txq_flags instead of offload bitmasks. */
670 if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
671 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
672 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
674 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
675 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
676 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
680 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
681 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
683 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
684 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
685 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
686 } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
687 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
689 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
691 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
692 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
694 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
698 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
699 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
701 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
702 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
704 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
707 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
708 port_id, port_cfg->n_rxq, port_cfg->n_txq);
710 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
711 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
/* Drivers known not to support link-state interrupts: force LSC off. */
713 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
714 !strcmp(port_cfg->short_name, "virtio") ||
715 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
716 !strcmp(port_cfg->short_name, "i40e") ||
718 !strcmp(port_cfg->short_name, "i40e_vf") ||
719 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
720 !strcmp(port_cfg->driver_name, "") || /* NULL device */
721 !strcmp(port_cfg->short_name, "vmxnet3")) {
722 port_cfg->port_conf.intr_conf.lsc = 0;
723 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
/* An explicit per-port configuration always wins over the defaults above. */
726 if (port_cfg->lsc_set_explicitely) {
727 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
728 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
/* Clamp descriptor counts into the [min, max] range the device reported. */
730 if (port_cfg->n_txd < port_cfg->min_tx_desc) {
731 plog_info("\t\tNumber of TX descriptors is set to %d (minimum required for %s\n", port_cfg->min_tx_desc, port_cfg->short_name);
732 port_cfg->n_txd = port_cfg->min_tx_desc;
735 if (port_cfg->n_rxd < port_cfg->min_rx_desc) {
736 plog_info("\t\tNumber of RX descriptors is set to %d (minimum required for %s\n", port_cfg->min_rx_desc, port_cfg->short_name);
737 port_cfg->n_rxd = port_cfg->min_rx_desc;
740 if (port_cfg->n_txd > port_cfg->max_tx_desc) {
741 plog_info("\t\tNumber of TX descriptors is set to %d (maximum required for %s\n", port_cfg->max_tx_desc, port_cfg->short_name);
742 port_cfg->n_txd = port_cfg->max_tx_desc;
745 if (port_cfg->n_rxd > port_cfg->max_rx_desc) {
746 plog_info("\t\tNumber of RX descriptors is set to %d (maximum required for %s\n", port_cfg->max_rx_desc, port_cfg->short_name);
747 port_cfg->n_rxd = port_cfg->max_rx_desc;
750 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
751 port_cfg->n_txq, &port_cfg->port_conf);
752 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
754 if (port_cfg->port_conf.intr_conf.lsc) {
755 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
758 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
760 /* initialize TX queues first */
761 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
762 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
763 queue_id, port_cfg->socket, port_cfg->n_txd);
764 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
765 port_cfg->socket, &port_cfg->tx_conf);
766 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
769 /* initialize RX queues */
770 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
771 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
772 queue_id, port_id, port_cfg->socket,
773 port_cfg->n_rxd, port_cfg->pool[queue_id]);
774 ret = rte_eth_rx_queue_setup(port_id, queue_id,
776 port_cfg->socket, &port_cfg->rx_conf,
777 port_cfg->pool[queue_id]);
778 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
781 plog_info("\t\tStarting up port %u ...", port_id);
782 ret = rte_eth_dev_start(port_id);
784 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
785 plog_info(" done: ");
/* For TAP-backed vdevs with a configured IP, push it to the kernel side. */
787 if ((prox_port_cfg[port_id].ip) && (prox_port_cfg[port_id].is_vdev)) {
788 set_ip_address(prox_port_cfg[port_id].name, &prox_port_cfg[port_id].ip, prox_port_cfg[port_id].prefix);
790 /* Getting link status can be done without waiting if Link
791 State Interrupt is enabled since in that case, if the link
792 is recognized as being down, an interrupt will notify that
794 if (port_cfg->port_conf.intr_conf.lsc)
795 rte_eth_link_get_nowait(port_id, &link);
797 rte_eth_link_get(port_id, &link);
799 port_cfg->link_up = link.link_status;
800 port_cfg->link_speed = link.link_speed;
802 if (link.link_status) {
803 plog_info("Link Up - speed %'u Mbps - %s\n",
805 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
806 "full-duplex" : "half-duplex");
809 plog_info("Link Down\n");
812 if (port_cfg->promiscuous) {
813 rte_eth_promiscuous_enable(port_id);
814 plog_info("\t\tport %u in promiscuous mode\n", port_id);
/* Per-queue stats mapping is not supported by these drivers; skip to avoid
 * noisy failures. */
817 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
818 strcmp(port_cfg->short_name, "i40e") &&
819 strcmp(port_cfg->short_name, "i40e_vf") &&
820 strcmp(port_cfg->short_name, "vmxnet3")) {
821 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
822 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
824 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
827 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
828 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
830 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
/* Multicast: enable allmulticast and program the address list; roll back
 * (disable + clear count) if the list cannot be installed. */
834 if (port_cfg->nb_mc_addr) {
835 rte_eth_allmulticast_enable(port_id);
836 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
837 plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
838 port_cfg->nb_mc_addr = 0;
839 rte_eth_allmulticast_disable(port_id);
840 plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
842 plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
843 plog_info("\t\tport %u in multicast mode\n", port_id);
/* Initialize every active port via init_port. Skipped entirely when running
 * as a DPDK secondary process (the primary owns port initialization).
 * NOTE(review): braces / 'continue' lines are elided by the extraction. */
848 void init_port_all(void)
850 enum rte_proc_type_t proc_type;
851 proc_type = rte_eal_process_type();
852 if (proc_type == RTE_PROC_SECONDARY) {
853 plog_info("\tSkipping port initialization as secondary process\n");
856 uint8_t max_port_idx = prox_last_port_active() + 1;
858 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
859 if (!prox_port_cfg[portid].active) {
862 init_port(&prox_port_cfg[portid]);
/* atexit-style cleanup: close every active ethdev port, then walk all
 * core/task configurations and free their mempools.
 * NOTE(review): braces / 'continue' lines are elided by the extraction; also
 * note the pool is logged after rte_mempool_free — the pointer is only read
 * for the log message at that point. */
866 void close_ports_atexit(void)
868 uint8_t max_port_idx = prox_last_port_active() + 1;
870 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
871 if (!prox_port_cfg[portid].active) {
874 plog_info("Closing port %u\n", portid);
875 rte_eth_dev_close(portid);
878 struct lcore_cfg *lconf = NULL;
879 struct task_args *targ;
880 while (core_targ_next(&lconf, &targ, 0) == 0) {
882 rte_mempool_free(targ->pool);
883 plog_info("freeing pool %p\n", targ->pool);
/* Resolve each active port's MAC address according to its configured type:
 *  - PROX_PORT_MAC_HW:   read the NIC's own address;
 *  - PROX_PORT_MAC_RAND: generate a random address;
 *  - PROX_PORT_MAC_SET:  program the configured address into the device
 *    (skipped with a warning in a secondary process, which may not change it).
 * NOTE(review): 'rc' declaration, break statements and closing braces are
 * elided by the extraction; code kept verbatim. */
889 void init_port_addr(void)
891 struct prox_port_cfg *port_cfg;
892 enum rte_proc_type_t proc_type;
895 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
896 if (!prox_port_cfg[port_id].active) {
899 port_cfg = &prox_port_cfg[port_id];
901 switch (port_cfg->type) {
902 case PROX_PORT_MAC_HW:
903 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
905 case PROX_PORT_MAC_RAND:
906 prox_rte_eth_random_addr(port_cfg->eth_addr.addr_bytes);
908 case PROX_PORT_MAC_SET:
909 proc_type = rte_eal_process_type();
910 if (proc_type == RTE_PROC_SECONDARY) {
911 plog_warn("\tport %u: unable to change port mac address as secondary process\n", port_id);
912 } else if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
913 plog_warn("\tport %u: failed to set mac address. Error = %d\n", port_id, rc);
915 plog_info("Setting MAC to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
921 int port_is_active(uint8_t port_id)
923 if (port_id > PROX_MAX_PORTS) {
924 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
928 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
929 if (!port_cfg->active) {
930 plog_info("Port %u is not active\n", port_id);