2 // Copyright (c) 2010-2020 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include <sys/ioctl.h>
37 #include "prox_port_cfg.h"
38 #include "prox_globals.h"
44 #include "prox_cksum.h"
45 #include "stats_irq.h"
46 #include "prox_compat.h"
47 #include "rte_ethdev.h"
/* Global table of per-port configuration entries; it has PROX_MAX_PORTS slots
 * and the functions in this file index it by port id. */
49 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Walks all PROX_MAX_PORTS slots of prox_port_cfg[] and accumulates their
 * `active` fields into `ret`. */
52 int prox_nb_active_ports(void)
55 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
/* add this slot's active flag to the running total */
56 ret += prox_port_cfg[i].active;
/* Walks all PROX_MAX_PORTS slots of prox_port_cfg[] and tests each slot's
 * `active` flag.
 * NOTE(review): the statements that record and return the result are not
 * visible in this excerpt. Callers in this file compare the result with -1
 * and add 1 to it to obtain a port count - confirm against the full source. */
61 int prox_last_port_active(void)
64 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
65 if (prox_port_cfg[i].active) {
/*
 * Ethernet device event callback; init_port() registers it for
 * RTE_ETH_EVENT_INTR_LSC.
 *
 * Three prototypes are provided and one is selected from RTE_VERSION:
 * a uint16_t port id with an int return and a ret_param argument, a uint8_t
 * port id with an int return and a ret_param argument, or a uint8_t port id
 * with a void return and no ret_param. port_id, param and ret_param are all
 * marked unused.
 *
 * The body tests whether the event type differs from RTE_ETH_EVENT_INTR_LSC
 * (the statements inside that branch are not visible in this excerpt) and,
 * on its main path, atomically increments the `lsc` counter.
 */
72 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
73 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
74 __attribute__((unused)) void *ret_param)
76 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
77 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
78 __attribute__((unused)) void *ret_param)
80 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
/* only link-state-change events are of interest here */
84 if (RTE_ETH_EVENT_INTR_LSC != type) {
85 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* count the link state change */
92 rte_atomic32_inc(&lsc);
94 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Argument bundle consumed by prox_pktmbuf_reinit(). */
99 struct prox_pktmbuf_reinit_args {
/* mempool whose header_size is used to locate the mbuf; also passed to prox_pktmbuf_init() */
100 struct rte_mempool *mp;
/* passed to prox_pktmbuf_init() as its opaque argument */
101 struct lcore_cfg *lconf;
/*
 * Mempool object initializer for mbufs.
 *
 * mp         - mempool the object belongs to, forwarded to rte_pktmbuf_init()
 * opaque_arg - forwarded unchanged to rte_pktmbuf_init()
 * _m         - the object, treated as a struct rte_mbuf
 * i          - object index, forwarded to rte_pktmbuf_init()
 *
 * The L2/L3 header lengths of the mbuf are preset to the sizes of an Ethernet
 * and an IPv4 header. Where they are stored (tx_offload or the older
 * pkt.vlan_macip fields) depends on RTE_VERSION.
 */
104 /* standard mbuf initialization procedure */
105 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
107 struct rte_mbuf *mbuf = _m;
109 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
/* RTE_VERSION >= 1.8: both header lengths are packed into tx_offload */
110 mbuf->tx_offload = CALC_TX_OL(sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr));
/* older layout: separate l2_len / l3_len fields */
112 mbuf->pkt.vlan_macip.f.l2_len = sizeof(prox_rte_ether_hdr);
113 mbuf->pkt.vlan_macip.f.l3_len = sizeof(prox_rte_ipv4_hdr);
/* hand over to the regular DPDK mbuf initializer */
116 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
/*
 * Re-runs prox_pktmbuf_init() on one mempool object.
 *
 * arg   - pointer to a struct prox_pktmbuf_reinit_args (mempool + lcore config)
 * start - start of the object (the declaration that ties it to `obj` is not
 *         visible in this excerpt - presumably obj starts at `start`; confirm)
 * end   - unused
 * idx   - object index, forwarded to prox_pktmbuf_init()
 */
119 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
121 struct prox_pktmbuf_reinit_args *init_args = arg;
/* skip the mempool object header to reach the mbuf itself */
125 obj += init_args->mp->header_size;
126 m = (struct rte_mbuf*)obj;
128 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
/*
 * CONFIGURE_TX_OFFLOAD(flag): resolve one requested TX offload flag.
 * Expects a `port_cfg` pointer to be in scope at the expansion site.
 *
 * When `flag` is set in requested_tx_offload:
 *   - if it is also set in disabled_tx_offload, it is removed from the request;
 *   - else if tx_offload_capa has it, it is enabled in port_conf.txmode.offloads;
 *   - else if tx_queue_offload_capa has it, it is enabled in tx_conf.offloads;
 *   - the request bit is also cleared with a "neither port or queue supports it"
 *     log message.
 * A final log line reports the flag as plainly "disabled"; the branch keywords
 * around the last two cases are not visible in this excerpt.
 * Every outcome is logged with the flag's name (#flag).
 */
131 #define CONFIGURE_TX_OFFLOAD(flag) \
132 if (port_cfg->requested_tx_offload & flag) {\
133 if (port_cfg->disabled_tx_offload & flag) {\
134 plog_info("\t\t%s disabled by configuration\n", #flag);\
135 port_cfg->requested_tx_offload &= ~flag;\
136 } else if (port_cfg->dev_info.tx_offload_capa & flag) {\
137 port_cfg->port_conf.txmode.offloads |= flag;\
138 plog_info("\t\t%s enabled on port\n", #flag);\
139 } else if (port_cfg->dev_info.tx_queue_offload_capa & flag) {\
140 port_cfg->tx_conf.offloads |= flag;\
141 plog_info("\t\t%s enabled on queue\n", #flag);\
143 port_cfg->requested_tx_offload &= ~flag;\
144 plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
147 plog_info("\t\t%s disabled\n", #flag);\
/*
 * CONFIGURE_RX_OFFLOAD(flag): resolve one requested RX offload flag.
 * Expects a `port_cfg` pointer to be in scope at the expansion site.
 *
 * When `flag` is set in requested_rx_offload:
 *   - if rx_offload_capa has it, it is enabled in port_conf.rxmode.offloads;
 *   - else if rx_queue_offload_capa has it, it is enabled in rx_conf.offloads;
 *   - the request bit is also cleared with a "neither port or queue supports it"
 *     log message.
 * A final log line reports the flag as plainly "disabled"; the branch keywords
 * around the last two cases are not visible in this excerpt.
 * Unlike CONFIGURE_TX_OFFLOAD, no "disabled by configuration" mask is consulted.
 */
150 #define CONFIGURE_RX_OFFLOAD(flag) \
151 if (port_cfg->requested_rx_offload & flag) {\
152 if (port_cfg->dev_info.rx_offload_capa & flag) {\
153 port_cfg->port_conf.rxmode.offloads |= flag;\
154 plog_info("\t\t%s enabled on port\n", #flag);\
155 } else if (port_cfg->dev_info.rx_queue_offload_capa & flag) {\
156 port_cfg->rx_conf.offloads |= flag;\
157 plog_info("\t\t%s enabled on queue\n", #flag);\
159 port_cfg->requested_rx_offload &= ~flag;\
160 plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
163 plog_info("\t\t%s disabled\n", #flag);\
167 static void set_ip_address (char *devname, uint32_t *ip)
170 struct sockaddr_in in_addr;
173 memset(&ifreq, 0, sizeof(struct ifreq));
174 memset(&in_addr, 0, sizeof(struct sockaddr_in));
176 in_addr.sin_family = AF_INET;
177 in_addr.sin_addr = *(struct in_addr *)ip;
178 fd = socket(in_addr.sin_family, SOCK_DGRAM, 0);
180 strncpy(ifreq.ifr_name, devname, IFNAMSIZ);
181 ifreq.ifr_addr = *(struct sockaddr *)&in_addr;
182 rc = ioctl(fd, SIOCSIFADDR, &ifreq);
183 PROX_PANIC(rc < 0, "Failed to set IP address %d on device %s: error = %d\n", *ip, devname, errno);
/*
 * Initialize rte devices and check the number of available ports.
 *
 * use_dummy_devices - when non-zero, null/dummy devices are created instead of
 *                     using real ports; it is an error to combine this with
 *                     real ports being present.
 *
 * Visible stages:
 *   1. For every active port that names a vdev, create a tap device and set up
 *      a second prox_port_cfg entry for it (name, IP, MAC, mutual dpdk_mapping).
 *   2. Count the ports, optionally create dummy devices, cap the count at
 *      PROX_MAX_PORTS and check that the last active port index is in range.
 *   3. For every active port, copy limits from rte_eth_dev_info, derive
 *      short_name from the driver name, record the PCI address, read the NUMA
 *      node from sysfs, and blacklist TX checksum offloads known to misbehave
 *      on vmxnet3 / virtio.
 *
 * Failures are reported through PROX_PANIC.
 */
187 /* initialize rte devices and check the number of available ports */
188 void init_rte_dev(int use_dummy_devices)
190 uint8_t nb_ports, port_id_max;
191 int port_id_last, rc = 0;
192 struct rte_eth_dev_info dev_info;
193 const struct rte_pci_device *pci_dev;
/* stage 1: create a tap vdev for each active port that requests one */
195 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
196 if (!prox_port_cfg[port_id].active) {
199 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
200 if (port_cfg->vdev[0]) {
201 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
202 char name[MAX_NAME_SIZE], tap[MAX_NAME_SIZE];
203 snprintf(tap, MAX_NAME_SIZE, "net_tap%d", port_id);
204 snprintf(name, MAX_NAME_SIZE, "iface=%s", port_cfg->vdev);
205 rc = rte_vdev_init(tap, name);
207 rc = eth_dev_null_create(tap, name, PROX_RTE_ETHER_MIN_LEN, 0);
209 PROX_PANIC(rc != 0, "Unable to create device %s %s\n", "net tap", port_cfg->vdev);
/* the new vdev is assumed to be the last port; use the same version-compat
 * counter wrapper as the port count below instead of raw rte_eth_dev_count() */
210 int vdev_port_id = prox_rte_eth_dev_count_avail() - 1;
211 PROX_PANIC(vdev_port_id >= PROX_MAX_PORTS, "Too many port defined %d >= %d\n", vdev_port_id, PROX_MAX_PORTS);
212 plog_info("\tCreating device %s, port %d\n", port_cfg->vdev, vdev_port_id);
213 prox_port_cfg[vdev_port_id].active = 1;
/* cross-link the DPDK port and its vdev */
214 prox_port_cfg[vdev_port_id].dpdk_mapping = port_id;
215 prox_port_cfg[vdev_port_id].n_txq = 1;
/* bounded copy through the file's prox_strncpy helper (as used for driver_name
 * below) instead of raw strncpy, which leaves the name unterminated when the
 * source fills the whole buffer */
216 prox_strncpy(prox_port_cfg[vdev_port_id].name, port_cfg->vdev, MAX_NAME_SIZE);
217 prox_port_cfg[port_id].dpdk_mapping = vdev_port_id;
218 prox_port_cfg[vdev_port_id].ip = rte_be_to_cpu_32(prox_port_cfg[port_id].ip);
219 prox_port_cfg[port_id].ip = 0; // So only vdev has an IP associated
220 prox_port_cfg[vdev_port_id].type = prox_port_cfg[port_id].type;
221 if (prox_port_cfg[vdev_port_id].type == PROX_PORT_MAC_HW) {
222 // If the DPDK port MAC is set to HW, make sure the vdev has the same MAC as the DPDK port
223 prox_port_cfg[vdev_port_id].type = PROX_PORT_MAC_SET;
224 rte_eth_macaddr_get(port_id, &prox_port_cfg[vdev_port_id].eth_addr);
225 plog_info("\tDPDK port %d MAC address pre-configured to MAC from port %d: "MAC_BYTES_FMT"\n",
226 vdev_port_id, port_id, MAC_BYTES(prox_port_cfg[vdev_port_id].eth_addr.addr_bytes));
228 memcpy(&prox_port_cfg[vdev_port_id].eth_addr, &prox_port_cfg[port_id].eth_addr, sizeof(prox_port_cfg[port_id].eth_addr));
/* stage 2: count ports and optionally create dummy devices */
231 nb_ports = prox_rte_eth_dev_count_avail();
232 /* get available ports configuration */
233 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
235 if (use_dummy_devices) {
236 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
/* one dummy device per slot up to and including the last active port */
237 nb_ports = prox_last_port_active() + 1;
238 plog_info("Creating %u dummy devices\n", nb_ports);
240 char port_name[32] = "0dummy_dev";
241 for (uint32_t i = 0; i < nb_ports; ++i) {
242 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
243 rte_vdev_init(port_name, "size=64,copy=0");
245 eth_dev_null_create(port_name, 0, PROX_RTE_ETHER_MIN_LEN, 0);
250 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
253 else if (prox_last_port_active() != -1) {
254 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
255 plog_info("\tDPDK has found %u ports\n", nb_ports);
/* never handle more ports than prox_port_cfg[] has slots */
258 if (nb_ports > PROX_MAX_PORTS) {
259 plog_warn("\tWarning: I can deal with at most %u ports."
260 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
262 nb_ports = PROX_MAX_PORTS;
264 port_id_max = nb_ports - 1;
265 port_id_last = prox_last_port_active();
266 PROX_PANIC(port_id_last > port_id_max,
267 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
268 port_id_last, port_id_max);
270 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
271 for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
272 /* skip ports that are not enabled */
273 if (!prox_port_cfg[port_id].active) {
276 plog_info("\tGetting info for rte dev %u\n", port_id);
277 rte_eth_dev_info_get(port_id, &dev_info);
278 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* -1 means "NUMA node unknown" until sysfs says otherwise */
279 port_cfg->socket = -1;
/* keep a full copy of dev_info plus the individual limits used later */
281 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
282 port_cfg->max_txq = dev_info.max_tx_queues;
283 port_cfg->max_rxq = dev_info.max_rx_queues;
284 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
285 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
286 port_cfg->min_tx_desc = dev_info.tx_desc_lim.nb_min;
287 port_cfg->max_tx_desc = dev_info.tx_desc_lim.nb_max;
288 port_cfg->min_rx_desc = dev_info.rx_desc_lim.nb_min;
289 port_cfg->max_rx_desc = dev_info.rx_desc_lim.nb_max;
291 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
292 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
293 plog_info("\tPort %u : %d<=nb_tx_desc<=%d %d<=nb_rx_desc<=%d\n", port_id, port_cfg->min_tx_desc, port_cfg->max_tx_desc, port_cfg->min_rx_desc, port_cfg->max_rx_desc);
/* short_name is the driver name without a leading "rte_" / "net_" prefix */
295 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
296 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
297 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
298 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
300 prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
/* a "_pmd" substring in short_name gets special handling (body not visible in this excerpt) */
303 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
/* locate the PCI device; where it lives in dev_info depends on RTE_VERSION */
307 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
308 pci_dev = dev_info.pci_dev;
310 if (!dev_info.device)
312 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
317 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
318 "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
319 /* Try to find the device's numa node */
321 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
322 FILE* numa_node_fd = fopen(buf, "r");
324 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
325 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
327 port_cfg->socket = strtol(buf, 0, 0);
328 if (port_cfg->socket == -1) {
329 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
331 fclose(numa_node_fd);
334 // In DPDK 18.08 vmxnet3 reports that it supports IPv4 checksum, but packets do not go through when IPv4 cksum is enabled
335 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
336 plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
337 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
339 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
340 plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
341 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
343 // Some OVS versions report that they support UDP offload and no IPv4 offload, but fail when UDP offload is enabled
344 if ((!strcmp(port_cfg->short_name, "virtio")) &&
345 ((port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) == 0) &&
346 (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
347 plog_info("\t\tDisabling UDP cksum on virtio\n");
348 port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
/*
 * Create rte ring-backed devices.
 *
 * For every active port whose rx_ring name is non-empty, looks up the RX and
 * TX rings by name, wraps them in an ethdev with rte_eth_from_rings() and
 * switches the link-state interrupt off for that port. Panics when a ring is
 * missing or the ethdev cannot be created.
 *
 * NOTE(review): nb_ring_dev is presumably the count of devices created and the
 * return value; the increment and return are not visible in this excerpt.
 */
353 /* Create rte ring-backed devices */
354 uint8_t init_rte_ring_dev(void)
356 uint8_t nb_ring_dev = 0;
358 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
359 /* skip ports that are not enabled */
360 if (!prox_port_cfg[port_id].active) {
363 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* a non-empty rx_ring name marks the port as ring-backed */
364 if (port_cfg->rx_ring[0] != '\0') {
365 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
367 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
368 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
369 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
370 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
372 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
/* rte_eth_from_rings() reports failure with a negative value; current DPDK
 * returns the new port id on success, so a non-zero result is not an error */
373 PROX_PANIC(ret < 0, "Failed to create eth_dev from rings for port %d\n", port_id);
375 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/*
 * Log the identity and capabilities of one port: name, queue limits, driver,
 * supported speed mask, highest link speed (when known), the RX and TX offload
 * capability masks decoded flag by flag, the per-queue offload masks, the RSS
 * offload mask and the driver's default RX/TX port configuration.
 *
 * port_cfg - entry inside prox_port_cfg[]; the port id is derived from its
 *            position in that array.
 *
 * Which sections are printed depends on RTE_VERSION.
 */
384 static void print_port_capa(struct prox_port_cfg *port_cfg)
388 port_id = port_cfg - prox_port_cfg;
389 plog_info("\t*** Initializing port %u ***\n", port_id);
390 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
391 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
392 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
393 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
394 plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
/* UINT32_MAX is the value get_max_link_speed() leaves when no speed was derived */
396 if (port_cfg->max_link_speed != UINT32_MAX) {
397 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
400 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
/* decode the RX offload capability mask, one label per flag */
401 plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
402 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
403 plog_info("VLAN STRIP | ");
404 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
405 plog_info("IPV4 CKSUM | ");
406 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
407 plog_info("UDP CKSUM | ");
408 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
409 plog_info("TCP CKSUM | ");
410 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
411 plog_info("TCP LRO | ");
412 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
413 plog_info("QINQ STRIP | ");
414 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
415 plog_info("OUTER_IPV4_CKSUM | ");
416 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
417 plog_info("MACSEC STRIP | ");
418 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
419 plog_info("HEADER SPLIT | ");
420 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
421 plog_info("VLAN FILTER | ");
422 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
423 plog_info("VLAN EXTEND | ");
424 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
425 plog_info("JUMBO FRAME | ");
/* CRC_STRIP and KEEP_CRC are only decoded when the DPDK headers define them */
426 #if defined(DEV_RX_OFFLOAD_CRC_STRIP)
427 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
428 plog_info("CRC STRIP | ");
430 #if defined(DEV_RX_OFFLOAD_KEEP_CRC)
431 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)
432 plog_info("KEEP CRC | ");
434 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
435 plog_info("SCATTER | ");
436 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
437 plog_info("TIMESTAMP | ");
438 if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
439 plog_info("SECURITY ");
/* decode the TX offload capability mask, one label per flag */
442 plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
443 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
444 plog_info("VLAN INSERT | ");
445 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
446 plog_info("IPV4 CKSUM | ");
447 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
448 plog_info("UDP CKSUM | ");
449 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
450 plog_info("TCP CKSUM | ");
451 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
452 plog_info("SCTP CKSUM | ");
453 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
/* label previously read "TS0" with a digit zero */
454 plog_info("TCP TSO | ");
455 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
456 plog_info("UDP TSO | ");
457 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
458 plog_info("OUTER IPV4 CKSUM | ");
459 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
460 plog_info("QINQ INSERT | ");
461 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
/* the flag is VXLAN tunnel TSO; the label previously said "VLAN" */
462 plog_info("VXLAN TNL TSO | ");
463 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
464 plog_info("GRE TNL TSO | ");
465 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
466 plog_info("IPIP TNL TSO | ");
467 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
468 plog_info("GENEVE TNL TSO | ");
469 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
470 plog_info("MACSEC INSERT | ");
471 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
472 plog_info("MT LOCKFREE | ");
473 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
474 plog_info("MULTI SEG | ");
475 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
476 plog_info("SECURITY | ");
477 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
478 plog_info("UDP TNL TSO | ");
479 if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
480 plog_info("IP TNL TSO | ");
/* raw per-queue offload masks, RSS mask and the driver's preferred defaults */
483 plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
484 plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
485 plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
486 plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
487 plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
/*
 * Derive port_cfg->max_link_speed from the speed capability mask reported in
 * port_cfg->dev_info.speed_capa.
 *
 * The field starts out as UINT32_MAX. With RTE_VERSION >= 16.04, and for any
 * driver whose short_name is neither "vmxnet3" nor "virtio", it is replaced by
 * the ETH_SPEED_NUM_* value of the highest capability bit that is set, testing
 * from 100G down to 10M. If no tested bit is set the field keeps UINT32_MAX.
 */
491 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
493 port_cfg->max_link_speed = UINT32_MAX;
495 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
496 // virtio and vmxnet3 report a fake max_link_speed
497 if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
498 // Get link_speed from the highest capability of the port.
499 // This will be used by gen and lat for extrapolation purposes.
500 // The negotiated link_speed (as reported by rte_eth_link_get
501 // or rte_eth_link_get_nowait) might be reported too late
502 // and might result in wrong extrapolation, and hence should not be used
503 // for extrapolation purposes
504 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
505 port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
506 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
507 port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
508 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
509 port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
510 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
511 port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
512 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
513 port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
514 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
515 port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
516 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
517 port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
518 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
519 port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
520 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
521 port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
522 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
523 port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
/* half- and full-duplex capability bits map to the same numeric speed */
524 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
525 port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
526 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
527 port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
/*
 * Configure and start one port.
 *
 * port_cfg - entry inside prox_port_cfg[]; the port id is derived from its
 *            position in that array.
 *
 * Visible sequence:
 *   - derive the maximum link speed and log the port capabilities;
 *   - if the port has no RX queues, allocate a dummy mempool for it; the
 *     visible code then sets the MTU, clamping it first when it exceeds what
 *     the port accepts;
 *   - enable RSS when more than one RX queue is configured;
 *   - resolve RX/TX offloads (CONFIGURE_*_OFFLOAD macros on newer DPDK,
 *     rxmode bits / txq_flags on older DPDK);
 *   - decide whether the link-state interrupt is used, clamp descriptor counts
 *     to the driver's limits, then configure the device, set up TX then RX
 *     queues and start the port;
 *   - set the IP address if one is configured, read the link state, and apply
 *     promiscuous mode, queue statistics mappings and multicast addresses.
 *
 * Hard failures are reported through PROX_PANIC; failures of the MTU, stats
 * mapping and multicast calls are only logged.
 */
533 static void init_port(struct prox_port_cfg *port_cfg)
/* the first character of the pool name is bumped after each allocation so every dummy pool gets its own name */
535 static char dummy_pool_name[] = "0_dummy";
536 struct rte_eth_link link;
540 get_max_link_speed(port_cfg);
541 print_port_capa(port_cfg);
542 port_id = port_cfg - prox_port_cfg;
543 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
544 "\t\t port %u is enabled but no RX or TX queues have been configured\n", port_id);
546 if (port_cfg->n_rxq == 0) {
547 /* not receiving on this port */
548 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
/* the dummy pool's element size must satisfy the driver's minimum RX buffer size */
550 uint32_t mbuf_size = TX_MBUF_SIZE;
551 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
552 mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
554 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
555 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
556 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
558 sizeof(struct rte_pktmbuf_pool_private),
559 rte_pktmbuf_pool_init, NULL,
560 prox_pktmbuf_init, 0,
561 port_cfg->socket, 0);
562 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
563 port_cfg->socket, port_cfg->n_rxd);
564 dummy_pool_name[0]++;
566 // Most pmd should now support setting mtu
/* NOTE(review): the check adds Ethernet header and CRC length to the MTU, but the
 * clamp assigns max_rx_pkt_len without subtracting them - confirm this is intended */
567 if (port_cfg->mtu + PROX_RTE_ETHER_HDR_LEN + PROX_RTE_ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
568 plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
569 port_cfg->mtu = port_cfg->max_rx_pkt_len;
571 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
572 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
574 plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
576 if (port_cfg->n_txq == 0) {
577 /* not sending on this port */
578 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
583 if (port_cfg->n_rxq > 1) {
584 // Enable RSS if multiple receive queues
585 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
586 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
587 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
588 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
589 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP|ETH_RSS_UDP;
591 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
595 // Make sure that the requested RSS offload is supported by the PMD
596 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
597 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
599 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
601 // rxmode such as hw src strip
602 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
603 #if defined (DEV_RX_OFFLOAD_CRC_STRIP)
604 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
606 #if defined (DEV_RX_OFFLOAD_KEEP_CRC)
607 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_KEEP_CRC);
609 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
610 CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
/* older DPDK: the same requests are expressed through rxmode bit-fields */
612 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
613 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
615 if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
616 port_cfg->port_conf.rxmode.jumbo_frame = 1;
620 // IPV4, UDP, SCTP Checksums
621 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
622 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
623 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
624 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
/* older DPDK: TX offloads are controlled through txq_flags */
626 if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
627 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
628 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them\n");
630 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
631 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
632 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
636 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
637 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
639 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
640 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
641 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
642 } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
643 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
645 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
647 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
648 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
650 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
654 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
655 CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
657 if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
658 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
660 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
663 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
664 port_id, port_cfg->n_rxq, port_cfg->n_txq);
666 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
667 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
/* drivers for which the link state interrupt is switched off */
669 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
670 !strcmp(port_cfg->short_name, "virtio") ||
671 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
672 !strcmp(port_cfg->short_name, "i40e") ||
674 !strcmp(port_cfg->short_name, "i40e_vf") ||
675 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
676 !strcmp(port_cfg->driver_name, "") || /* NULL device */
677 !strcmp(port_cfg->short_name, "vmxnet3")) {
678 port_cfg->port_conf.intr_conf.lsc = 0;
679 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
/* an explicit setting wins over the per-driver default chosen above */
682 if (port_cfg->lsc_set_explicitely) {
683 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
684 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
/* clamp the descriptor counts into the range reported by the driver */
686 if (port_cfg->n_txd < port_cfg->min_tx_desc) {
687 plog_info("\t\tNumber of TX descriptors is set to %d (minimum required for %s)\n", port_cfg->min_tx_desc, port_cfg->short_name);
688 port_cfg->n_txd = port_cfg->min_tx_desc;
691 if (port_cfg->n_rxd < port_cfg->min_rx_desc) {
692 plog_info("\t\tNumber of RX descriptors is set to %d (minimum required for %s)\n", port_cfg->min_rx_desc, port_cfg->short_name);
693 port_cfg->n_rxd = port_cfg->min_rx_desc;
696 if (port_cfg->n_txd > port_cfg->max_tx_desc) {
697 plog_info("\t\tNumber of TX descriptors is set to %d (maximum required for %s)\n", port_cfg->max_tx_desc, port_cfg->short_name);
698 port_cfg->n_txd = port_cfg->max_tx_desc;
701 if (port_cfg->n_rxd > port_cfg->max_rx_desc) {
702 plog_info("\t\tNumber of RX descriptors is set to %d (maximum required for %s)\n", port_cfg->max_rx_desc, port_cfg->short_name);
703 port_cfg->n_rxd = port_cfg->max_rx_desc;
706 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
707 port_cfg->n_txq, &port_cfg->port_conf);
708 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
710 if (port_cfg->port_conf.intr_conf.lsc) {
711 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
714 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
716 /* initialize TX queues first */
717 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
718 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
719 queue_id, port_cfg->socket, port_cfg->n_txd);
720 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
721 port_cfg->socket, &port_cfg->tx_conf);
722 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
725 /* initialize RX queues */
726 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
/* %p already prints an implementation-defined prefix, so no literal "0x" in front of it */
727 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool %p)\n",
728 queue_id, port_id, port_cfg->socket,
729 port_cfg->n_rxd, (void *)port_cfg->pool[queue_id]);
730 ret = rte_eth_rx_queue_setup(port_id, queue_id,
732 port_cfg->socket, &port_cfg->rx_conf,
733 port_cfg->pool[queue_id]);
734 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
737 plog_info("\t\tStarting up port %u ...", port_id);
738 ret = rte_eth_dev_start(port_id);
740 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
741 plog_info(" done: ");
743 if (prox_port_cfg[port_id].ip) {
744 set_ip_address(prox_port_cfg[port_id].name, &prox_port_cfg[port_id].ip);
746 /* Getting link status can be done without waiting if Link
747 State Interrupt is enabled since in that case, if the link
748 is recognized as being down, an interrupt will notify that
750 if (port_cfg->port_conf.intr_conf.lsc)
751 rte_eth_link_get_nowait(port_id, &link);
753 rte_eth_link_get(port_id, &link);
755 port_cfg->link_up = link.link_status;
756 port_cfg->link_speed = link.link_speed;
758 if (link.link_status) {
759 plog_info("Link Up - speed %'u Mbps - %s\n",
761 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
762 "full-duplex" : "half-duplex");
765 plog_info("Link Down\n");
768 if (port_cfg->promiscuous) {
769 rte_eth_promiscuous_enable(port_id);
770 plog_info("\t\tport %u in promiscuous mode\n", port_id);
/* per-queue statistics mapping is skipped for the drivers listed here */
773 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
774 strcmp(port_cfg->short_name, "i40e") &&
775 strcmp(port_cfg->short_name, "i40e_vf") &&
776 strcmp(port_cfg->short_name, "vmxnet3")) {
777 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
778 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
780 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
783 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
784 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
786 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
/* multicast: enable all-multicast, then install the configured address list; roll back on failure */
790 if (port_cfg->nb_mc_addr) {
791 rte_eth_allmulticast_enable(port_id);
792 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
793 plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
794 port_cfg->nb_mc_addr = 0;
795 rte_eth_allmulticast_disable(port_id);
796 plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
798 plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
799 plog_info("\t\tport %u in multicast mode\n", port_id);
/* Runs init_port() on port slots, scanning from slot 0 up to and including the
 * index reported by prox_last_port_active(); each slot's `active` flag is
 * tested first (the body of that test is not visible in this excerpt). */
804 void init_port_all(void)
/* one past the index reported by prox_last_port_active() */
806 uint8_t max_port_idx = prox_last_port_active() + 1;
808 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
809 if (!prox_port_cfg[portid].active) {
812 init_port(&prox_port_cfg[portid]);
/* Calls rte_eth_dev_close() on port slots, scanning from slot 0 up to and
 * including the index reported by prox_last_port_active(); each slot's
 * `active` flag is tested first (the body of that test is not visible in this
 * excerpt). The name suggests use as an exit handler; registration is not
 * visible here. */
816 void close_ports_atexit(void)
/* one past the index reported by prox_last_port_active() */
818 uint8_t max_port_idx = prox_last_port_active() + 1;
820 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
821 if (!prox_port_cfg[portid].active) {
824 rte_eth_dev_close(portid);
/*
 * Establish the MAC address of the port slots according to each slot's `type`;
 * each slot's `active` flag is tested first (the body of that test is not
 * visible in this excerpt).
 *   PROX_PORT_MAC_HW   - read the address from the device into eth_addr
 *   PROX_PORT_MAC_RAND - fill eth_addr with a random address
 *   PROX_PORT_MAC_SET  - program eth_addr into the device as its default MAC;
 *                        a failure is logged as a warning only
 * NOTE(review): the statements separating the cases (break / default) are not
 * visible in this excerpt - confirm there is no unintended fallthrough.
 */
828 void init_port_addr(void)
830 struct prox_port_cfg *port_cfg;
833 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
834 if (!prox_port_cfg[port_id].active) {
837 port_cfg = &prox_port_cfg[port_id];
839 switch (port_cfg->type) {
840 case PROX_PORT_MAC_HW:
841 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
843 case PROX_PORT_MAC_RAND:
844 prox_rte_eth_random_addr(port_cfg->eth_addr.addr_bytes);
846 case PROX_PORT_MAC_SET:
847 plog_info("Setting MAC to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
848 if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
849 plog_warn("port %u: failed to set mac address. Error = %d\n", port_id, rc);
/*
 * Check whether `port_id` designates a usable port: it must be a valid index
 * into prox_port_cfg[] and the corresponding entry must be marked active.
 * Both rejection paths log the reason.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
855 int port_is_active(uint8_t port_id)
/* valid indices are 0 .. PROX_MAX_PORTS - 1, so PROX_MAX_PORTS itself must be
 * rejected too, otherwise the lookup below reads one past the end of the table */
857 if (port_id >= PROX_MAX_PORTS) {
858 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS - 1);
862 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
863 if (!port_cfg->active) {
864 plog_info("Port %u is not active\n", port_id);