2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
// Global per-port configuration/state table, indexed by DPDK port id.
44 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
// Return the number of entries in prox_port_cfg[] marked active.
// NOTE(review): interior lines are elided in this view (opening brace, the
// declaration of 'ret', and the final return) — presumably 'ret' starts at 0
// and is returned after the loop; confirm against the full file.
47 int prox_nb_active_ports(void)
50 	for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
51 		ret += prox_port_cfg[i].active;
// Return the index of the highest-numbered active port, or -1 when no port
// is active (init_rte_dev() tests for the -1 sentinel, see below).
// NOTE(review): interior lines are elided — presumably each active hit
// records 'i' into a local that is returned after the loop; confirm against
// the full file.
56 int prox_last_port_active(void)
59 	for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
60 		if (prox_port_cfg[i].active) {
// Link-state-change (LSC) callback, registered for RTE_ETH_EVENT_INTR_LSC in
// init_port().  The prototype differs per DPDK release, hence the version
// gates: >= 17.11 takes a uint16_t port id and returns int; 17.8 .. 17.11
// takes a uint8_t port id and returns int; older releases return void and
// have no ret_param argument.
67 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
68 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
69 	__attribute__((unused)) void *ret_param)
71 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
72 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
73 	__attribute__((unused)) void *ret_param)
75 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
// Ignore every event type except link-state-change.  NOTE(review): the early
// return in this branch is elided in this view; on >= 17.8 it presumably
// returns 0 — confirm against the full file.
79 	if (RTE_ETH_EVENT_INTR_LSC != type) {
80 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
// Count the link-state change.  NOTE(review): the declaration of the atomic
// 'lsc' counter is not visible here; presumably declared file-scope and
// consumed by the stats/monitor code (stats_irq.h is included) — confirm.
87 	rte_atomic32_inc(&lsc);
89 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
// Argument bundle threaded through the mempool element walk into
// prox_pktmbuf_reinit(): the mempool being walked and the owning lcore
// configuration (passed on as the opaque init argument).
94 struct prox_pktmbuf_reinit_args {
95 	struct rte_mempool *mp;
96 	struct lcore_cfg *lconf;
99 /* standard mbuf initialization procedure */
// Mempool per-object constructor: pre-set the TX offload metadata so the
// fast path does not have to (L2 length = Ethernet header, L3 length = IPv4
// header), then run the stock rte_pktmbuf_init().  The pre-1.8 DPDK branch
// writes the old pkt.vlan_macip field layout instead of tx_offload.
100 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
102 	struct rte_mbuf *mbuf = _m;
104 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
105 	mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
107 	mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
108 	mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
111 	rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
// Re-initialize one mempool element in place: 'start' points at the raw
// mempool object, so skip the mempool header to reach the mbuf, then run the
// standard init with the owning lconf as the opaque argument.
// NOTE(review): the declarations of 'obj' and 'm' are elided in this view,
// and 'm' is not used on any visible line — confirm against the full file.
114 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
116 	struct prox_pktmbuf_reinit_args *init_args = arg;
120 	obj += init_args->mp->header_size;
121 	m = (struct rte_mbuf*)obj;
123 	prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
126 /* initialize rte devices and check the number of available ports */
// Probe the DPDK ethernet devices (or create null/dummy vdevs when
// use_dummy_devices is set), validate the configured port range against what
// DPDK found, and fill prox_port_cfg[] with per-port facts: max queue
// counts, max/min packet sizes, PCI address, driver name / short name, NUMA
// node, and TX checksum offload capabilities.
// NOTE(review): many interior lines (closing braces, #else/#endif branches,
// 'continue' statements, local declarations such as 'port_id_last', 'buf'
// and 'ptr') are elided in this view; comments on elided behaviour are
// inferred and should be confirmed against the full file.
127 void init_rte_dev(int use_dummy_devices)
129 	uint8_t nb_ports, port_id_max;
131 	struct rte_eth_dev_info dev_info;
132 	const struct rte_pci_device *pci_dev;
134 	nb_ports = rte_eth_dev_count();
135 	/* get available ports configuration */
136 	PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
138 	if (use_dummy_devices) {
139 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
// Create one null/dummy device per configured port index, 0..last active.
140 		nb_ports = prox_last_port_active() + 1;
141 		plog_info("Creating %u dummy devices\n", nb_ports);
// The leading character of the name is bumped per iteration (presumably
// port_name[0]++ on an elided line) so each vdev name is unique — confirm.
143 		char port_name[32] = "0dummy_dev";
144 		for (uint32_t i = 0; i < nb_ports; ++i) {
145 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
146 			rte_vdev_init(port_name, "size=ETHER_MIN_LEN,copy=0");
148 			eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
// Pre-2.1 DPDK has no null PMD: dummy devices are simply unsupported.
153 		PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
156 	else if (prox_last_port_active() != -1) {
157 		PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
158 		plog_info("\tDPDK has found %u ports\n", nb_ports);
// Clamp to what PROX can track; extra physical ports are ignored.
161 	if (nb_ports > PROX_MAX_PORTS) {
162 		plog_warn("\tWarning: I can deal with at most %u ports."
163 		          " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
165 		nb_ports = PROX_MAX_PORTS;
167 	port_id_max = nb_ports - 1;
168 	port_id_last = prox_last_port_active();
169 	PROX_PANIC(port_id_last > port_id_max,
170 		   "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
171 		   port_id_last, port_id_max);
173 	/* Assign ports to PROX interfaces & Read max RX/TX queues per port */
174 	for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
175 		/* skip ports that are not enabled */
176 		if (!prox_port_cfg[port_id].active) {
179 		plog_info("\tGetting info for rte dev %u\n", port_id);
180 		rte_eth_dev_info_get(port_id, &dev_info);
181 		struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
// -1 = NUMA node unknown until the sysfs lookup below succeeds.
182 		port_cfg->socket = -1;
184 		port_cfg->max_txq = dev_info.max_tx_queues;
185 		port_cfg->max_rxq = dev_info.max_rx_queues;
186 		port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
187 		port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
// DPDK 18.05 moved the PCI device out of rte_eth_dev_info.
189 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
190 		pci_dev = dev_info.pci_dev;
192 		pci_dev = RTE_DEV_TO_PCI(dev_info.device);
// Build the canonical "domain:bus:devid.function" PCI address string.
197 		snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
198 			 "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
// NOTE(review): strncpy does not guarantee NUL-termination if driver_name
// fills the buffer — relies on the destination being large enough; confirm.
199 		strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
200 		plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
// Derive short_name by stripping the "rte_"/"net_" prefix (and, below, a
// trailing "_pmd" suffix) from the driver name.
202 		if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
203 			strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
204 		} else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
205 			strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
207 			strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
// Truncate at "_pmd" (the elided body presumably writes '\0' at 'ptr').
210 		if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
214 		/* Try to find the device's numa node */
216 		snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
// NOTE(review): the NULL check on fopen() is elided in this view — fgets on
// a NULL stream would be undefined behaviour; confirm the full file guards it.
217 		FILE* numa_node_fd = fopen(buf, "r");
219 			if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
220 				plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
// sysfs reports -1 when firmware did not expose a NUMA node for the device.
222 			port_cfg->socket = strtol(buf, 0, 0);
223 			if (port_cfg->socket == -1) {
224 				plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
226 			fclose(numa_node_fd);
// Record which TX checksum offloads the PMD supports; init_port() later
// disables TX offloads entirely when none are available.
229 		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
230 			port_cfg->capabilities.tx_offload_cksum |= IPV4_CKSUM;
232 		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
233 			port_cfg->capabilities.tx_offload_cksum |= UDP_CKSUM;
238 /* Create rte ring-backed devices */
// For every active port configured with an rx_ring name, look up the
// already-created rte_rings by name and build an ethdev on top of them with
// rte_eth_from_rings() (one RX and one TX ring each, on the caller's socket).
// Returns the number of ring-backed devices created.
// NOTE(review): the increment of nb_ring_dev and the final return are elided
// in this view — confirm against the full file.
239 uint8_t init_rte_ring_dev(void)
241 	uint8_t nb_ring_dev = 0;
243 	for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
244 		/* skip ports that are not enabled */
245 		if (!prox_port_cfg[port_id].active) {
248 		struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
// An empty rx_ring name means this is a regular (non ring-backed) port.
249 		if (port_cfg->rx_ring[0] != '\0') {
250 			plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
252 			struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
253 			PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
254 			struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
255 			PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
257 			int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
258 			PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
260 			port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
// Bring up one DPDK port from its prox_port_cfg entry: ensure at least one
// RX and one TX queue, clamp and apply the MTU, configure RSS / TX queue
// flags / link-state-interrupt policy, configure and start the device, set
// up all RX/TX queues, read the initial link state, and apply promiscuous
// mode and per-queue stats mappings where the PMD supports them.
// NOTE(review): many interior lines (closing braces, #else/#endif branches,
// declarations such as 'port_id' and 'ret', n_rxq/n_txq assignments) are
// elided in this view; comments on elided behaviour are inferred and should
// be confirmed against the full file.
269 static void init_port(struct prox_port_cfg *port_cfg)
// Mutated below (dummy_pool_name[0]++) so each dummy pool name is unique.
271 	static char dummy_pool_name[] = "0_dummy";
272 	struct rte_eth_link link;
// Recover the port index from the entry's position in the global table.
276 	port_id = port_cfg - prox_port_cfg;
277 	plog_info("\t*** Initializing port %u ***\n", port_id);
278 	plog_info("\t\tPort name is set to %s\n", port_cfg->name);
279 	plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
280 	plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
282 	PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
283 		   "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
// DPDK requires at least one RX queue: create one backed by a small dummy
// mempool that nothing will ever drain.
285 	if (port_cfg->n_rxq == 0) {
286 		/* not receiving on this port */
287 		plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
289 		uint32_t mbuf_size = TX_MBUF_SIZE;
290 		plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
291 			  port_cfg->socket, port_cfg->n_rxd, mbuf_size);
292 		port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
294 						       sizeof(struct rte_pktmbuf_pool_private),
295 						       rte_pktmbuf_pool_init, NULL,
296 						       prox_pktmbuf_init, 0,
297 						       port_cfg->socket, 0);
298 		PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
299 			   port_cfg->socket, port_cfg->n_rxd);
300 		dummy_pool_name[0]++;
302 	// Most pmd should now support setting mtu
// Clamp the configured MTU so MTU + L2 overhead fits the port's max frame.
303 	if (port_cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
304 		plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
305 		port_cfg->mtu = port_cfg->max_rx_pkt_len;
307 	plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
308 	ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
// Non-fatal: some PMDs do not support set_mtu; just log and continue.
310 		plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
312 	if (port_cfg->n_txq == 0) {
313 		/* not sending on this port */
314 		plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
319 	if (port_cfg->n_rxq > 1)  {
320 		// Enable RSS if multiple receive queues
321 		port_cfg->port_conf.rxmode.mq_mode                      |= ETH_MQ_RX_RSS;
322 		port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key        = toeplitz_init_key;
323 		port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len    = TOEPLITZ_KEY_LEN;
324 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
325 		port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IPV4|ETH_RSS_NONFRAG_IPV4_UDP;
327 		port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
// Log the effective TX queue flag settings (set from config and/or the
// capability checks further down).
331 	if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
332 		plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
334 		plog_info("\t\tRefcnt enabled on port %d\n", port_id);
336 	if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
337 		plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
339 		plog_info("\t\tTX offloads enabled on port %d\n", port_id);
341 	if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
342 		plog_info("\t\tEnabling No TX MultiSegs on port %d\n", port_id);
344 		plog_info("\t\tTX Multi segments enabled on port %d\n", port_id);
346 	plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
347 		  port_id, port_cfg->n_rxq, port_cfg->n_txq);
349 	PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
350 	PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
// These PMDs (and the NULL device) do not support link-state interrupts.
352 	if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
353 	    !strcmp(port_cfg->short_name, "virtio") ||
354 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
355 	    !strcmp(port_cfg->short_name, "i40e") ||
357 	    !strcmp(port_cfg->short_name, "i40e_vf") ||
358 	    !strcmp(port_cfg->short_name, "avp") || /* Wind River */
359 	    !strcmp(port_cfg->driver_name, "") || /* NULL device */
360 	    !strcmp(port_cfg->short_name, "vmxnet3")) {
361 		port_cfg->port_conf.intr_conf.lsc = 0;
362 		plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
// An explicit "lsc=" config setting always wins over the PMD heuristics above.
365 	if (port_cfg->lsc_set_explicitely) {
366 		port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
367 		plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
369 	if (!strcmp(port_cfg->short_name, "vmxnet3")) {
370 		if (port_cfg->n_txd < 512) {
371 			// Vmxnet3 driver requires minimum 512 tx descriptors
372 			plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
373 			port_cfg->n_txd = 512;
377 	ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
378 				    port_cfg->n_txq, &port_cfg->port_conf);
379 	PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
381 	if (port_cfg->port_conf.intr_conf.lsc) {
382 		rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
385 	plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
// If init_rte_dev() found no TX checksum capability, force offloads off.
387 	if (port_cfg->capabilities.tx_offload_cksum == 0) {
388 		port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
389 		plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
392 	if (!strcmp(port_cfg->short_name, "vmxnet3")) {
393 		port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
394 		plog_info("\t\tDisabling multsegs on port %d as vmxnet3 does not support them\n", port_id);
397 	/* initialize TX queues first */
398 	for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
399 		plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
400 			  queue_id, port_cfg->socket, port_cfg->n_txd);
401 		ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
402 					     port_cfg->socket, &port_cfg->tx_conf);
403 		PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
406 	/* initialize RX queues */
// Each RX queue gets its own mempool from port_cfg->pool[].
407 	for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
408 		plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
409 			  queue_id, port_id, port_cfg->socket,
410 			  port_cfg->n_rxd, port_cfg->pool[queue_id]);
411 		ret = rte_eth_rx_queue_setup(port_id, queue_id,
413 					     port_cfg->socket, &port_cfg->rx_conf,
414 					     port_cfg->pool[queue_id]);
415 		PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
418 	plog_info("\t\tStarting up port %u ...", port_id);
419 	ret = rte_eth_dev_start(port_id);
421 	PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
422 	plog_info(" done: ");
424 	/* Getting link status can be done without waiting if Link
425 	   State Interrupt is enabled since in that case, if the link
426 	   is recognized as being down, an interrupt will notify that
428 	if (port_cfg->port_conf.intr_conf.lsc)
429 		rte_eth_link_get_nowait(port_id, &link);
431 		rte_eth_link_get(port_id, &link);
433 	port_cfg->link_up = link.link_status;
434 	port_cfg->link_speed = link.link_speed;
435 	if (link.link_status) {
436 		plog_info("Link Up - speed %'u Mbps - %s\n",
438 			  (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
439 			  "full-duplex" : "half-duplex");
442 		plog_info("Link Down\n");
445 	if (port_cfg->promiscuous) {
446 		rte_eth_promiscuous_enable(port_id);
447 		plog_info("\t\tport %u in promiscuous mode\n", port_id);
// Per-queue stats mappings are not supported by these PMDs, so skip them;
// failures elsewhere are logged but non-fatal.
450 	if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
451 	    strcmp(port_cfg->short_name, "i40e") &&
452 	    strcmp(port_cfg->short_name, "i40e_vf") &&
453 	    strcmp(port_cfg->short_name, "vmxnet3")) {
454 		for (uint8_t i = 0; i < 16; ++i) {
455 			ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
457 				plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
459 			ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
461 				plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
// Initialize every active port: walk indices 0..last-active and run
// init_port() on each entry flagged active (inactive entries are skipped,
// presumably via an elided 'continue').
467 void init_port_all(void)
469 	uint8_t max_port_idx = prox_last_port_active() + 1;
471 	for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
472 		if (!prox_port_cfg[portid].active) {
475 		init_port(&prox_port_cfg[portid]);
// Exit hook: close every active DPDK port so device resources are released
// on shutdown (inactive entries are skipped, presumably via an elided
// 'continue').
479 void close_ports_atexit(void)
481 	uint8_t max_port_idx = prox_last_port_active() + 1;
483 	for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
484 		if (!prox_port_cfg[portid].active) {
487 		rte_eth_dev_close(portid);
// Set each active port's MAC address according to its configured type:
//   PROX_PORT_MAC_HW   - read the address from the NIC,
//   PROX_PORT_MAC_RAND - generate a random address,
//   PROX_PORT_MAC_SET  - keep the address given in the config (case body is
//                        elided in this view — confirm against the full file).
// NOTE(review): 'break' statements between cases are elided here; presumably
// each case breaks rather than falling through — confirm.
491 void init_port_addr(void)
493 	struct prox_port_cfg *port_cfg;
495 	for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
496 		if (!prox_port_cfg[port_id].active) {
499 		port_cfg = &prox_port_cfg[port_id];
501 		switch (port_cfg->type) {
502 		case PROX_PORT_MAC_HW:
503 			rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
505 		case PROX_PORT_MAC_RAND:
506 			eth_random_addr(port_cfg->eth_addr.addr_bytes);
508 		case PROX_PORT_MAC_SET:
514 int port_is_active(uint8_t port_id)
516 if (port_id > PROX_MAX_PORTS) {
517 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
521 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
522 if (!port_cfg->active) {
523 plog_info("Port %u is not active\n", port_id);