2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
/* Global per-port configuration table with PROX_MAX_PORTS slots; the functions
   in this file address it directly by port index. */
44 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Walks all PROX_MAX_PORTS slots of prox_port_cfg[] and adds each slot's
   `active` field to `ret`.
   NOTE(review): the declaration and the return of `ret` are not visible in
   this view; presumably the sum (number of active ports) is returned - confirm. */
47 int prox_nb_active_ports(void)
50 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
51 ret += prox_port_cfg[i].active;
/* Walks all PROX_MAX_PORTS slots of prox_port_cfg[] and tests each slot's
   `active` flag.
   NOTE(review): the body of the `if` and the return are not visible in this
   view. Callers in this file compare the result with -1 and add 1 to it to get
   a port count, so presumably the highest active index is returned, or -1 when
   no port is active - confirm. */
56 int prox_last_port_active(void)
59 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
60 if (prox_port_cfg[i].active) {
/* Link-state-change callback; init_port() registers it with
   rte_eth_dev_callback_register() for RTE_ETH_EVENT_INTR_LSC.
   The prototype depends on RTE_VERSION:
     - 17.11.0.0 and later: returns int, uint16_t port id, has ret_param;
     - 17.8.0.1 and later:  returns int, uint8_t port id, has ret_param;
     - otherwise:           returns void, uint8_t port id, no ret_param.
   Only `type` is used; every other parameter is marked unused. */
67 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
68 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
69 __attribute__((unused)) void *ret_param)
71 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
72 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
73 __attribute__((unused)) void *ret_param)
75 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
/* Events that are not link-state-change interrupts take this branch; what the
   branch does is version dependent and not visible in this view. */
79 if (RTE_ETH_EVENT_INTR_LSC != type) {
80 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Record the event by atomically incrementing `lsc` (defined outside this view). */
87 rte_atomic32_inc(&lsc);
89 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Argument bundle that prox_pktmbuf_reinit() receives through its `arg`
   pointer; both members are forwarded to prox_pktmbuf_init(). */
94 struct prox_pktmbuf_reinit_args {
/* mempool whose header_size is used to locate the mbuf inside an object */
95 struct rte_mempool *mp;
/* passed on as the opaque argument of prox_pktmbuf_init() */
96 struct lcore_cfg *lconf;
99 /* standard mbuf initialization procedure */
/* Object initializer with the signature expected by rte_mempool_create()
   (init_port() passes it there).
   mp, opaque_arg, i: forwarded unchanged to rte_pktmbuf_init().
   _m: the object to initialize, treated as a struct rte_mbuf.
   The L2/L3 header lengths of the mbuf are preset to the sizes of an Ethernet
   header and an IPv4 header: through tx_offload on DPDK 1.8.0.0 and later,
   through pkt.vlan_macip on the other branch.
   NOTE(review): the preset is written before rte_pktmbuf_init() runs; if that
   function clears the mbuf, the preset is lost - confirm against the DPDK
   version in use. */
100 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
102 struct rte_mbuf *mbuf = _m;
104 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
105 mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
107 mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
108 mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
111 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
/* Re-runs prox_pktmbuf_init() on one mempool object.
   arg:   interpreted as a struct prox_pktmbuf_reinit_args.
   start: start of the object (NOTE(review): the declaration of `obj` is not
          visible in this view; presumably it is initialized from `start` -
          confirm).
   end:   unused.
   idx:   forwarded as the object index. */
114 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
116 struct prox_pktmbuf_reinit_args *init_args = arg;
/* Skip the mempool object header so that `obj` addresses the mbuf itself. */
120 obj += init_args->mp->header_size;
/* NOTE(review): `m` is assigned here but not used by any line visible in this view. */
121 m = (struct rte_mbuf*)obj;
123 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
126 /* initialize rte devices and check the number of available ports */
/* use_dummy_devices: non-zero requests dummy devices instead of real ports;
   the function panics when real ports are present at the same time.
   Steps visible below:
     1. count the ports and, if requested, create the dummy devices;
     2. clamp the port count to PROX_MAX_PORTS and check that the value of
        prox_last_port_active() fits below it;
     3. for every active port store the queue limits, PCI address, driver name
        (full and shortened), NUMA socket and TX checksum offload capabilities
        in its prox_port_cfg[] slot. */
127 void init_rte_dev(int use_dummy_devices)
129 uint8_t nb_ports, port_id_max;
131 struct rte_eth_dev_info dev_info;
133 nb_ports = rte_eth_dev_count();
134 /* get available ports configuration */
135 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
137 if (use_dummy_devices) {
138 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
139 nb_ports = prox_last_port_active() + 1;
140 plog_info("Creating %u dummy devices\n", nb_ports);
142 char port_name[32] = "0dummy_dev";
143 for (uint32_t i = 0; i < nb_ports; ++i) {
144 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
/* The devargs string reaches the driver verbatim: a macro name inside a
   string literal is never expanded, so the packet size is spelled out.
   Assumes ETHER_MIN_LEN is 64, as in DPDK's rte_ether.h - TODO confirm. */
145 rte_vdev_init(port_name, "size=64,copy=0");
147 eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
152 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
/* Real ports are only required when prox_last_port_active() is not -1. */
155 else if (prox_last_port_active() != -1) {
156 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
157 plog_info("\tDPDK has found %u ports\n", nb_ports);
160 if (nb_ports > PROX_MAX_PORTS) {
161 plog_warn("\tWarning: I can deal with at most %u ports."
162 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
164 nb_ports = PROX_MAX_PORTS;
166 port_id_max = nb_ports - 1;
167 port_id_last = prox_last_port_active();
168 PROX_PANIC(port_id_last > port_id_max,
169 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
170 port_id_last, port_id_max);
172 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
173 for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
174 /* skip ports that are not enabled */
175 if (!prox_port_cfg[port_id].active) {
178 plog_info("\tGetting info for rte dev %u\n", port_id);
179 rte_eth_dev_info_get(port_id, &dev_info);
180 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* -1 stays in place unless a NUMA node is read from sysfs further down. */
181 port_cfg->socket = -1;
183 port_cfg->max_txq = dev_info.max_tx_queues;
184 port_cfg->max_rxq = dev_info.max_rx_queues;
186 if (!dev_info.pci_dev)
189 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
190 "%04x:%02x:%02x.%1x", dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
/* snprintf() always NUL-terminates the destination; strncpy() with the full
   buffer size does not when the source is at least as long as the buffer. */
191 snprintf(port_cfg->driver_name, sizeof(port_cfg->driver_name), "%s", dev_info.driver_name);
192 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
/* short_name is driver_name without a leading "rte_" or "net_". */
194 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
195 snprintf(port_cfg->short_name, sizeof(port_cfg->short_name), "%s", prox_port_cfg[port_id].driver_name + 4);
196 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
197 snprintf(port_cfg->short_name, sizeof(port_cfg->short_name), "%s", prox_port_cfg[port_id].driver_name + 4);
199 snprintf(port_cfg->short_name, sizeof(port_cfg->short_name), "%s", prox_port_cfg[port_id].driver_name);
/* Locate "_pmd" in the short name. NOTE(review): the action taken on a match
   is not visible in this view; presumably the name is cut there - confirm. */
202 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
206 /* Try to find the device's numa node */
208 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
209 FILE* numa_node_fd = fopen(buf, "r");
211 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
212 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
/* NOTE(review): when fgets() fails, buf still holds the sysfs path at the
   point strtol() parses it, unless a line not visible here prevents that -
   confirm. */
214 port_cfg->socket = strtol(buf, 0, 0);
215 if (port_cfg->socket == -1) {
216 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
218 fclose(numa_node_fd);
/* Record which TX checksum offloads the device advertises. */
221 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
222 port_cfg->capabilities.tx_offload_cksum |= IPV4_CKSUM;
224 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
225 port_cfg->capabilities.tx_offload_cksum |= UDP_CKSUM;
230 /* Create rte ring-backed devices */
/* For every active port whose rx_ring name is non-empty: look up the named RX
   and TX rings, wrap them in an ethdev with rte_eth_from_rings() and switch
   off the link-state interrupt for that port. Panics when a ring is missing
   or the ethdev cannot be created.
   NOTE(review): updates of nb_ring_dev and the return statement are not
   visible in this view; presumably the number of ring-backed devices created
   is returned - confirm. */
231 uint8_t init_rte_ring_dev(void)
233 uint8_t nb_ring_dev = 0;
235 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
236 /* skip ports that are not enabled */
237 if (!prox_port_cfg[port_id].active) {
240 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* An empty rx_ring name means the port is not ring-backed. */
241 if (port_cfg->rx_ring[0] != '\0') {
242 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
244 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
245 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
246 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
247 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
/* Per the DPDK API documentation rte_eth_from_rings() returns -1 on failure
   and the id of the new port on success; that id is not necessarily 0, so
   only negative results are treated as errors. */
249 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
250 PROX_PANIC(ret < 0, "Failed to create eth_dev from rings for port %d\n", port_id);
252 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/* Brings one port up.
   port_cfg: slot of prox_port_cfg[] describing the port; several fields are
             adjusted in place (pool[0], port_conf, tx_conf, n_txd, link_up,
             link_speed).
   Order of operations visible below: sanity-check the queue counts, create a
   dummy mempool when no RX queue was configured, optionally set the MTU and
   RSS, decide on the link-state interrupt, configure the device, set up the
   RX and TX queues, start the device, read the link state, enable promiscuous
   mode when requested and map per-queue statistics counters.
   Every failing DPDK call ends in PROX_PANIC, except the statistics mapping
   calls, whose failures are only logged. */
261 static void init_port(struct prox_port_cfg *port_cfg)
/* Static so that the name survives between calls; its first character is
   incremented after each pool creation to keep the pool names distinct. */
263 static char dummy_pool_name[] = "0_dummy";
264 struct rte_eth_link link;
/* The port id is the index of port_cfg inside prox_port_cfg[]. */
268 port_id = port_cfg - prox_port_cfg;
269 plog_info("\t*** Initializing port %u ***\n", port_id);
270 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
271 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
272 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
274 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
275 "\t\t port %u is enabled but no RX or TX queues have been configured\n", port_id);
277 if (port_cfg->n_rxq == 0) {
278 /* not receiving on this port */
279 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
/* vmxnet3 gets RTE_PKTMBUF_HEADROOM extra bytes per element. */
281 uint32_t mbuf_size = MBUF_SIZE;
282 if (strcmp(port_cfg->short_name, "vmxnet3") == 0) {
283 mbuf_size = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;
285 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
286 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
287 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
289 sizeof(struct rte_pktmbuf_pool_private),
290 rte_pktmbuf_pool_init, NULL,
291 prox_pktmbuf_init, 0,
292 port_cfg->socket, 0);
293 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
294 port_cfg->socket, port_cfg->n_rxd);
295 dummy_pool_name[0]++;
297 // Most pmd do not support setting mtu yet...
298 if (!strcmp(port_cfg->short_name, "ixgbe")) {
299 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
300 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
301 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
304 if (port_cfg->n_txq == 0) {
305 /* not sending on this port */
306 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
311 if (port_cfg->n_rxq > 1) {
312 // Enable RSS if multiple receive queues
313 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
314 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
315 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
/* The UDP hash flag has a different name before DPDK 2.0.0.0. */
316 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
317 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONFRAG_IPV4_UDP;
319 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
323 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
324 port_id, port_cfg->n_rxq, port_cfg->n_txq);
326 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
327 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
/* Drivers for which the link-state interrupt is switched off by default. */
329 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
330 !strcmp(port_cfg->short_name, "virtio") ||
331 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
332 !strcmp(port_cfg->short_name, "i40e") ||
334 !strcmp(port_cfg->short_name, "i40e_vf") ||
335 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
336 !strcmp(port_cfg->driver_name, "") || /* NULL device */
337 !strcmp(port_cfg->short_name, "vmxnet3")) {
338 port_cfg->port_conf.intr_conf.lsc = 0;
339 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
/* An explicit setting in the configuration wins over the default above. */
342 if (port_cfg->lsc_set_explicitely) {
343 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
344 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
346 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
347 if (port_cfg->n_txd < 512) {
348 // Vmxnet3 driver requires minimum 512 tx descriptors
349 plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3)\n");
350 port_cfg->n_txd = 512;
354 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
355 port_cfg->n_txq, &port_cfg->port_conf);
356 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
/* The callback is only registered when the interrupt is enabled. */
358 if (port_cfg->port_conf.intr_conf.lsc) {
359 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
362 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
364 /* initialize RX queues */
/* Each RX queue uses the mempool stored at the same index in pool[]. */
365 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
366 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
367 queue_id, port_id, port_cfg->socket,
368 port_cfg->n_rxd, port_cfg->pool[queue_id]);
370 ret = rte_eth_rx_queue_setup(port_id, queue_id,
372 port_cfg->socket, &port_cfg->rx_conf,
373 port_cfg->pool[queue_id]);
375 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
377 if (!strcmp(port_cfg->short_name, "virtio")) {
378 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
379 plog_info("\t\tDisabling TX offloads (virtio does not support TX offloads)\n");
382 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
383 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS | ETH_TXQ_FLAGS_NOMULTSEGS;
384 plog_info("\t\tDisabling TX offloads and multsegs on port %d as vmxnet3 does not support them\n", port_id);
386 /* initialize one TX queue per logical core on each port */
387 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
388 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
389 queue_id, port_cfg->socket, port_cfg->n_txd);
390 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
391 port_cfg->socket, &port_cfg->tx_conf);
392 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
395 plog_info("\t\tStarting up port %u ...", port_id);
396 ret = rte_eth_dev_start(port_id);
398 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
399 plog_info(" done: ");
401 /* Getting link status can be done without waiting if Link
402 State Interrupt is enabled since in that case, if the link
403 is recognized as being down, an interrupt will notify that
405 if (port_cfg->port_conf.intr_conf.lsc)
406 rte_eth_link_get_nowait(port_id, &link);
408 rte_eth_link_get(port_id, &link);
// Cache the link state and speed that were just read.
410 port_cfg->link_up = link.link_status;
411 port_cfg->link_speed = link.link_speed;
412 if (link.link_status) {
413 plog_info("Link Up - speed %'u Mbps - %s\n",
415 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
416 "full-duplex" : "half-duplex");
419 plog_info("Link Down\n");
422 if (port_cfg->promiscuous) {
423 rte_eth_promiscuous_enable(port_id);
424 plog_info("\t\tport %u in promiscuous mode\n", port_id);
// Per-queue statistics mapping is skipped for the drivers listed here; for
// all others, queue index i is mapped to statistics counter i for i in 0..15.
427 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
428 strcmp(port_cfg->short_name, "i40e") &&
429 strcmp(port_cfg->short_name, "i40e_vf") &&
430 strcmp(port_cfg->short_name, "vmxnet3")) {
431 for (uint8_t i = 0; i < 16; ++i) {
432 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
434 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
436 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
438 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
/* Runs init_port() on every active slot of prox_port_cfg[] whose index is
   below prox_last_port_active() + 1; inactive slots are tested and not
   initialized (the statement inside the `if` is not visible in this view). */
444 void init_port_all(void)
446 uint8_t max_port_idx = prox_last_port_active() + 1;
448 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
449 if (!prox_port_cfg[portid].active) {
452 init_port(&prox_port_cfg[portid]);
/* Calls rte_eth_dev_close() for the port indices below
   prox_last_port_active() + 1, testing each slot's `active` flag first (the
   statement inside the `if` is not visible in this view).
   NOTE(review): the name suggests this is registered as an exit handler -
   confirm against the caller. */
456 void close_ports_atexit(void)
458 uint8_t max_port_idx = prox_last_port_active() + 1;
460 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
461 if (!prox_port_cfg[portid].active) {
464 rte_eth_dev_close(portid);
/* Fills in eth_addr for the slots of prox_port_cfg[] according to the slot's
   `type` field; the `active` flag of every slot is tested first (the
   statement inside that `if` is not visible in this view). */
468 void init_port_addr(void)
470 struct prox_port_cfg *port_cfg;
472 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
473 if (!prox_port_cfg[port_id].active) {
476 port_cfg = &prox_port_cfg[port_id];
478 switch (port_cfg->type) {
/* Use the address reported by the device. */
479 case PROX_PORT_MAC_HW:
480 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
/* Generate a random address. */
482 case PROX_PORT_MAC_RAND:
483 eth_random_addr(port_cfg->eth_addr.addr_bytes);
/* No statement for this case is visible in this view; presumably eth_addr
   was already set from the configuration - confirm. */
485 case PROX_PORT_MAC_SET:
/* Checks a port index before it is used to address prox_port_cfg[].
   port_id: index to check.
   A message is logged when the index is out of range or when the addressed
   slot is not flagged active.
   NOTE(review): the return statements are not visible in this view;
   presumably zero is returned in both logged cases and non-zero otherwise -
   confirm.
   prox_port_cfg[] has PROX_MAX_PORTS slots, so the valid indices are
   0 .. PROX_MAX_PORTS - 1 and PROX_MAX_PORTS itself must be rejected too. */
491 int port_is_active(uint8_t port_id)
493 if (port_id >= PROX_MAX_PORTS) {
494 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS - 1);
498 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
499 if (!port_cfg->active) {
500 plog_info("Port %u is not active\n", port_id);