2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
/* Global per-port configuration table, indexed by DPDK port id
 * (0 .. PROX_MAX_PORTS-1).  Filled in from the PROX config file and by
 * init_rte_dev()/init_port() below; read by the whole application. */
44 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Return the number of ports marked active in prox_port_cfg[].
 * NOTE(review): this excerpt is missing lines (accumulator declaration,
 * return statement, closing brace) — comments describe only what is visible. */
47 int prox_nb_active_ports(void)
/* Sum the 'active' flags over the whole table; 'ret' is declared on an
 * elided line. */
50 	for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
51 		ret += prox_port_cfg[i].active;
/* Return the highest port index whose 'active' flag is set, or -1 when no
 * port is active (callers such as init_rte_dev() rely on the -1 sentinel).
 * NOTE(review): excerpt is missing lines (the assignment inside the if,
 * the return, closing braces). */
56 int prox_last_port_active(void)
59 	for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
60 		if (prox_port_cfg[i].active) {
/* Link-State-Change (LSC) interrupt callback, registered per port via
 * rte_eth_dev_callback_register() in init_port().  Three signatures are
 * provided because the DPDK callback prototype changed over releases:
 *   >= 17.11: uint16_t port_id, int return, extra ret_param
 *   >= 17.08: uint8_t  port_id, int return, extra ret_param
 *   older:    uint8_t  port_id, void return, no ret_param
 * On an LSC event it increments the global atomic counter 'lsc'
 * (declared on an elided line; consumed elsewhere, presumably by the
 * stats/IRQ code — confirm against stats_irq.h).
 * NOTE(review): #else/#endif lines and early-return body are elided in
 * this excerpt. */
67 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
68 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
69 		__attribute__((unused)) void *ret_param)
71 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
72 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
73 		__attribute__((unused)) void *ret_param)
75 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
/* Ignore any event other than a link-state-change interrupt. */
79 	if (RTE_ETH_EVENT_INTR_LSC != type) {
80 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Atomically count the link-state change; read elsewhere. */
87 	rte_atomic32_inc(&lsc);
89 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Argument bundle threaded through the mempool object walk into
 * prox_pktmbuf_reinit(): the pool being re-initialized and the lcore
 * configuration passed on as the init opaque argument. */
94 struct prox_pktmbuf_reinit_args {
95 	struct rte_mempool *mp;
96 	struct lcore_cfg *lconf;
99 /* standard mbuf initialization procedure */
/* Mempool element init callback (rte_mempool_create() obj_init signature):
 * runs the stock rte_pktmbuf_init() and additionally pre-loads the L2/L3
 * header lengths (Ethernet + IPv4) used for TX offloads, via the
 * version-appropriate mbuf field.
 * @param mp          pool owning the element
 * @param opaque_arg  forwarded untouched to rte_pktmbuf_init()
 * @param _m          the raw mbuf object being initialized
 * @param i           element index within the pool
 * NOTE(review): the #else/#endif lines between the two field layouts are
 * elided in this excerpt. */
100 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
102 	struct rte_mbuf *mbuf = _m;
104 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
/* Since DPDK 1.8 l2_len/l3_len live in the packed tx_offload word. */
105 	mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
/* Pre-1.8 layout: separate vlan_macip bit-fields. */
107 	mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
108 	mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
111 	rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
/* Re-initialize one mempool element in place (memory-walk callback shape:
 * arg/start/end/idx).  Skips past the pool header to reach the mbuf and
 * re-runs prox_pktmbuf_init() on it, passing the lcore config as the
 * opaque argument.  Used to restore mbufs to a known state without
 * re-creating the pool.
 * NOTE(review): declarations of 'obj'/'m' and some intervening lines are
 * elided in this excerpt. */
114 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
116 	struct prox_pktmbuf_reinit_args *init_args = arg;
/* 'start' points at the raw element; the mbuf lives after the pool header. */
120 	obj += init_args->mp->header_size;
121 	m = (struct rte_mbuf*)obj;
123 	prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
126 /* initialize rte devices and check the number of available ports */
/* Probe DPDK ethernet devices (or create dummy/null vdevs when
 * use_dummy_devices is set), sanity-check the port count against the
 * configuration, and fill each active prox_port_cfg[] entry with the
 * device info reported by the PMD: queue limits, RX packet/buffer sizes,
 * PCI address, driver name/short name, NUMA socket and TX checksum
 * offload capabilities.
 * @param use_dummy_devices  non-zero to fabricate null PMD ports instead
 *                           of using real hardware (mutually exclusive
 *                           with real ports being present).
 * NOTE(review): this excerpt is missing many lines (#else/#endif arms,
 * closing braces, 'continue' statements, buf/ptr declarations); comments
 * describe only the visible code. */
127 void init_rte_dev(int use_dummy_devices)
129 	uint8_t nb_ports, port_id_max;
131 	struct rte_eth_dev_info dev_info;
133 	nb_ports = rte_eth_dev_count();
134 	/* get available ports configuration */
/* Dummy devices and real ports cannot be mixed. */
135 	PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
137 	if (use_dummy_devices) {
138 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
/* Create one null device per configured port index. */
139 		nb_ports = prox_last_port_active() + 1;
140 		plog_info("Creating %u dummy devices\n", nb_ports);
/* Leading digit of the name is bumped per iteration (on an elided line,
 * presumably port_name[0]++ — same trick as dummy_pool_name below). */
142 		char port_name[32] = "0dummy_dev";
143 		for (uint32_t i = 0; i < nb_ports; ++i) {
144 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
/* Newer DPDK: create the null PMD through the vdev bus API. */
145 			rte_vdev_init(port_name, "size=ETHER_MIN_LEN,copy=0");
/* Older DPDK: direct null-PMD creation helper. */
147 			eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
/* Pre-2.1 DPDK cannot create null devices at all. */
152 		PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
155 	else if (prox_last_port_active() != -1) {
156 		PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
157 		plog_info("\tDPDK has found %u ports\n", nb_ports);
/* Clamp to what PROX was compiled for; excess ports are ignored. */
160 	if (nb_ports > PROX_MAX_PORTS) {
161 		plog_warn("\tWarning: I can deal with at most %u ports."
162 		          " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
164 		nb_ports = PROX_MAX_PORTS;
/* The configuration must not reference a port DPDK did not discover. */
166 	port_id_max = nb_ports - 1;
167 	port_id_last = prox_last_port_active();
168 	PROX_PANIC(port_id_last > port_id_max,
169 		   "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
170 		   port_id_last, port_id_max);
172 	/* Assign ports to PROX interfaces & Read max RX/TX queues per port */
173 	for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
174 		/* skip ports that are not enabled */
175 		if (!prox_port_cfg[port_id].active) {
178 		plog_info("\tGetting info for rte dev %u\n", port_id);
179 		rte_eth_dev_info_get(port_id, &dev_info);
180 		struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* -1 = socket unknown until the sysfs numa_node probe below succeeds. */
181 		port_cfg->socket = -1;
183 		port_cfg->max_txq = dev_info.max_tx_queues;
184 		port_cfg->max_rxq = dev_info.max_rx_queues;
185 		port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
186 		port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
/* Virtual devices (null/ring PMDs) have no PCI backing. */
188 		if (!dev_info.pci_dev)
191 		snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
192 			 "%04x:%02x:%02x.%1x", dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
193 		strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
194 		plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d, max_rx_pktlen = %d, min_rx_bufsize = %d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq, port_cfg->max_rx_pkt_len, port_cfg->min_rx_bufsize);
/* Derive short_name by stripping the PMD name prefix ("rte_" on older
 * DPDK, "net_" on newer), then the "_pmd" infix below. */
196 		if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
197 			strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
198 		} else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
199 			strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
201 			strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
204 		if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
208 		/* Try to find the device's numa node */
210 		snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
211 		FILE* numa_node_fd = fopen(buf, "r");
213 		if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
214 			plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
216 		port_cfg->socket = strtol(buf, 0, 0);
/* Some platforms/BIOSes report -1 in sysfs when NUMA info is missing. */
217 		if (port_cfg->socket == -1) {
218 			plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
220 		fclose(numa_node_fd);
/* Record which TX checksum offloads the PMD advertises. */
223 		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
224 			port_cfg->capabilities.tx_offload_cksum |= IPV4_CKSUM;
226 		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
227 			port_cfg->capabilities.tx_offload_cksum |= UDP_CKSUM;
232 /* Create rte ring-backed devices */
/* For every active port configured with an rx_ring name, look up the
 * already-created RX/TX rte_rings by name and wrap them in an ethdev via
 * rte_eth_from_rings() (one RX and one TX queue each).  Panics if a named
 * ring does not exist or the ethdev cannot be created.
 * @return number of ring-backed devices created (counter incremented on
 *         an elided line).
 * NOTE(review): closing braces / increment / return are elided in this
 * excerpt. */
233 uint8_t init_rte_ring_dev(void)
235 	uint8_t nb_ring_dev = 0;
237 	for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
238 		/* skip ports that are not enabled */
239 		if (!prox_port_cfg[port_id].active) {
242 		struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* A non-empty rx_ring name marks this port as ring-backed. */
243 		if (port_cfg->rx_ring[0] != '\0') {
244 			plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
246 			struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
247 			PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
248 			struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
249 			PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
251 			int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
252 			PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
254 			port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/* Fully bring up one DPDK port described by *port_cfg:
 *  - derive port_id from the entry's position in prox_port_cfg[]
 *  - ensure at least one RX and one TX queue (allocating a dummy mempool
 *    for RX-less ports so the PMD still has buffers)
 *  - clamp and apply the MTU, configure RSS when multiple RX queues,
 *  - disable the LSC interrupt for PMDs known not to support it (unless
 *    explicitly overridden in the config)
 *  - rte_eth_dev_configure(), set up all RX and TX queues, start the
 *    port, report link status, optionally enable promiscuous mode, and
 *    map per-queue stats registers where the PMD supports it.
 * Panics (PROX_PANIC) on any unrecoverable configuration error.
 * NOTE(review): this excerpt is missing many lines (declarations of
 * port_id/ret, else branches, #else/#endif arms, closing braces, the
 * n_rxq/n_txq = 1 assignments); comments describe only the visible code. */
263 static void init_port(struct prox_port_cfg *port_cfg)
/* 'static' so each dummy pool gets a unique name via dummy_pool_name[0]++. */
265 	static char dummy_pool_name[] = "0_dummy";
266 	struct rte_eth_link link;
/* Entry's index in the global table is the DPDK port id. */
270 	port_id = port_cfg - prox_port_cfg;
271 	plog_info("\t*** Initializing port %u ***\n", port_id);
272 	plog_info("\t\tPort name is set to %s\n", port_cfg->name);
273 	plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
274 	plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
276 	PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
277 		   "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
279 	if (port_cfg->n_rxq == 0) {
280 		/* not receiving on this port */
281 		plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
/* DPDK still requires a mempool for the mandatory RX queue; give it a
 * throw-away pool sized to n_rxd elements. */
283 		uint32_t mbuf_size = TX_MBUF_SIZE;
284 		plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
285 			  port_cfg->socket, port_cfg->n_rxd, mbuf_size);
286 		port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
288 						       sizeof(struct rte_pktmbuf_pool_private),
289 						       rte_pktmbuf_pool_init, NULL,
290 						       prox_pktmbuf_init, 0,
291 						       port_cfg->socket, 0);
292 		PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
293 			   port_cfg->socket, port_cfg->n_rxd);
/* Bump leading digit so the next dummy pool name stays unique. */
294 		dummy_pool_name[0]++;
296 	// Most pmd should now support setting mtu
/* Keep MTU + L2 overhead within the PMD's max RX packet length. */
297 	if (port_cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
298 		plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
299 		port_cfg->mtu = port_cfg->max_rx_pkt_len;
301 	plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
302 	ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
/* Non-fatal: some PMDs do not implement set_mtu. */
304 		plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
306 	if (port_cfg->n_txq == 0) {
307 		/* not sending on this port */
308 		plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
313 	if (port_cfg->n_rxq > 1)  {
314 		// Enable RSS if multiple receive queues
315 		port_cfg->port_conf.rxmode.mq_mode       		|= ETH_MQ_RX_RSS;
316 		port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key 	= toeplitz_init_key;
317 		port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len 	= TOEPLITZ_KEY_LEN;
318 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
319 		port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf 	= ETH_RSS_IPV4|ETH_RSS_NONFRAG_IPV4_UDP;
/* Pre-2.0 spelling of the same RSS hash flag. */
321 		port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf 	= ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
/* Log the effective TX queue flags (refcount / offloads / multi-seg);
 * the else-branch plog_info lines belong to elided else keywords. */
324 	if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
325 		plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
327 		plog_info("\t\tRefcnt enabled on port %d\n", port_id);
329 	if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
330 		plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
332 		plog_info("\t\tTX offloads enabled on port %d\n", port_id);
334 	if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
335 		plog_info("\t\tEnabling No TX MultiSegs on port %d\n", port_id);
337 		plog_info("\t\tTX Multi segments enabled on port %d\n", port_id);
339 	plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
340 		  port_id, port_cfg->n_rxq, port_cfg->n_txq);
342 	PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
343 	PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
/* PMDs known not to support the link-state-change interrupt. */
345 	if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
346 	    !strcmp(port_cfg->short_name, "virtio") ||
347 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
348 	    !strcmp(port_cfg->short_name, "i40e") ||
350 	    !strcmp(port_cfg->short_name, "i40e_vf") ||
351 	    !strcmp(port_cfg->short_name, "avp") || /* Wind River */
352 	    !strcmp(port_cfg->driver_name, "") || /* NULL device */
353 	    !strcmp(port_cfg->short_name, "vmxnet3")) {
354 		port_cfg->port_conf.intr_conf.lsc = 0;
355 		plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
/* Config file can force LSC on/off regardless of the blacklist above. */
358 	if (port_cfg->lsc_set_explicitely) {
359 		port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
360 		plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
362 	if (!strcmp(port_cfg->short_name, "vmxnet3")) {
363 		if (port_cfg->n_txd < 512) {
364 			// Vmxnet3 driver requires minimum 512 tx descriptors
365 			plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
366 			port_cfg->n_txd = 512;
370 	ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
371 				    port_cfg->n_txq, &port_cfg->port_conf);
372 	PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
374 	if (port_cfg->port_conf.intr_conf.lsc) {
375 		rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
378 	plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
380 	/* initialize RX queues */
381 	for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
382 		plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
383 			  queue_id, port_id, port_cfg->socket,
384 			  port_cfg->n_rxd, port_cfg->pool[queue_id]);
/* n_rxd argument sits on an elided line between these two. */
386 		ret = rte_eth_rx_queue_setup(port_id, queue_id,
388 					     port_cfg->socket, &port_cfg->rx_conf,
389 					     port_cfg->pool[queue_id]);
391 		PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
/* No checksum-offload capability at all => tell the PMD to skip offloads. */
393 	if (port_cfg->capabilities.tx_offload_cksum == 0) {
394 		port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
395 		plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
398 	if (!strcmp(port_cfg->short_name, "vmxnet3")) {
399 		port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
400 		plog_info("\t\tDisabling multsegs on port %d as vmxnet3 does not support them\n", port_id);
402 	/* initialize one TX queue per logical core on each port */
403 	for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
404 		plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
405 			  queue_id, port_cfg->socket, port_cfg->n_txd);
406 		ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
407 					     port_cfg->socket, &port_cfg->tx_conf);
408 		PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
411 	plog_info("\t\tStarting up port %u ...", port_id);
412 	ret = rte_eth_dev_start(port_id);
414 	PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
415 	plog_info(" done: ");
417 	/* Getting link status can be done without waiting if Link
418 	   State Interrupt is enabled since in that case, if the link
419 	   is recognized as being down, an interrupt will notify that
421 	if (port_cfg->port_conf.intr_conf.lsc)
422 		rte_eth_link_get_nowait(port_id, &link);
/* else-branch: block until the PMD reports a settled link state. */
424 		rte_eth_link_get(port_id, &link);
426 	port_cfg->link_up = link.link_status;
427 	port_cfg->link_speed = link.link_speed;
428 	if (link.link_status) {
429 		plog_info("Link Up - speed %'u Mbps - %s\n",
431 			  (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
432 			  "full-duplex" : "half-duplex");
435 		plog_info("Link Down\n");
438 	if (port_cfg->promiscuous) {
439 		rte_eth_promiscuous_enable(port_id);
440 		plog_info("\t\tport %u in promiscuous mode\n", port_id);
/* Per-queue stats-register mapping is not supported by these PMDs;
 * failures below are logged but non-fatal.  The 16 here matches the
 * stats-register count assumed elsewhere — TODO confirm against
 * RTE_ETHDEV_QUEUE_STAT_CNTRS. */
443 	if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
444 	    strcmp(port_cfg->short_name, "i40e") &&
445 	    strcmp(port_cfg->short_name, "i40e_vf") &&
446 	    strcmp(port_cfg->short_name, "vmxnet3")) {
447 		for (uint8_t i = 0; i < 16; ++i) {
448 			ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
450 				plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
452 			ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
454 				plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
/* Initialize every active port in prox_port_cfg[] (indices 0 through the
 * last active one) by delegating to init_port().  Inactive entries are
 * skipped via the guard ('continue' on an elided line). */
460 void init_port_all(void)
462 	uint8_t max_port_idx = prox_last_port_active() + 1;
464 	for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
465 		if (!prox_port_cfg[portid].active) {
468 		init_port(&prox_port_cfg[portid]);
/* atexit-style cleanup: close every active DPDK port so the PMDs can
 * release their resources on process exit.  Mirrors the iteration pattern
 * of init_port_all(); inactive entries are skipped. */
472 void close_ports_atexit(void)
474 	uint8_t max_port_idx = prox_last_port_active() + 1;
476 	for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
477 		if (!prox_port_cfg[portid].active) {
480 		rte_eth_dev_close(portid);
/* Set the MAC address recorded in each active port's config according to
 * its configured source:
 *   PROX_PORT_MAC_HW   - read the hardware MAC from the device
 *   PROX_PORT_MAC_RAND - generate a random unicast MAC
 *   PROX_PORT_MAC_SET  - keep the address already set in the config
 *                        (case body elided in this excerpt)
 * NOTE(review): break statements, default case and closing braces are
 * elided in this excerpt. */
484 void init_port_addr(void)
486 	struct prox_port_cfg *port_cfg;
488 	for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
489 		if (!prox_port_cfg[port_id].active) {
492 		port_cfg = &prox_port_cfg[port_id];
494 		switch (port_cfg->type) {
495 		case PROX_PORT_MAC_HW:
496 			rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
498 		case PROX_PORT_MAC_RAND:
499 			eth_random_addr(port_cfg->eth_addr.addr_bytes);
501 		case PROX_PORT_MAC_SET:
507 int port_is_active(uint8_t port_id)
509 if (port_id > PROX_MAX_PORTS) {
510 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
514 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
515 if (!port_cfg->active) {
516 plog_info("Port %u is not active\n", port_id);