2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
41 #include "prox_cksum.h"
43 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
46 int prox_nb_active_ports(void)
49 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
50 ret += prox_port_cfg[i].active;
55 int prox_last_port_active(void)
58 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
59 if (prox_port_cfg[i].active) {
66 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
67 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
68 __attribute__((unused)) void *ret_param)
70 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
71 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
72 __attribute__((unused)) void *ret_param)
74 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
78 if (RTE_ETH_EVENT_INTR_LSC != type) {
79 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
86 rte_atomic32_inc(&lsc);
88 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Argument bundle handed to prox_pktmbuf_reinit() through its 'arg' pointer. */
struct prox_pktmbuf_reinit_args {
	struct rte_mempool *mp;    /* pool the object belongs to; its header_size locates the mbuf */
	struct lcore_cfg   *lconf; /* forwarded as the opaque argument of prox_pktmbuf_init() */
};
98 /* standard mbuf initialization procedure */
99 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
101 struct rte_mbuf *mbuf = _m;
103 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
104 mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
106 mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
107 mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
110 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
113 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
115 struct prox_pktmbuf_reinit_args *init_args = arg;
119 obj += init_args->mp->header_size;
120 m = (struct rte_mbuf*)obj;
122 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
125 /* initialize rte devices and check the number of available ports */
126 void init_rte_dev(int use_dummy_devices)
128 uint8_t nb_ports, port_id_max, port_id_last;
129 struct rte_eth_dev_info dev_info;
131 nb_ports = rte_eth_dev_count();
132 /* get available ports configuration */
133 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
135 if (use_dummy_devices) {
136 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
137 nb_ports = prox_last_port_active() + 1;
138 plog_info("Creating %u dummy devices\n", nb_ports);
140 char port_name[32] = "0dummy_dev";
141 for (uint32_t i = 0; i < nb_ports; ++i) {
142 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
143 rte_vdev_init(port_name, "size=ETHER_MIN_LEN,copy=0");
145 eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
150 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
154 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
155 plog_info("\tDPDK has found %u ports\n", nb_ports);
158 if (nb_ports > PROX_MAX_PORTS) {
159 plog_warn("\tWarning: I can deal with at most %u ports."
160 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
162 nb_ports = PROX_MAX_PORTS;
164 port_id_max = nb_ports - 1;
165 port_id_last = prox_last_port_active();
166 PROX_PANIC(port_id_last > port_id_max,
167 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
168 port_id_last, port_id_max);
170 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
171 for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
172 /* skip ports that are not enabled */
173 if (!prox_port_cfg[port_id].active) {
176 plog_info("\tGetting info for rte dev %u\n", port_id);
177 rte_eth_dev_info_get(port_id, &dev_info);
178 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
179 port_cfg->socket = -1;
181 port_cfg->max_txq = dev_info.max_tx_queues;
182 port_cfg->max_rxq = dev_info.max_rx_queues;
184 if (!dev_info.pci_dev)
187 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
188 "%04x:%02x:%02x.%1x", dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
189 strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
190 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
192 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
193 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
194 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
195 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
197 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
200 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
204 /* Try to find the device's numa node */
206 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
207 FILE* numa_node_fd = fopen(buf, "r");
209 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
210 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
212 port_cfg->socket = strtol(buf, 0, 0);
213 if (port_cfg->socket == -1) {
214 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
216 fclose(numa_node_fd);
219 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
220 port_cfg->capabilities.tx_offload_cksum |= IPV4_CKSUM;
222 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
223 port_cfg->capabilities.tx_offload_cksum |= UDP_CKSUM;
228 /* Create rte ring-backed devices */
229 uint8_t init_rte_ring_dev(void)
231 uint8_t nb_ring_dev = 0;
233 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
234 /* skip ports that are not enabled */
235 if (!prox_port_cfg[port_id].active) {
238 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
239 if (port_cfg->rx_ring[0] != '\0') {
240 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
242 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
243 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
244 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
245 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
247 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
248 PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
250 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
259 static void init_port(struct prox_port_cfg *port_cfg)
261 static char dummy_pool_name[] = "0_dummy";
262 struct rte_eth_link link;
266 port_id = port_cfg - prox_port_cfg;
267 plog_info("\t*** Initializing port %u ***\n", port_id);
268 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
269 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
270 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
272 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
273 "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
275 if (port_cfg->n_rxq == 0) {
276 /* not receiving on this port */
277 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
279 uint32_t mbuf_size = MBUF_SIZE;
280 if (strcmp(port_cfg->short_name, "vmxnet3") == 0) {
281 mbuf_size = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;
283 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
284 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
285 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
287 sizeof(struct rte_pktmbuf_pool_private),
288 rte_pktmbuf_pool_init, NULL,
289 prox_pktmbuf_init, 0,
290 port_cfg->socket, 0);
291 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
292 port_cfg->socket, port_cfg->n_rxd);
293 dummy_pool_name[0]++;
295 // Most pmd do not support setting mtu yet...
296 if (!strcmp(port_cfg->short_name, "ixgbe")) {
297 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
298 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
299 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
302 if (port_cfg->n_txq == 0) {
303 /* not sending on this port */
304 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
309 if (port_cfg->n_rxq > 1) {
310 // Enable RSS if multiple receive queues
311 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
312 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
313 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
314 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
315 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONFRAG_IPV4_UDP;
317 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
321 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
322 port_id, port_cfg->n_rxq, port_cfg->n_txq);
324 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
325 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
327 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
328 !strcmp(port_cfg->short_name, "virtio") ||
329 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
330 !strcmp(port_cfg->short_name, "i40e") ||
332 !strcmp(port_cfg->short_name, "i40e_vf") ||
333 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
334 !strcmp(port_cfg->driver_name, "") || /* NULL device */
335 !strcmp(port_cfg->short_name, "vmxnet3")) {
336 port_cfg->port_conf.intr_conf.lsc = 0;
337 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
340 if (port_cfg->lsc_set_explicitely) {
341 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
342 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
344 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
345 if (port_cfg->n_txd < 512) {
346 // Vmxnet3 driver requires minimum 512 tx descriptors
347 plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
348 port_cfg->n_txd = 512;
352 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
353 port_cfg->n_txq, &port_cfg->port_conf);
354 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
356 if (port_cfg->port_conf.intr_conf.lsc) {
357 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
360 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
362 /* initialize RX queues */
363 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
364 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
365 queue_id, port_id, port_cfg->socket,
366 port_cfg->n_rxd, port_cfg->pool[queue_id]);
368 ret = rte_eth_rx_queue_setup(port_id, queue_id,
370 port_cfg->socket, &port_cfg->rx_conf,
371 port_cfg->pool[queue_id]);
373 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
375 if (!strcmp(port_cfg->short_name, "virtio")) {
376 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
377 plog_info("\t\tDisabling TX offloads (virtio does not support TX offloads)\n");
380 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
381 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS | ETH_TXQ_FLAGS_NOMULTSEGS;
382 plog_info("\t\tDisabling TX offloads and multsegs on port %d as vmxnet3 does not support them\n", port_id);
384 /* initialize one TX queue per logical core on each port */
385 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
386 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
387 queue_id, port_cfg->socket, port_cfg->n_txd);
388 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
389 port_cfg->socket, &port_cfg->tx_conf);
390 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
393 plog_info("\t\tStarting up port %u ...", port_id);
394 ret = rte_eth_dev_start(port_id);
396 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
397 plog_info(" done: ");
399 /* Getting link status can be done without waiting if Link
400 State Interrupt is enabled since in that case, if the link
401 is recognized as being down, an interrupt will notify that
403 if (port_cfg->port_conf.intr_conf.lsc)
404 rte_eth_link_get_nowait(port_id, &link);
406 rte_eth_link_get(port_id, &link);
408 port_cfg->link_up = link.link_status;
409 port_cfg->link_speed = link.link_speed;
410 if (link.link_status) {
411 plog_info("Link Up - speed %'u Mbps - %s\n",
413 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
414 "full-duplex" : "half-duplex");
417 plog_info("Link Down\n");
420 if (port_cfg->promiscuous) {
421 rte_eth_promiscuous_enable(port_id);
422 plog_info("\t\tport %u in promiscuous mode\n", port_id);
425 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
426 strcmp(port_cfg->short_name, "i40e") &&
427 strcmp(port_cfg->short_name, "i40e_vf") &&
428 strcmp(port_cfg->short_name, "vmxnet3")) {
429 for (uint8_t i = 0; i < 16; ++i) {
430 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
432 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
434 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
436 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
442 void init_port_all(void)
444 uint8_t max_port_idx = prox_last_port_active() + 1;
446 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
447 if (!prox_port_cfg[portid].active) {
450 init_port(&prox_port_cfg[portid]);
454 void close_ports_atexit(void)
456 uint8_t max_port_idx = prox_last_port_active() + 1;
458 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
459 if (!prox_port_cfg[portid].active) {
462 rte_eth_dev_close(portid);
466 void init_port_addr(void)
468 struct prox_port_cfg *port_cfg;
470 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
471 if (!prox_port_cfg[port_id].active) {
474 port_cfg = &prox_port_cfg[port_id];
476 switch (port_cfg->type) {
477 case PROX_PORT_MAC_HW:
478 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
480 case PROX_PORT_MAC_RAND:
481 eth_random_addr(port_cfg->eth_addr.addr_bytes);
483 case PROX_PORT_MAC_SET:
489 int port_is_active(uint8_t port_id)
491 if (port_id > PROX_MAX_PORTS) {
492 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
496 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
497 if (!port_cfg->active) {
498 plog_info("Port %u is not active\n", port_id);