2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0)) && (RTE_VERSION <= RTE_VERSION_NUM(17,5,0,1))
23 #include <rte_eth_null.h>
26 #include "prox_port_cfg.h"
27 #include "prox_globals.h"
33 #include "prox_cksum.h"
/* Per-port configuration/state table, indexed by DPDK port id. */
struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
38 int prox_nb_active_ports(void)
41 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
42 ret += prox_port_cfg[i].active;
47 int prox_last_port_active(void)
50 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
51 if (prox_port_cfg[i].active) {
/* Link-state-change callback registered via rte_eth_dev_callback_register().
 * Counts LSC events in the global atomic 'lsc' counter (declared elsewhere
 * in this file); the port id and param are not used. */
static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
	/* NOTE(review): 'link' appears unused in the visible code — confirm
	   whether a later rte_eth_link_get*() call was intended here. */
	struct rte_eth_link link;

	/* Only link-state-change events are of interest. */
	if (RTE_ETH_EVENT_INTR_LSC != type) {

	rte_atomic32_inc(&lsc);
/* Argument bundle passed (as the opaque pointer) to prox_pktmbuf_reinit(). */
struct prox_pktmbuf_reinit_args {
	struct rte_mempool *mp;  /* pool whose elements are being re-initialized */
	struct lcore_cfg *lconf; /* forwarded as opaque arg to prox_pktmbuf_init() */
74 /* standard mbuf initialization procedure */
75 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
77 struct rte_mbuf *mbuf = _m;
79 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
80 mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
82 mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
83 mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
86 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
/* Mempool object iterator callback: re-initialize one pool element as an
 * mbuf. 'arg' is a struct prox_pktmbuf_reinit_args; 'start' points at the
 * raw pool object (including the mempool header). */
void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
	struct prox_pktmbuf_reinit_args *init_args = arg;

	/* skip the mempool per-object header to reach the mbuf itself */
	obj += init_args->mp->header_size;
	m = (struct rte_mbuf*)obj;
	/* run the standard per-mbuf init with the lcore config as opaque arg */
	prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
/* initialize rte devices and check the number of available ports */
/* When use_dummy_devices is non-zero, null-PMD devices are created instead
 * of using real NICs (only possible on DPDK 2.1 .. 17.05, per the #if below).
 * Fills prox_port_cfg[] for each active port: max queue counts, PCI address,
 * driver name/short name, NUMA socket (from sysfs) and TX checksum-offload
 * capabilities. Panics on any unusable configuration. */
void init_rte_dev(int use_dummy_devices)
	uint8_t nb_ports, port_id_max, port_id_last;
	struct rte_eth_dev_info dev_info;
	nb_ports = rte_eth_dev_count();
	/* get available ports configuration */
	PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
	if (use_dummy_devices) {
#if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0)) && (RTE_VERSION <= RTE_VERSION_NUM(17,5,0,1))
	/* one null device per configured port so port indices still line up */
	nb_ports = prox_last_port_active() + 1;
	plog_info("Creating %u dummy devices\n", nb_ports);

	char port_name[32] = "0dummy_dev";
	for (uint32_t i = 0; i < nb_ports; ++i) {
	eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
	/* this DPDK version has no null PMD support compiled in here */
	PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");

	PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
	plog_info("\tDPDK has found %u ports\n", nb_ports);

	/* clamp to what prox_port_cfg[] can hold */
	if (nb_ports > PROX_MAX_PORTS) {
	plog_warn("\tWarning: I can deal with at most %u ports."
	          " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);

	nb_ports = PROX_MAX_PORTS;
	port_id_max = nb_ports - 1;
	port_id_last = prox_last_port_active();
	/* config must not reference a port DPDK did not discover */
	PROX_PANIC(port_id_last > port_id_max,
	           "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
	           port_id_last, port_id_max);

	/* Assign ports to PROX interfaces & Read max RX/TX queues per port */
	for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
	/* skip ports that are not enabled */
	if (!prox_port_cfg[port_id].active) {
	plog_info("\tGetting info for rte dev %u\n", port_id);
	rte_eth_dev_info_get(port_id, &dev_info);
	struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
	/* -1 = socket unknown; resolved from sysfs numa_node below */
	port_cfg->socket = -1;

	port_cfg->max_txq = dev_info.max_tx_queues;
	port_cfg->max_rxq = dev_info.max_rx_queues;

	/* virtual devices (ring/null PMD) have no PCI device attached */
	if (!dev_info.pci_dev)

	snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
	         "%04x:%02x:%02x.%1x", dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
	strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
	plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);

	/* derive short_name by stripping the "rte_" / "net_" driver prefix */
	if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
	strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
	} else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
	strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
	strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));

	/* also drop a trailing "_pmd" suffix, e.g. "ixgbe_pmd" -> "ixgbe" */
	if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {

	/* Try to find the device's numa node */
	snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
	FILE* numa_node_fd = fopen(buf, "r");
	if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
	plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
	port_cfg->socket = strtol(buf, 0, 0);
	/* sysfs reports -1 when the platform has no NUMA information */
	if (port_cfg->socket == -1) {
	plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
	fclose(numa_node_fd);

	/* record which TX checksum offloads the hardware supports */
	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
	port_cfg->capabilities.tx_offload_cksum |= IPV4_CKSUM;
	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
	port_cfg->capabilities.tx_offload_cksum |= UDP_CKSUM;
/* Create rte ring-backed devices */
/* For every active port whose rx_ring name is set, create an eth_dev backed
 * by a pair of pre-existing rte_rings (looked up by name). Panics if a named
 * ring does not exist or the device cannot be created.
 * Returns: presumably the number of ring devices created — the increment and
 * return are outside the visible lines; confirm in the full source. */
uint8_t init_rte_ring_dev(void)
	uint8_t nb_ring_dev = 0;

	for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
	/* skip ports that are not enabled */
	if (!prox_port_cfg[port_id].active) {
	struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
	/* a non-empty rx_ring name marks this port as ring-backed */
	if (port_cfg->rx_ring[0] != '\0') {
	plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);

	/* both rings must already have been created elsewhere */
	struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
	PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
	struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
	PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);

	int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
	PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);

	port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/* Configure and start one DPDK port described by *port_cfg:
 *  - validate queue counts, enable RSS when n_rxq > 1,
 *  - apply per-PMD quirks (vmxnet3/virtio/VF: no LSC, no TX offloads,
 *    min 512 TX descriptors for vmxnet3, MTU only set on ixgbe),
 *  - set up all RX/TX queues, start the port, report link state,
 *  - optionally enable promiscuous mode and queue-stats mapping.
 * Panics on any configuration failure. */
static void init_port(struct prox_port_cfg *port_cfg)
	/* first char is bumped after each use to keep pool names unique */
	static char dummy_pool_name[] = "0_dummy";
	struct rte_eth_link link;

	/* recover the port index from the entry's position in the table */
	port_id = port_cfg - prox_port_cfg;
	plog_info("\t*** Initializing port %u ***\n", port_id);
	plog_info("\t\tPort name is set to %s\n", port_cfg->name);
	plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
	plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);

	PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
	           "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);

	if (port_cfg->n_rxq == 0) {
	/* not receiving on this port */
	/* DPDK still requires at least one RX queue, fed by a dummy pool */
	plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);

	uint32_t mbuf_size = MBUF_SIZE;
	/* vmxnet3 needs headroom included in the mbuf data size */
	if (strcmp(port_cfg->short_name, "vmxnet3") == 0) {
	mbuf_size = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;

	plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
	          port_cfg->socket, port_cfg->n_rxd, mbuf_size);
	port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
	                                       sizeof(struct rte_pktmbuf_pool_private),
	                                       rte_pktmbuf_pool_init, NULL,
	                                       prox_pktmbuf_init, 0,
	                                       port_cfg->socket, 0);
	PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
	           port_cfg->socket, port_cfg->n_rxd);
	dummy_pool_name[0]++;

	// Most pmd do not support setting mtu yet...
	if (!strcmp(port_cfg->short_name, "ixgbe")) {
	plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
	ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
	PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);

	if (port_cfg->n_txq == 0) {
	/* not sending on this port */
	plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);

	if (port_cfg->n_rxq > 1)  {
	// Enable RSS if multiple receive queues
	port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
	port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
	port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
#if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
	port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONFRAG_IPV4_UDP;
	/* older DPDK uses a different name for the non-fragmented UDP flag */
	port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;

	plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
	          port_id, port_cfg->n_rxq, port_cfg->n_txq);

	PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
	PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);

	/* these PMDs do not support link-state interrupts: disable LSC */
	if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
	    !strcmp(port_cfg->short_name, "virtio") ||
#if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
	    !strcmp(port_cfg->short_name, "i40e") ||
	    !strcmp(port_cfg->short_name, "i40e_vf") ||
	    !strcmp(port_cfg->short_name, "avp") || /* Wind River */
	    !strcmp(port_cfg->driver_name, "") || /* NULL device */
	    !strcmp(port_cfg->short_name, "vmxnet3")) {
	port_cfg->port_conf.intr_conf.lsc = 0;
	plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");

	/* user config takes precedence over the PMD-based default above */
	if (port_cfg->lsc_set_explicitely) {
	port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
	plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");

	if (!strcmp(port_cfg->short_name, "vmxnet3")) {
	if (port_cfg->n_txd < 512) {
	// Vmxnet3 driver requires minimum 512 tx descriptors
	plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
	port_cfg->n_txd = 512;

	ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
	                            port_cfg->n_txq, &port_cfg->port_conf);
	PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);

	/* register the LSC callback only if link-state interrupts are on */
	if (port_cfg->port_conf.intr_conf.lsc) {
	rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);

	plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));

	/* initialize RX queues */
	for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
	plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
	          queue_id, port_id, port_cfg->socket,
	          port_cfg->n_rxd, port_cfg->pool[queue_id]);

	ret = rte_eth_rx_queue_setup(port_id, queue_id,
	                             port_cfg->socket, &port_cfg->rx_conf,
	                             port_cfg->pool[queue_id]);

	PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);

	if (!strcmp(port_cfg->short_name, "virtio")) {
	port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
	plog_info("\t\tDisabling TX offloads (virtio does not support TX offloads)\n");

	if (!strcmp(port_cfg->short_name, "vmxnet3")) {
	port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS | ETH_TXQ_FLAGS_NOMULTSEGS;
	plog_info("\t\tDisabling TX offloads and multsegs on port %d as vmxnet3 does not support them\n", port_id);

	/* initialize one TX queue per logical core on each port */
	for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
	plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
	          queue_id, port_cfg->socket, port_cfg->n_txd);
	ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
	                             port_cfg->socket, &port_cfg->tx_conf);
	PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);

	plog_info("\t\tStarting up port %u ...", port_id);
	ret = rte_eth_dev_start(port_id);

	PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
	plog_info(" done: ");

	/* Getting link status can be done without waiting if Link
	   State Interrupt is enabled since in that case, if the link
	   is recognized as being down, an interrupt will notify that
	   it has changed */
	if (port_cfg->port_conf.intr_conf.lsc)
	rte_eth_link_get_nowait(port_id, &link);
	rte_eth_link_get(port_id, &link);

	port_cfg->link_up = link.link_status;
	port_cfg->link_speed = link.link_speed;
	if (link.link_status) {
	plog_info("Link Up - speed %'u Mbps - %s\n",
	          (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
	          "full-duplex" : "half-duplex");
	plog_info("Link Down\n");

	if (port_cfg->promiscuous) {
	rte_eth_promiscuous_enable(port_id);
	plog_info("\t\tport %u in promiscuous mode\n", port_id);

	/* map the first 16 queues to per-queue stats counters; some PMDs
	   do not support the mapping, so failures are only logged */
	if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
	    strcmp(port_cfg->short_name, "i40e") &&
	    strcmp(port_cfg->short_name, "i40e_vf") &&
	    strcmp(port_cfg->short_name, "vmxnet3")) {
	for (uint8_t i = 0; i < 16; ++i) {
	ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
	plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);

	ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
	plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
414 void init_port_all(void)
416 uint8_t max_port_idx = prox_last_port_active() + 1;
418 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
419 if (!prox_port_cfg[portid].active) {
422 init_port(&prox_port_cfg[portid]);
426 void close_ports_atexit(void)
428 uint8_t max_port_idx = prox_last_port_active() + 1;
430 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
431 if (!prox_port_cfg[portid].active) {
434 rte_eth_dev_close(portid);
438 void init_port_addr(void)
440 struct prox_port_cfg *port_cfg;
442 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
443 if (!prox_port_cfg[port_id].active) {
446 port_cfg = &prox_port_cfg[port_id];
448 switch (port_cfg->type) {
449 case PROX_PORT_MAC_HW:
450 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
452 case PROX_PORT_MAC_RAND:
453 eth_random_addr(port_cfg->eth_addr.addr_bytes);
455 case PROX_PORT_MAC_SET:
/* Check whether port_id refers to an active (configured) port.
 * NOTE(review): the bounds check uses '>' so port_id == PROX_MAX_PORTS
 * passes and would index one past the end of prox_port_cfg[] — this looks
 * like it should be '>='; confirm against callers before changing. */
int port_is_active(uint8_t port_id)
	if (port_id > PROX_MAX_PORTS) {
	plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);

	struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
	if (!port_cfg->active) {
	plog_info("Port %u is not active\n", port_id);