// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0)) && (RTE_VERSION <= RTE_VERSION_NUM(17,5,0,1))
23 #include <rte_eth_null.h>
26 #include "prox_port_cfg.h"
27 #include "prox_globals.h"
33 #include "prox_cksum.h"
35 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
38 int prox_nb_active_ports(void)
41 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
42 ret += prox_port_cfg[i].active;
47 int prox_last_port_active(void)
50 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
51 if (prox_port_cfg[i].active) {
58 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
59 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
60 __attribute__((unused)) void *ret_param)
62 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
65 if (RTE_ETH_EVENT_INTR_LSC != type) {
66 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
73 rte_atomic32_inc(&lsc);
75 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
/* Arguments handed to prox_pktmbuf_reinit() when walking a mempool. */
struct prox_pktmbuf_reinit_args {
	struct rte_mempool *mp;  /* mempool whose elements get re-initialized */
	struct lcore_cfg *lconf; /* forwarded as opaque arg to prox_pktmbuf_init() */
};
85 /* standard mbuf initialization procedure */
86 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
88 struct rte_mbuf *mbuf = _m;
90 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
91 mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
93 mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
94 mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
97 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
100 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
102 struct prox_pktmbuf_reinit_args *init_args = arg;
106 obj += init_args->mp->header_size;
107 m = (struct rte_mbuf*)obj;
109 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
112 /* initialize rte devices and check the number of available ports */
113 void init_rte_dev(int use_dummy_devices)
115 uint8_t nb_ports, port_id_max, port_id_last;
116 struct rte_eth_dev_info dev_info;
118 nb_ports = rte_eth_dev_count();
119 /* get available ports configuration */
120 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
122 if (use_dummy_devices) {
123 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0)) && (RTE_VERSION <= RTE_VERSION_NUM(17,5,0,1))
124 nb_ports = prox_last_port_active() + 1;
125 plog_info("Creating %u dummy devices\n", nb_ports);
127 char port_name[32] = "0dummy_dev";
128 for (uint32_t i = 0; i < nb_ports; ++i) {
129 eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
133 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
137 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
138 plog_info("\tDPDK has found %u ports\n", nb_ports);
141 if (nb_ports > PROX_MAX_PORTS) {
142 plog_warn("\tWarning: I can deal with at most %u ports."
143 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
145 nb_ports = PROX_MAX_PORTS;
147 port_id_max = nb_ports - 1;
148 port_id_last = prox_last_port_active();
149 PROX_PANIC(port_id_last > port_id_max,
150 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
151 port_id_last, port_id_max);
153 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
154 for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
155 /* skip ports that are not enabled */
156 if (!prox_port_cfg[port_id].active) {
159 plog_info("\tGetting info for rte dev %u\n", port_id);
160 rte_eth_dev_info_get(port_id, &dev_info);
161 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
162 port_cfg->socket = -1;
164 port_cfg->max_txq = dev_info.max_tx_queues;
165 port_cfg->max_rxq = dev_info.max_rx_queues;
167 if (!dev_info.pci_dev)
170 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
171 "%04x:%02x:%02x.%1x", dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
172 strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
173 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
175 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
176 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
177 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
178 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
180 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
183 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
187 /* Try to find the device's numa node */
189 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
190 FILE* numa_node_fd = fopen(buf, "r");
192 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
193 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
195 port_cfg->socket = strtol(buf, 0, 0);
196 if (port_cfg->socket == -1) {
197 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
199 fclose(numa_node_fd);
202 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
203 port_cfg->capabilities.tx_offload_cksum |= IPV4_CKSUM;
205 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
206 port_cfg->capabilities.tx_offload_cksum |= UDP_CKSUM;
211 /* Create rte ring-backed devices */
212 uint8_t init_rte_ring_dev(void)
214 uint8_t nb_ring_dev = 0;
216 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
217 /* skip ports that are not enabled */
218 if (!prox_port_cfg[port_id].active) {
221 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
222 if (port_cfg->rx_ring[0] != '\0') {
223 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
225 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
226 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
227 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
228 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
230 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
231 PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
233 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/* Configure and start one DPDK port described by *port_cfg: falls back to a
 * dummy mempool when the port has no RX queues, enables RSS for multi-queue
 * RX, applies per-PMD quirks (lsc, descriptor minimums, txq flags), sets up
 * all RX/TX queues, starts the port, reads link status, and maps per-queue
 * statistics.
 * NOTE(review): this extract is garbled — every line carries a stray leading
 * number and many original lines are missing; only comments are added here,
 * the code text is untouched. */
242 static void init_port(struct prox_port_cfg *port_cfg)
244 static char dummy_pool_name[] = "0_dummy";
245 struct rte_eth_link link;
/* port index is derived from the entry's position in the global table */
249 port_id = port_cfg - prox_port_cfg;
250 plog_info("\t*** Initializing port %u ***\n", port_id);
251 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
252 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
253 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
/* a port with neither RX nor TX queues is a configuration error */
255 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
256 "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
/* TX-only port: DPDK still requires at least one RX queue, fed from a
 * small dummy mempool allocated below */
258 if (port_cfg->n_rxq == 0) {
259 /* not receiving on this port */
260 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
262 uint32_t mbuf_size = MBUF_SIZE;
/* vmxnet3 needs headroom on top of the base mbuf size */
263 if (strcmp(port_cfg->short_name, "vmxnet3") == 0) {
264 mbuf_size = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;
266 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
267 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
268 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
270 sizeof(struct rte_pktmbuf_pool_private),
271 rte_pktmbuf_pool_init, NULL,
272 prox_pktmbuf_init, 0,
273 port_cfg->socket, 0);
274 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
275 port_cfg->socket, port_cfg->n_rxd);
/* mempool names must be unique: bump the leading character per pool */
276 dummy_pool_name[0]++;
278 // Most pmd do not support setting mtu yet...
279 if (!strcmp(port_cfg->short_name, "ixgbe")) {
280 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
281 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
282 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
/* RX-only port: likewise force at least one TX queue */
285 if (port_cfg->n_txq == 0) {
286 /* not sending on this port */
287 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
292 if (port_cfg->n_rxq > 1) {
293 // Enable RSS if multiple receive queues
294 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
295 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
296 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
/* the RSS hash-function flag names changed in DPDK 2.0 */
297 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
298 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONFRAG_IPV4_UDP;
300 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
304 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
305 port_id, port_cfg->n_rxq, port_cfg->n_txq);
307 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
308 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
/* PMDs that do not support link-state interrupts: force lsc off */
310 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
311 !strcmp(port_cfg->short_name, "virtio") ||
312 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
313 !strcmp(port_cfg->short_name, "i40e") ||
315 !strcmp(port_cfg->short_name, "i40e_vf") ||
316 !strcmp(port_cfg->short_name, "avp") || /* Wind River */
317 !strcmp(port_cfg->driver_name, "") || /* NULL device */
318 !strcmp(port_cfg->short_name, "vmxnet3")) {
319 port_cfg->port_conf.intr_conf.lsc = 0;
320 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
/* an explicit lsc setting in the config file always wins */
323 if (port_cfg->lsc_set_explicitely) {
324 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
325 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
327 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
328 if (port_cfg->n_txd < 512) {
329 // Vmxnet3 driver requires minimum 512 tx descriptors
330 plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
331 port_cfg->n_txd = 512;
335 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
336 port_cfg->n_txq, &port_cfg->port_conf);
337 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
/* register the LSC callback only when the interrupt is actually enabled */
339 if (port_cfg->port_conf.intr_conf.lsc) {
340 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
343 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
345 /* initialize RX queues */
346 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
347 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
348 queue_id, port_id, port_cfg->socket,
349 port_cfg->n_rxd, port_cfg->pool[queue_id]);
351 ret = rte_eth_rx_queue_setup(port_id, queue_id,
353 port_cfg->socket, &port_cfg->rx_conf,
354 port_cfg->pool[queue_id]);
356 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
/* per-PMD TX queue quirks: disable offloads (and multi-seg for vmxnet3) */
358 if (!strcmp(port_cfg->short_name, "virtio")) {
359 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
360 plog_info("\t\tDisabling TX offloads (virtio does not support TX offloads)\n");
363 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
364 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS | ETH_TXQ_FLAGS_NOMULTSEGS;
365 plog_info("\t\tDisabling TX offloads and multsegs on port %d as vmxnet3 does not support them\n", port_id);
367 /* initialize one TX queue per logical core on each port */
368 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
369 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
370 queue_id, port_cfg->socket, port_cfg->n_txd);
371 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
372 port_cfg->socket, &port_cfg->tx_conf);
373 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
376 plog_info("\t\tStarting up port %u ...", port_id);
377 ret = rte_eth_dev_start(port_id);
379 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
380 plog_info(" done: ");
382 /* Getting link status can be done without waiting if Link
383 State Interrupt is enabled since in that case, if the link
384 is recognized as being down, an interrupt will notify that
386 if (port_cfg->port_conf.intr_conf.lsc)
387 rte_eth_link_get_nowait(port_id, &link);
389 rte_eth_link_get(port_id, &link);
/* cache the result so the rest of PROX need not query the NIC again */
391 port_cfg->link_up = link.link_status;
392 port_cfg->link_speed = link.link_speed;
393 if (link.link_status) {
394 plog_info("Link Up - speed %'u Mbps - %s\n",
396 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
397 "full-duplex" : "half-duplex");
400 plog_info("Link Down\n");
403 if (port_cfg->promiscuous) {
404 rte_eth_promiscuous_enable(port_id);
405 plog_info("\t\tport %u in promiscuous mode\n", port_id);
/* queue-stats mapping is unsupported on these PMDs, so skip them;
 * failures elsewhere are only logged, not fatal */
408 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
409 strcmp(port_cfg->short_name, "i40e") &&
410 strcmp(port_cfg->short_name, "i40e_vf") &&
411 strcmp(port_cfg->short_name, "vmxnet3")) {
412 for (uint8_t i = 0; i < 16; ++i) {
413 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
415 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
417 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
419 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
425 void init_port_all(void)
427 uint8_t max_port_idx = prox_last_port_active() + 1;
429 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
430 if (!prox_port_cfg[portid].active) {
433 init_port(&prox_port_cfg[portid]);
437 void close_ports_atexit(void)
439 uint8_t max_port_idx = prox_last_port_active() + 1;
441 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
442 if (!prox_port_cfg[portid].active) {
445 rte_eth_dev_close(portid);
449 void init_port_addr(void)
451 struct prox_port_cfg *port_cfg;
453 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
454 if (!prox_port_cfg[port_id].active) {
457 port_cfg = &prox_port_cfg[port_id];
459 switch (port_cfg->type) {
460 case PROX_PORT_MAC_HW:
461 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
463 case PROX_PORT_MAC_RAND:
464 eth_random_addr(port_cfg->eth_addr.addr_bytes);
466 case PROX_PORT_MAC_SET:
472 int port_is_active(uint8_t port_id)
474 if (port_id > PROX_MAX_PORTS) {
475 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
479 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
480 if (!port_cfg->active) {
481 plog_info("Port %u is not active\n", port_id);