2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0)) && (RTE_VERSION <= RTE_VERSION_NUM(17,5,0,1))
23 #include <rte_eth_null.h>
26 #include "prox_port_cfg.h"
27 #include "prox_globals.h"
33 #include "prox_cksum.h"
/* Global per-port configuration table, indexed by DPDK port id. */
35 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Return the number of ports flagged active in prox_port_cfg.
 * NOTE(review): this chunk elides the body's braces, the accumulator
 * declaration and the return statement; only the counting loop is visible. */
38 int prox_nb_active_ports(void)
41 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
42 ret += prox_port_cfg[i].active;
/* Return the index of the highest-numbered active port.
 * NOTE(review): the assignment inside the if, the closing braces and the
 * return statement (presumably -1 when no port is active) are elided here. */
47 int prox_last_port_active(void)
50 for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
51 if (prox_port_cfg[i].active) {
/* Link-state-change callback registered with rte_eth_dev_callback_register()
 * in init_port(). Ignores every event type except RTE_ETH_EVENT_INTR_LSC and
 * bumps the global atomic `lsc` counter so other code can notice link flaps.
 * NOTE(review): `link` is declared but its use (and the early-return body of
 * the type check) is elided from this chunk. */
58 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
60 struct rte_eth_link link;
62 if (RTE_ETH_EVENT_INTR_LSC != type) {
66 rte_atomic32_inc(&lsc);
/* Argument bundle passed (as the opaque `arg`) to prox_pktmbuf_reinit():
 * the mempool whose elements are being re-initialized plus the owning
 * lcore configuration. */
69 struct prox_pktmbuf_reinit_args {
70 struct rte_mempool *mp;
71 struct lcore_cfg *lconf;
74 /* standard mbuf initialization procedure */
/* Mempool element-init callback: pre-programs each mbuf's l2/l3 lengths
 * for an Ethernet + IPv4 header (layout differs across DPDK versions),
 * then defers to the stock rte_pktmbuf_init(). */
75 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
77 struct rte_mbuf *mbuf = _m;
79 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
/* >= 1.8: consolidated tx_offload field. */
80 mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
/* < 1.8: separate vlan_macip bitfields (the #else is elided from this chunk). */
82 mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
83 mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
86 rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
/* rte_mempool_obj_iter()-style callback used to re-run prox_pktmbuf_init()
 * over every element of an existing pool. `start` points at the raw object
 * header; skipping mp->header_size yields the mbuf itself.
 * NOTE(review): the declarations of `obj` and `m` (and `m`'s subsequent use)
 * are elided from this chunk — presumably `obj = start` before line 95. */
89 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
91 struct prox_pktmbuf_reinit_args *init_args = arg;
95 obj += init_args->mp->header_size;
96 m = (struct rte_mbuf*)obj;
98 prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
101 /* initialize rte devices and check the number of available ports */
/* Probes the DPDK ethdev layer, optionally replaces real NICs with null
 * PMD dummy devices (only supported on DPDK 2.1 .. 17.05), validates the
 * configured port indices against what DPDK found, and fills each active
 * prox_port_cfg entry with queue limits, PCI address, driver name and the
 * NUMA node read from sysfs plus TX checksum-offload capabilities. */
102 void init_rte_dev(int use_dummy_devices)
104 uint8_t nb_ports, port_id_max, port_id_last;
105 struct rte_eth_dev_info dev_info;
/* NOTE(review): rte_eth_dev_count() is deprecated in later DPDK releases
 * in favor of rte_eth_dev_count_avail() — confirm against the target DPDK. */
107 nb_ports = rte_eth_dev_count();
108 /* get available ports configuration */
109 PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
/* Dummy-device path: create one null PMD per configured port index.
 * The leading '0' of port_name is presumably bumped per iteration to keep
 * names unique (the increment is elided from this chunk) — TODO confirm. */
111 if (use_dummy_devices) {
112 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0)) && (RTE_VERSION <= RTE_VERSION_NUM(17,5,0,1))
113 nb_ports = prox_last_port_active() + 1;
114 plog_info("Creating %u dummy devices\n", nb_ports);
116 char port_name[32] = "0dummy_dev";
117 for (uint32_t i = 0; i < nb_ports; ++i) {
118 eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
/* #else branch: dummy devices unsupported on this DPDK version. */
122 PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
126 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
127 plog_info("\tDPDK has found %u ports\n", nb_ports);
/* Clamp to what PROX was compiled to handle. */
130 if (nb_ports > PROX_MAX_PORTS) {
131 plog_warn("\tWarning: I can deal with at most %u ports."
132 " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
134 nb_ports = PROX_MAX_PORTS;
/* The highest configured active port must exist among the probed ports. */
136 port_id_max = nb_ports - 1;
137 port_id_last = prox_last_port_active();
138 PROX_PANIC(port_id_last > port_id_max,
139 "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
140 port_id_last, port_id_max);
142 /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
143 for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
144 /* skip ports that are not enabled */
145 if (!prox_port_cfg[port_id].active) {
148 plog_info("\tGetting info for rte dev %u\n", port_id);
149 rte_eth_dev_info_get(port_id, &dev_info);
150 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* -1 == "unknown NUMA node" until sysfs says otherwise below. */
151 port_cfg->socket = -1;
153 port_cfg->max_txq = dev_info.max_tx_queues;
154 port_cfg->max_rxq = dev_info.max_rx_queues;
/* Virtual devices (ring/null PMDs) have no PCI device; the skip/continue
 * body of this check is elided from this chunk. */
156 if (!dev_info.pci_dev)
159 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
160 "%04x:%02x:%02x.%1x", dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus, dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
/* NOTE(review): strncpy() does not guarantee NUL termination when the
 * source fills the destination — consider snprintf(dst, sz, "%s", src). */
161 strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
162 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
/* Derive short_name by stripping the "rte_"/"net_" driver-name prefix
 * (naming convention changed across DPDK releases). */
164 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
165 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
166 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
167 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
169 strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
/* Also drop a trailing "_pmd" suffix; the truncation (*ptr = '\0') is
 * elided from this chunk, as is the declaration of `ptr`. */
172 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
176 /* Try to find the device's numa node */
178 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
179 FILE* numa_node_fd = fopen(buf, "r");
/* NOTE(review): the fopen() NULL check is elided here — confirm it exists
 * upstream, otherwise fgets(NULL stream) would be undefined behavior. */
181 if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
182 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
/* Single-socket systems report -1 in sysfs. */
184 port_cfg->socket = strtol(buf, 0, 0);
185 if (port_cfg->socket == -1) {
186 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
188 fclose(numa_node_fd);
/* Record TX checksum offload capabilities for later use by TX paths. */
191 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
192 port_cfg->capabilities.tx_offload_cksum |= IPV4_CKSUM;
194 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
195 port_cfg->capabilities.tx_offload_cksum |= UDP_CKSUM;
200 /* Create rte ring-backed devices */
/* For every active port whose config names an RX ring, look up the RX and
 * TX rte_rings by name and wrap them in an ethdev via rte_eth_from_rings().
 * Returns the number of ring-backed devices created (the increment of
 * nb_ring_dev and the final return are elided from this chunk). */
201 uint8_t init_rte_ring_dev(void)
203 uint8_t nb_ring_dev = 0;
205 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
206 /* skip ports that are not enabled */
207 if (!prox_port_cfg[port_id].active) {
210 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
/* Empty rx_ring name means this port is a real NIC, not ring-backed. */
211 if (port_cfg->rx_ring[0] != '\0') {
212 plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
214 struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
215 PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
216 struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
217 PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
/* NOTE(review): on newer DPDK, rte_eth_from_rings() returns the new port
 * id (>= 0) rather than 0/-1 — confirm the `ret != 0` check matches the
 * DPDK version this builds against. */
219 int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
220 PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
222 port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
/* Full bring-up of a single DPDK port: validates the queue configuration,
 * works around per-PMD quirks (vmxnet3, virtio, ixgbe, i40e), configures
 * RSS when multiple RX queues are used, sets up every RX/TX queue, starts
 * the device, reports link status and applies promiscuous mode and
 * per-queue stats mappings. Order of operations follows the DPDK ethdev
 * contract: configure -> queue setup -> start. */
231 static void init_port(struct prox_port_cfg *port_cfg)
/* First byte is bumped after each use to keep pool names unique. */
233 static char dummy_pool_name[] = "0_dummy";
234 struct rte_eth_link link;
/* Recover the port id from the entry's offset in the global table. */
238 port_id = port_cfg - prox_port_cfg;
239 plog_info("\t*** Initializing port %u ***\n", port_id);
240 plog_info("\t\tPort name is set to %s\n", port_cfg->name);
241 plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
242 plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
244 PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
245 "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
/* DPDK requires at least one RX queue; give TX-only ports a dummy RX
 * queue backed by a throwaway mempool. */
247 if (port_cfg->n_rxq == 0) {
248 /* not receiving on this port */
249 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
251 uint32_t mbuf_size = MBUF_SIZE;
/* vmxnet3 needs the headroom included in the element size. */
252 if (strcmp(port_cfg->short_name, "vmxnet3") == 0) {
253 mbuf_size = MBUF_SIZE + RTE_PKTMBUF_HEADROOM;
255 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
256 port_cfg->socket, port_cfg->n_rxd, mbuf_size);
257 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
259 sizeof(struct rte_pktmbuf_pool_private),
260 rte_pktmbuf_pool_init, NULL,
261 prox_pktmbuf_init, 0,
262 port_cfg->socket, 0);
263 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
264 port_cfg->socket, port_cfg->n_rxd);
265 dummy_pool_name[0]++;
267 // Most pmd do not support setting mtu yet...
268 if (!strcmp(port_cfg->short_name, "ixgbe")) {
269 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
270 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
271 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
/* Likewise ensure at least one TX queue exists (assignment elided here). */
274 if (port_cfg->n_txq == 0) {
275 /* not sending on this port */
276 plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
281 if (port_cfg->n_rxq > 1) {
282 // Enable RSS if multiple receive queues
283 port_cfg->port_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
284 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key = toeplitz_init_key;
285 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len = TOEPLITZ_KEY_LEN;
286 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
287 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONFRAG_IPV4_UDP;
/* Pre-2.0 DPDK spelled the flag ETH_RSS_NONF_IPV4_UDP (#else elided). */
289 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
293 plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
294 port_id, port_cfg->n_rxq, port_cfg->n_txq);
296 PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
297 PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
/* These PMDs (and the NULL device, whose driver_name is "") do not
 * support link-state interrupts — force LSC off. */
299 if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
300 !strcmp(port_cfg->short_name, "virtio") ||
301 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
302 !strcmp(port_cfg->short_name, "i40e") ||
304 !strcmp(port_cfg->short_name, "i40e_vf") ||
305 !strcmp(port_cfg->driver_name, "") || /* NULL device */
306 !strcmp(port_cfg->short_name, "vmxnet3")) {
307 port_cfg->port_conf.intr_conf.lsc = 0;
308 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
/* An explicit config-file setting wins over the PMD-based default above. */
311 if (port_cfg->lsc_set_explicitely) {
312 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
313 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
315 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
316 if (port_cfg->n_txd < 512) {
317 // Vmxnet3 driver requires minimum 512 tx descriptors
318 plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
319 port_cfg->n_txd = 512;
323 ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
324 port_cfg->n_txq, &port_cfg->port_conf);
325 PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
/* Register the LSC callback only when the interrupt ended up enabled. */
327 if (port_cfg->port_conf.intr_conf.lsc) {
328 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
331 plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
333 /* initialize RX queues */
334 for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
335 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
336 queue_id, port_id, port_cfg->socket,
337 port_cfg->n_rxd, port_cfg->pool[queue_id]);
339 ret = rte_eth_rx_queue_setup(port_id, queue_id,
341 port_cfg->socket, &port_cfg->rx_conf,
342 port_cfg->pool[queue_id]);
344 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
346 if (!strcmp(port_cfg->short_name, "virtio")) {
347 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
348 plog_info("\t\tDisabling TX offloads (virtio does not support TX offloads)\n");
351 if (!strcmp(port_cfg->short_name, "vmxnet3")) {
352 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS | ETH_TXQ_FLAGS_NOMULTSEGS;
353 plog_info("\t\tDisabling TX offloads and multsegs on port %d as vmxnet3 does not support them\n", port_id);
355 /* initialize one TX queue per logical core on each port */
356 for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
357 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
358 queue_id, port_cfg->socket, port_cfg->n_txd);
359 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
360 port_cfg->socket, &port_cfg->tx_conf);
361 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
364 plog_info("\t\tStarting up port %u ...", port_id);
365 ret = rte_eth_dev_start(port_id);
367 PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
368 plog_info(" done: ");
370 /* Getting link status can be done without waiting if Link
371 State Interrupt is enabled since in that case, if the link
372 is recognized as being down, an interrupt will notify that
374 if (port_cfg->port_conf.intr_conf.lsc)
375 rte_eth_link_get_nowait(port_id, &link);
377 rte_eth_link_get(port_id, &link);
/* Cache link status/speed for the stats/display code. */
379 port_cfg->link_up = link.link_status;
380 port_cfg->link_speed = link.link_speed;
381 if (link.link_status) {
382 plog_info("Link Up - speed %'u Mbps - %s\n",
384 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
385 "full-duplex" : "half-duplex");
388 plog_info("Link Down\n");
391 if (port_cfg->promiscuous) {
392 rte_eth_promiscuous_enable(port_id);
393 plog_info("\t\tport %u in promiscuous mode\n", port_id);
/* Map the first 16 RX/TX queues 1:1 onto per-queue stats registers;
 * skipped for PMDs known not to support the mapping. Failures are only
 * logged, not fatal. */
396 if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
397 strcmp(port_cfg->short_name, "i40e") &&
398 strcmp(port_cfg->short_name, "i40e_vf") &&
399 strcmp(port_cfg->short_name, "vmxnet3")) {
400 for (uint8_t i = 0; i < 16; ++i) {
401 ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
403 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
405 ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
407 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
/* Run init_port() for every active port up to the highest configured index. */
413 void init_port_all(void)
415 uint8_t max_port_idx = prox_last_port_active() + 1;
417 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
418 if (!prox_port_cfg[portid].active) {
421 init_port(&prox_port_cfg[portid]);
/* atexit-style cleanup: close every active DPDK port. */
425 void close_ports_atexit(void)
427 uint8_t max_port_idx = prox_last_port_active() + 1;
429 for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
430 if (!prox_port_cfg[portid].active) {
433 rte_eth_dev_close(portid);
/* Populate eth_addr for each active port according to its configured MAC
 * policy: read it from the hardware, randomize it, or keep the value set
 * in the config (the PROX_PORT_MAC_SET case body and the per-case breaks
 * are elided from this chunk). */
437 void init_port_addr(void)
439 struct prox_port_cfg *port_cfg;
441 for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
442 if (!prox_port_cfg[port_id].active) {
445 port_cfg = &prox_port_cfg[port_id];
447 switch (port_cfg->type) {
448 case PROX_PORT_MAC_HW:
449 rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
451 case PROX_PORT_MAC_RAND:
452 eth_random_addr(port_cfg->eth_addr.addr_bytes);
454 case PROX_PORT_MAC_SET:
460 int port_is_active(uint8_t port_id)
462 if (port_id > PROX_MAX_PORTS) {
463 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
467 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
468 if (!port_cfg->active) {
469 plog_info("Port %u is not active\n", port_id);