9af141f2212ae8e0c096fbe0ba17e872543f2a9b
[samplevnf.git] / VNFs / DPPD-PROX / prox_port_cfg.c
1 /*
2 // Copyright (c) 2010-2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <string.h>
18 #include <stdio.h>
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
21 #include <rte_mbuf.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
24 #else
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
26 #include <rte_dev.h>
27 #else
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
30 #endif
31 #endif
32 #endif
33
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
36 #include "log.h"
37 #include "quit.h"
38 #include "defaults.h"
39 #include "toeplitz.h"
40 #include "defines.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
43 #include "prox_compat.h"
44 #include "rte_ethdev.h"
45
/* Per-port configuration table; the code in this file indexes it by DPDK port id. */
struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Counter bumped by lsc_cb() each time a link-state-change event is delivered. */
rte_atomic32_t lsc;
48
49 int prox_nb_active_ports(void)
50 {
51         int ret = 0;
52         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
53                 ret += prox_port_cfg[i].active;
54         }
55         return ret;
56 }
57
58 int prox_last_port_active(void)
59 {
60         int ret = -1;
61         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
62                 if (prox_port_cfg[i].active) {
63                         ret = i;
64                 }
65         }
66         return ret;
67 }
68
69 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
70 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
71         __attribute__((unused)) void *ret_param)
72 #else
73 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
74 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
75         __attribute__((unused)) void *ret_param)
76 #else
77 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
78 #endif
79 #endif
80 {
81         if (RTE_ETH_EVENT_INTR_LSC != type) {
82 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
83                 return -1;
84 #else
85                 return;
86 #endif
87         }
88
89         rte_atomic32_inc(&lsc);
90
91 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
92         return 0;
93 #endif
94 }
95
/* Argument bundle that prox_pktmbuf_reinit() receives through its opaque `arg` pointer. */
struct prox_pktmbuf_reinit_args {
        /* pool being re-initialized; its header_size is used to step from the object start to the mbuf */
        struct rte_mempool *mp;
        /* forwarded unchanged to prox_pktmbuf_init() as its opaque argument */
        struct lcore_cfg   *lconf;
};
100
101 /* standard mbuf initialization procedure */
102 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
103 {
104         struct rte_mbuf *mbuf = _m;
105
106 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
107         mbuf->tx_offload = CALC_TX_OL(sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr));
108 #else
109         mbuf->pkt.vlan_macip.f.l2_len = sizeof(prox_rte_ether_hdr);
110         mbuf->pkt.vlan_macip.f.l3_len = sizeof(prox_rte_ipv4_hdr);
111 #endif
112
113         rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
114 }
115
116 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
117 {
118         struct prox_pktmbuf_reinit_args *init_args = arg;
119         struct rte_mbuf *m;
120         char* obj = start;
121
122         obj += init_args->mp->header_size;
123         m = (struct rte_mbuf*)obj;
124
125         prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
126 }
127
/*
 * Resolve one requested TX offload `flag` for the port pointed to by the
 * local variable `port_cfg` (which must be in scope at the expansion site).
 *
 *  - not requested                 -> only logged as disabled
 *  - listed in disabled_tx_offload -> dropped from requested_tx_offload
 *  - supported at port level       -> set in port_conf.txmode.offloads
 *  - supported at queue level only -> set in tx_conf.offloads
 *  - supported nowhere             -> dropped from requested_tx_offload
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe inside unbraced if/else; use it as `CONFIGURE_TX_OFFLOAD(X);`.
 */
#define CONFIGURE_TX_OFFLOAD(flag) do {                                                                 \
        if (port_cfg->requested_tx_offload & (flag)) {                                                  \
                if (port_cfg->disabled_tx_offload & (flag)) {                                           \
                        plog_info("\t\t%s disabled by configuration\n", #flag);                         \
                        port_cfg->requested_tx_offload &= ~(flag);                                      \
                } else if (port_cfg->dev_info.tx_offload_capa & (flag)) {                               \
                        port_cfg->port_conf.txmode.offloads |= (flag);                                  \
                        plog_info("\t\t%s enabled on port\n", #flag);                                   \
                } else if (port_cfg->dev_info.tx_queue_offload_capa & (flag)) {                         \
                        port_cfg->tx_conf.offloads |= (flag);                                           \
                        plog_info("\t\t%s enabled on queue\n", #flag);                                  \
                } else {                                                                                \
                        port_cfg->requested_tx_offload &= ~(flag);                                      \
                        plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);     \
                }                                                                                       \
        } else {                                                                                        \
                plog_info("\t\t%s disabled\n", #flag);                                                  \
        }                                                                                               \
} while (0)
146
/*
 * Resolve one requested RX offload `flag` for the port pointed to by the
 * local variable `port_cfg` (which must be in scope at the expansion site).
 *
 *  - not requested                 -> only logged as disabled
 *  - supported at port level       -> set in port_conf.rxmode.offloads
 *  - supported at queue level only -> set in rx_conf.offloads
 *  - supported nowhere             -> dropped from requested_rx_offload
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe inside unbraced if/else; use it as `CONFIGURE_RX_OFFLOAD(X);`.
 */
#define CONFIGURE_RX_OFFLOAD(flag) do {                                                                 \
        if (port_cfg->requested_rx_offload & (flag)) {                                                  \
                if (port_cfg->dev_info.rx_offload_capa & (flag)) {                                      \
                        port_cfg->port_conf.rxmode.offloads |= (flag);                                  \
                        plog_info("\t\t%s enabled on port\n", #flag);                                   \
                } else if (port_cfg->dev_info.rx_queue_offload_capa & (flag)) {                         \
                        port_cfg->rx_conf.offloads |= (flag);                                           \
                        plog_info("\t\t%s enabled on queue\n", #flag);                                  \
                } else {                                                                                \
                        port_cfg->requested_rx_offload &= ~(flag);                                      \
                        plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);     \
                }                                                                                       \
        } else {                                                                                        \
                plog_info("\t\t%s disabled\n", #flag);                                                  \
        }                                                                                               \
} while (0)
162
163
164 /* initialize rte devices and check the number of available ports */
165 void init_rte_dev(int use_dummy_devices)
166 {
167         uint8_t nb_ports, port_id_max;
168         int port_id_last;
169         struct rte_eth_dev_info dev_info;
170         const struct rte_pci_device *pci_dev;
171
172         nb_ports = rte_eth_dev_count();
173         /* get available ports configuration */
174         PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
175
176         if (use_dummy_devices) {
177 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
178                 nb_ports = prox_last_port_active() + 1;
179                 plog_info("Creating %u dummy devices\n", nb_ports);
180
181                 char port_name[32] = "0dummy_dev";
182                 for (uint32_t i = 0; i < nb_ports; ++i) {
183 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
184                         rte_vdev_init(port_name, "size=64,copy=0");
185 #else
186                         eth_dev_null_create(port_name, 0, PROX_RTE_ETHER_MIN_LEN, 0);
187 #endif
188                         port_name[0]++;
189                 }
190 #else
191         PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
192 #endif
193         }
194         else if (prox_last_port_active() != -1) {
195                 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
196                 plog_info("\tDPDK has found %u ports\n", nb_ports);
197         }
198
199         if (nb_ports > PROX_MAX_PORTS) {
200                 plog_warn("\tWarning: I can deal with at most %u ports."
201                         " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
202
203                 nb_ports = PROX_MAX_PORTS;
204         }
205         port_id_max = nb_ports - 1;
206         port_id_last = prox_last_port_active();
207         PROX_PANIC(port_id_last > port_id_max,
208                    "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
209                    port_id_last, port_id_max);
210
211         /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
212         for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
213                 /* skip ports that are not enabled */
214                 if (!prox_port_cfg[port_id].active) {
215                         continue;
216                 }
217                 plog_info("\tGetting info for rte dev %u\n", port_id);
218                 rte_eth_dev_info_get(port_id, &dev_info);
219                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
220                 port_cfg->socket = -1;
221
222                 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
223                 port_cfg->max_txq = dev_info.max_tx_queues;
224                 port_cfg->max_rxq = dev_info.max_rx_queues;
225                 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
226                 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
227                 port_cfg->min_tx_desc = dev_info.tx_desc_lim.nb_min;
228                 port_cfg->max_tx_desc = dev_info.tx_desc_lim.nb_max;
229                 port_cfg->min_rx_desc = dev_info.rx_desc_lim.nb_min;
230                 port_cfg->max_rx_desc = dev_info.rx_desc_lim.nb_max;
231
232                 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
233                 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
234                 plog_info("\tPort %u : %d<=nb_tx_desc<=%d %d<=nb_rx_desc<=%d\n", port_id, port_cfg->min_tx_desc, port_cfg->max_tx_desc, port_cfg->min_rx_desc, port_cfg->max_rx_desc);
235
236                 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
237                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
238                 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
239                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
240                 } else {
241                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
242                 }
243                 char *ptr;
244                 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
245                         *ptr = '\x0';
246                 }
247
248 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
249                 pci_dev = dev_info.pci_dev;
250 #else
251                 if (!dev_info.device)
252                         continue;
253                 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
254 #endif
255                 if (!pci_dev)
256                         continue;
257
258                 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
259                          "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
260                 /* Try to find the device's numa node */
261                 char buf[1024];
262                 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
263                 FILE* numa_node_fd = fopen(buf, "r");
264                 if (numa_node_fd) {
265                         if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
266                                 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
267                         }
268                         port_cfg->socket = strtol(buf, 0, 0);
269                         if (port_cfg->socket == -1) {
270                                 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
271                         }
272                         fclose(numa_node_fd);
273                 }
274
275                 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
276                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
277                         plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
278                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
279                 }
280                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
281                         plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
282                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
283                 }
284                 // Some OVS versions reports that they support UDP offload and no IPv4 offload, but fails when UDP offload is enabled
285                 if ((!strcmp(port_cfg->short_name, "virtio")) &&
286                         ((port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) == 0) &&
287                         (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
288                         plog_info("\t\tDisabling UDP cksum on virtio\n");
289                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
290                 }
291         }
292 }
293
294 /* Create rte ring-backed devices */
295 uint8_t init_rte_ring_dev(void)
296 {
297         uint8_t nb_ring_dev = 0;
298
299         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
300                 /* skip ports that are not enabled */
301                 if (!prox_port_cfg[port_id].active) {
302                         continue;
303                 }
304                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
305                 if (port_cfg->rx_ring[0] != '\0') {
306                         plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
307
308                         struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
309                         PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
310                         struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
311                         PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
312
313                         int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
314                         PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
315
316                         port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
317
318                         nb_ring_dev++;
319                 }
320         }
321
322         return nb_ring_dev;
323 }
324
325 static void print_port_capa(struct prox_port_cfg *port_cfg)
326 {
327         uint8_t port_id;
328
329         port_id = port_cfg - prox_port_cfg;
330         plog_info("\t*** Initializing port %u ***\n", port_id);
331         plog_info("\t\tPort name is set to %s\n", port_cfg->name);
332         plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
333         plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
334 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
335         plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
336 #endif
337         if (port_cfg->max_link_speed != UINT32_MAX) {
338                 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
339         }
340
341 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
342         plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
343         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
344                 plog_info("VLAN STRIP | ");
345         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
346                 plog_info("IPV4 CKSUM | ");
347         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
348                 plog_info("UDP CKSUM | ");
349         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
350                 plog_info("TCP CKSUM | ");
351         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
352                 plog_info("TCP LRO | ");
353         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
354                 plog_info("QINQ STRIP | ");
355         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
356                 plog_info("OUTER_IPV4_CKSUM | ");
357         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
358                 plog_info("MACSEC STRIP | ");
359         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
360                 plog_info("HEADER SPLIT | ");
361         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
362                 plog_info("VLAN FILTER | ");
363         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
364                 plog_info("VLAN EXTEND | ");
365         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
366                 plog_info("JUMBO FRAME | ");
367 #if defined(DEV_RX_OFFLOAD_CRC_STRIP)
368         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
369                 plog_info("CRC STRIP | ");
370 #endif
371 #if defined(DEV_RX_OFFLOAD_KEEP_CRC)
372         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)
373                 plog_info("KEEP CRC | ");
374 #endif
375         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
376                 plog_info("SCATTER | ");
377         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
378                 plog_info("TIMESTAMP | ");
379         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
380                 plog_info("SECURITY ");
381         plog_info("\n");
382
383         plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
384         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
385                 plog_info("VLAN INSERT | ");
386         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
387                 plog_info("IPV4 CKSUM | ");
388         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
389                 plog_info("UDP CKSUM | ");
390         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
391                 plog_info("TCP CKSUM | ");
392         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
393                 plog_info("SCTP CKSUM | ");
394         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
395                 plog_info("TCP TS0 | ");
396         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
397                 plog_info("UDP TSO | ");
398         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
399                 plog_info("OUTER IPV4 CKSUM | ");
400         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
401                 plog_info("QINQ INSERT | ");
402         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
403                 plog_info("VLAN TNL TSO | ");
404         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
405                 plog_info("GRE TNL TSO | ");
406         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
407                 plog_info("IPIP TNL TSO | ");
408         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
409                 plog_info("GENEVE TNL TSO | ");
410         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
411                 plog_info("MACSEC INSERT | ");
412         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
413                 plog_info("MT LOCKFREE | ");
414         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
415                 plog_info("MULTI SEG | ");
416         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
417                 plog_info("SECURITY | ");
418         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
419                 plog_info("UDP TNL TSO | ");
420         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
421                 plog_info("IP TNL TSO | ");
422         plog_info("\n");
423
424         plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
425         plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
426         plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
427         plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
428         plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
429 #endif
430 }
431
432 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
433 {
434         port_cfg->max_link_speed = UINT32_MAX;
435
436 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
437         // virtio and vmxnet3 reports fake max_link_speed
438         if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
439                 // Get link_speed from highest capability from the port
440                 // This will be used by gen and lat for extrapolation purposes
441                 // The negotiated link_speed (as reported by rte_eth_link_get
442                 // or rte_eth_link_get_nowait) might be reported too late
443                 // and might result in wrong exrapolation, and hence should not be used
444                 // for extrapolation purposes
445                 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
446                         port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
447                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
448                         port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
449                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
450                         port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
451                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
452                         port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
453                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
454                         port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
455                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
456                         port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
457                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
458                         port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
459                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
460                         port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
461                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
462                         port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
463                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
464                         port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
465                 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
466                         port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
467                 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
468                         port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
469
470         }
471 #endif
472 }
473
474 static void init_port(struct prox_port_cfg *port_cfg)
475 {
476         static char dummy_pool_name[] = "0_dummy";
477         struct rte_eth_link link;
478         uint8_t port_id;
479         int ret;
480
481         get_max_link_speed(port_cfg);
482         print_port_capa(port_cfg);
483         port_id = port_cfg - prox_port_cfg;
484         PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
485                    "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
486
487         if (port_cfg->n_rxq == 0) {
488                 /* not receiving on this port */
489                 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
490                 port_cfg->n_rxq = 1;
491                 uint32_t mbuf_size = TX_MBUF_SIZE;
492                 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
493                         mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
494
495                 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
496                           port_cfg->socket, port_cfg->n_rxd, mbuf_size);
497                 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
498                                                        0,
499                                                        sizeof(struct rte_pktmbuf_pool_private),
500                                                        rte_pktmbuf_pool_init, NULL,
501                                                        prox_pktmbuf_init, 0,
502                                                        port_cfg->socket, 0);
503                 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
504                            port_cfg->socket, port_cfg->n_rxd);
505                 dummy_pool_name[0]++;
506         } else {
507                 // Most pmd should now support setting mtu
508                 if (port_cfg->mtu + PROX_RTE_ETHER_HDR_LEN + PROX_RTE_ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
509                         plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
510                         port_cfg->mtu = port_cfg->max_rx_pkt_len;
511                 }
512                 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
513                 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
514                 if (ret)
515                         plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
516
517                 if (port_cfg->n_txq == 0) {
518                         /* not sending on this port */
519                         plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
520                         port_cfg->n_txq = 1;
521                 }
522         }
523
524         if (port_cfg->n_rxq > 1)  {
525                 // Enable RSS if multiple receive queues
526                 port_cfg->port_conf.rxmode.mq_mode                      |= ETH_MQ_RX_RSS;
527                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key        = toeplitz_init_key;
528                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len    = TOEPLITZ_KEY_LEN;
529 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
530                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IP|ETH_RSS_UDP;
531 #else
532                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
533 #endif
534         }
535
536         // Make sure that the requested RSS offload is supported by the PMD
537 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
538         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
539 #endif
540         plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
541
542         // rxmode such as hw src strip
543 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
544 #if defined (DEV_RX_OFFLOAD_CRC_STRIP)
545         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
546 #endif
547 #if defined (DEV_RX_OFFLOAD_KEEP_CRC)
548         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_KEEP_CRC);
549 #endif
550         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
551         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
552 #else
553         if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
554                 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
555         }
556         if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
557                 port_cfg->port_conf.rxmode.jumbo_frame = 1;
558         }
559 #endif
560
561         // IPV4, UDP, SCTP Checksums
562 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
563         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
564         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
565         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
566 #else
567         if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
568                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
569                 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
570         }
571         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
572                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
573                 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
574         }
575 #endif
576         // Multi Segments
577 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
578         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
579 #else
580         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
581                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
582                 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
583         } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
584                 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
585         else
586                 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
587
588         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
589                 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
590         else
591                 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
592 #endif
593
594         // Refcount
595 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
596         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
597 #else
598         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
599                 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
600         else
601                 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
602 #endif
603
604         plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
605                   port_id, port_cfg->n_rxq, port_cfg->n_txq);
606
607         PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
608         PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
609
610         if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
611             !strcmp(port_cfg->short_name, "virtio") ||
612 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
613             !strcmp(port_cfg->short_name, "i40e") ||
614 #endif
615             !strcmp(port_cfg->short_name, "i40e_vf") ||
616             !strcmp(port_cfg->short_name, "avp") || /* Wind River */
617             !strcmp(port_cfg->driver_name, "") || /* NULL device */
618             !strcmp(port_cfg->short_name, "vmxnet3")) {
619                 port_cfg->port_conf.intr_conf.lsc = 0;
620                 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
621         }
622
623         if (port_cfg->lsc_set_explicitely) {
624                 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
625                 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
626         }
627         if (port_cfg->n_txd < port_cfg->min_tx_desc) {
628                 plog_info("\t\tNumber of TX descriptors is set to %d (minimum required for %s\n", port_cfg->min_tx_desc, port_cfg->short_name);
629                 port_cfg->n_txd = port_cfg->min_tx_desc;
630         }
631
632         if (port_cfg->n_rxd < port_cfg->min_rx_desc) {
633                 plog_info("\t\tNumber of RX descriptors is set to %d (minimum required for %s\n", port_cfg->min_rx_desc, port_cfg->short_name);
634                 port_cfg->n_rxd = port_cfg->min_rx_desc;
635         }
636
637         if (port_cfg->n_txd > port_cfg->max_tx_desc) {
638                 plog_info("\t\tNumber of TX descriptors is set to %d (maximum required for %s\n", port_cfg->max_tx_desc, port_cfg->short_name);
639                 port_cfg->n_txd = port_cfg->max_tx_desc;
640         }
641
642         if (port_cfg->n_rxd > port_cfg->max_rx_desc) {
643                 plog_info("\t\tNumber of RX descriptors is set to %d (maximum required for %s\n", port_cfg->max_rx_desc, port_cfg->short_name);
644                 port_cfg->n_rxd = port_cfg->max_rx_desc;
645         }
646
647         ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
648                                     port_cfg->n_txq, &port_cfg->port_conf);
649         PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
650
651         if (port_cfg->port_conf.intr_conf.lsc) {
652                 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
653         }
654
655         plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
656
657         /* initialize TX queues first */
658         for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
659                 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
660                           queue_id, port_cfg->socket, port_cfg->n_txd);
661                 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
662                                              port_cfg->socket, &port_cfg->tx_conf);
663                 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
664         }
665
666         /* initialize RX queues */
667         for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
668                 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
669                           queue_id, port_id, port_cfg->socket,
670                           port_cfg->n_rxd, port_cfg->pool[queue_id]);
671                 ret = rte_eth_rx_queue_setup(port_id, queue_id,
672                                              port_cfg->n_rxd,
673                                              port_cfg->socket, &port_cfg->rx_conf,
674                                              port_cfg->pool[queue_id]);
675                 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
676         }
677
678         plog_info("\t\tStarting up port %u ...", port_id);
679         ret = rte_eth_dev_start(port_id);
680
681         PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
682         plog_info(" done: ");
683
684         /* Getting link status can be done without waiting if Link
685            State Interrupt is enabled since in that case, if the link
686            is recognized as being down, an interrupt will notify that
687            it has gone up. */
688         if (port_cfg->port_conf.intr_conf.lsc)
689                 rte_eth_link_get_nowait(port_id, &link);
690         else
691                 rte_eth_link_get(port_id, &link);
692
693         port_cfg->link_up = link.link_status;
694         port_cfg->link_speed = link.link_speed;
695
696         if (link.link_status) {
697                 plog_info("Link Up - speed %'u Mbps - %s\n",
698                           link.link_speed,
699                           (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
700                           "full-duplex" : "half-duplex");
701         }
702         else {
703                 plog_info("Link Down\n");
704         }
705
706         if (port_cfg->promiscuous) {
707                 rte_eth_promiscuous_enable(port_id);
708                 plog_info("\t\tport %u in promiscuous mode\n", port_id);
709         }
710
711         if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
712             strcmp(port_cfg->short_name, "i40e") &&
713             strcmp(port_cfg->short_name, "i40e_vf") &&
714             strcmp(port_cfg->short_name, "vmxnet3")) {
715                 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
716                         ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
717                         if (ret) {
718                                 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
719                         }
720                 }
721                 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
722                         ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
723                         if (ret) {
724                                 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
725                         }
726                 }
727         }
728         if (port_cfg->nb_mc_addr) {
729                 rte_eth_allmulticast_enable(port_id);
730                 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
731                         plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
732                         port_cfg->nb_mc_addr = 0;
733                         rte_eth_allmulticast_disable(port_id);
734                         plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
735                 } else {
736                         plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
737                         plog_info("\t\tport %u in multicast mode\n", port_id);
738                 }
739         }
740 }
741
742 void init_port_all(void)
743 {
744         uint8_t max_port_idx = prox_last_port_active() + 1;
745
746         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
747                 if (!prox_port_cfg[portid].active) {
748                         continue;
749                 }
750                 init_port(&prox_port_cfg[portid]);
751         }
752 }
753
754 void close_ports_atexit(void)
755 {
756         uint8_t max_port_idx = prox_last_port_active() + 1;
757
758         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
759                 if (!prox_port_cfg[portid].active) {
760                         continue;
761                 }
762                 rte_eth_dev_close(portid);
763         }
764 }
765
766 void init_port_addr(void)
767 {
768         struct prox_port_cfg *port_cfg;
769         int rc;
770
771         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
772                 if (!prox_port_cfg[port_id].active) {
773                         continue;
774                 }
775                 port_cfg = &prox_port_cfg[port_id];
776
777                 switch (port_cfg->type) {
778                 case PROX_PORT_MAC_HW:
779                         rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
780                         break;
781                 case PROX_PORT_MAC_RAND:
782                         prox_rte_eth_random_addr(port_cfg->eth_addr.addr_bytes);
783                         break;
784                 case PROX_PORT_MAC_SET:
785                         if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
786                                 plog_warn("port %u: failed to set mac address. Error = %d\n", port_id, rc);
787                         break;
788                 }
789         }
790 }
791
792 int port_is_active(uint8_t port_id)
793 {
794         if (port_id > PROX_MAX_PORTS) {
795                 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
796                 return 0;
797         }
798
799         struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
800         if (!port_cfg->active) {
801                 plog_info("Port %u is not active\n", port_id);
802                 return 0;
803         }
804         return 1;
805 }