Update PROX to CRC STRIP default behavior change in DPDK
[samplevnf.git] / VNFs / DPPD-PROX / prox_port_cfg.c
1 /*
2 // Copyright (c) 2010-2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <string.h>
18 #include <stdio.h>
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
21 #include <rte_mbuf.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
24 #else
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
26 #include <rte_dev.h>
27 #else
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
30 #endif
31 #endif
32 #endif
33
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
36 #include "log.h"
37 #include "quit.h"
38 #include "defaults.h"
39 #include "toeplitz.h"
40 #include "defines.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
43 #include "prox_compat.h"
44 #include "rte_ethdev.h"
45
/* Per-port configuration table; the functions in this file index it by port id. */
46 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Counter incremented by lsc_cb() each time it sees an RTE_ETH_EVENT_INTR_LSC event. */
47 rte_atomic32_t lsc;
48
49 int prox_nb_active_ports(void)
50 {
51         int ret = 0;
52         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
53                 ret += prox_port_cfg[i].active;
54         }
55         return ret;
56 }
57
58 int prox_last_port_active(void)
59 {
60         int ret = -1;
61         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
62                 if (prox_port_cfg[i].active) {
63                         ret = i;
64                 }
65         }
66         return ret;
67 }
68
69 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
70 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
71         __attribute__((unused)) void *ret_param)
72 #else
73 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
74 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
75         __attribute__((unused)) void *ret_param)
76 #else
77 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
78 #endif
79 #endif
80 {
81         if (RTE_ETH_EVENT_INTR_LSC != type) {
82 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
83                 return -1;
84 #else
85                 return;
86 #endif
87         }
88
89         rte_atomic32_inc(&lsc);
90
91 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
92         return 0;
93 #endif
94 }
95
/* Argument bundle that prox_pktmbuf_reinit() receives through its `arg` pointer. */
96 struct prox_pktmbuf_reinit_args {
97         struct rte_mempool *mp;     /* passed on to prox_pktmbuf_init() as the mempool */
98         struct lcore_cfg   *lconf;  /* passed on to prox_pktmbuf_init() as opaque_arg */
99 };
100
101 /* standard mbuf initialization procedure */
102 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
103 {
104         struct rte_mbuf *mbuf = _m;
105
106 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
107         mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
108 #else
109         mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
110         mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
111 #endif
112
113         rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
114 }
115
116 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
117 {
118         struct prox_pktmbuf_reinit_args *init_args = arg;
119         struct rte_mbuf *m;
120         char* obj = start;
121
122         obj += init_args->mp->header_size;
123         m = (struct rte_mbuf*)obj;
124
125         prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
126 }
127
/*
 * Resolve one requested TX offload `flag` for the `port_cfg` in scope at the
 * point of use:
 *   - not requested                 -> only logged as disabled
 *   - listed in disabled_tx_offload -> dropped from the request
 *   - supported at port level       -> enabled in port_conf.txmode.offloads
 *   - supported at queue level      -> enabled in tx_conf.offloads
 *   - otherwise                     -> dropped from the request
 * Wrapped in do { } while (0) so it behaves as a single statement; use it
 * with a trailing semicolon.
 */
#define CONFIGURE_TX_OFFLOAD(flag) \
        do { \
                if (port_cfg->requested_tx_offload & (flag)) { \
                        if (port_cfg->disabled_tx_offload & (flag)) { \
                                plog_info("\t\t%s disabled by configuration\n", #flag); \
                                port_cfg->requested_tx_offload &= ~(flag); \
                        } else if (port_cfg->dev_info.tx_offload_capa & (flag)) { \
                                port_cfg->port_conf.txmode.offloads |= (flag); \
                                plog_info("\t\t%s enabled on port\n", #flag); \
                        } else if (port_cfg->dev_info.tx_queue_offload_capa & (flag)) { \
                                port_cfg->tx_conf.offloads |= (flag); \
                                plog_info("\t\t%s enabled on queue\n", #flag); \
                        } else { \
                                port_cfg->requested_tx_offload &= ~(flag); \
                                plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag); \
                        } \
                } else { \
                        plog_info("\t\t%s disabled\n", #flag); \
                } \
        } while (0)

/*
 * Resolve one requested RX offload `flag` for the `port_cfg` in scope at the
 * point of use:
 *   - not requested            -> only logged as disabled
 *   - supported at port level  -> enabled in port_conf.rxmode.offloads
 *   - supported at queue level -> enabled in rx_conf.offloads
 *   - otherwise                -> dropped from the request
 * Wrapped in do { } while (0) so it behaves as a single statement; use it
 * with a trailing semicolon.
 */
#define CONFIGURE_RX_OFFLOAD(flag) \
        do { \
                if (port_cfg->requested_rx_offload & (flag)) { \
                        if (port_cfg->dev_info.rx_offload_capa & (flag)) { \
                                port_cfg->port_conf.rxmode.offloads |= (flag); \
                                plog_info("\t\t%s enabled on port\n", #flag); \
                        } else if (port_cfg->dev_info.rx_queue_offload_capa & (flag)) { \
                                port_cfg->rx_conf.offloads |= (flag); \
                                plog_info("\t\t%s enabled on queue\n", #flag); \
                        } else { \
                                port_cfg->requested_rx_offload &= ~(flag); \
                                plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag); \
                        } \
                } else { \
                        plog_info("\t\t%s disabled\n", #flag); \
                } \
        } while (0)

163
164 /* initialize rte devices and check the number of available ports */
165 void init_rte_dev(int use_dummy_devices)
166 {
167         uint8_t nb_ports, port_id_max;
168         int port_id_last;
169         struct rte_eth_dev_info dev_info;
170         const struct rte_pci_device *pci_dev;
171
172         nb_ports = rte_eth_dev_count();
173         /* get available ports configuration */
174         PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
175
176         if (use_dummy_devices) {
177 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
178                 nb_ports = prox_last_port_active() + 1;
179                 plog_info("Creating %u dummy devices\n", nb_ports);
180
181                 char port_name[32] = "0dummy_dev";
182                 for (uint32_t i = 0; i < nb_ports; ++i) {
183 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
184                         rte_vdev_init(port_name, "size=ETHER_MIN_LEN,copy=0");
185 #else
186                         eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
187 #endif
188                         port_name[0]++;
189                 }
190 #else
191         PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
192 #endif
193         }
194         else if (prox_last_port_active() != -1) {
195                 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
196                 plog_info("\tDPDK has found %u ports\n", nb_ports);
197         }
198
199         if (nb_ports > PROX_MAX_PORTS) {
200                 plog_warn("\tWarning: I can deal with at most %u ports."
201                         " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
202
203                 nb_ports = PROX_MAX_PORTS;
204         }
205         port_id_max = nb_ports - 1;
206         port_id_last = prox_last_port_active();
207         PROX_PANIC(port_id_last > port_id_max,
208                    "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
209                    port_id_last, port_id_max);
210
211         /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
212         for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
213                 /* skip ports that are not enabled */
214                 if (!prox_port_cfg[port_id].active) {
215                         continue;
216                 }
217                 plog_info("\tGetting info for rte dev %u\n", port_id);
218                 rte_eth_dev_info_get(port_id, &dev_info);
219                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
220                 port_cfg->socket = -1;
221
222                 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
223                 port_cfg->max_txq = dev_info.max_tx_queues;
224                 port_cfg->max_rxq = dev_info.max_rx_queues;
225                 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
226                 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
227
228                 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
229                 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
230
231                 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
232                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
233                 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
234                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
235                 } else {
236                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
237                 }
238                 char *ptr;
239                 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
240                         *ptr = '\x0';
241                 }
242
243 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
244                 pci_dev = dev_info.pci_dev;
245 #else
246                 if (!dev_info.device)
247                         continue;
248                 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
249 #endif
250                 if (!pci_dev)
251                         continue;
252
253                 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
254                          "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
255                 /* Try to find the device's numa node */
256                 char buf[1024];
257                 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
258                 FILE* numa_node_fd = fopen(buf, "r");
259                 if (numa_node_fd) {
260                         if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
261                                 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
262                         }
263                         port_cfg->socket = strtol(buf, 0, 0);
264                         if (port_cfg->socket == -1) {
265                                 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
266                         }
267                         fclose(numa_node_fd);
268                 }
269
270                 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
271                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
272                         plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
273                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
274                 }
275                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
276                         plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
277                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
278                 }
279         }
280 }
281
282 /* Create rte ring-backed devices */
283 uint8_t init_rte_ring_dev(void)
284 {
285         uint8_t nb_ring_dev = 0;
286
287         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
288                 /* skip ports that are not enabled */
289                 if (!prox_port_cfg[port_id].active) {
290                         continue;
291                 }
292                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
293                 if (port_cfg->rx_ring[0] != '\0') {
294                         plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
295
296                         struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
297                         PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
298                         struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
299                         PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
300
301                         int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
302                         PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
303
304                         port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
305
306                         nb_ring_dev++;
307                 }
308         }
309
310         return nb_ring_dev;
311 }
312
313 static void print_port_capa(struct prox_port_cfg *port_cfg)
314 {
315         uint8_t port_id;
316
317         port_id = port_cfg - prox_port_cfg;
318         plog_info("\t*** Initializing port %u ***\n", port_id);
319         plog_info("\t\tPort name is set to %s\n", port_cfg->name);
320         plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
321         plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
322 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
323         plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
324 #endif
325         if (port_cfg->max_link_speed != UINT32_MAX) {
326                 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
327         }
328
329 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
330         plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
331         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
332                 plog_info("VLAN STRIP | ");
333         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
334                 plog_info("IPV4 CKSUM | ");
335         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
336                 plog_info("UDP CKSUM | ");
337         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
338                 plog_info("TCP CKSUM | ");
339         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
340                 plog_info("TCP LRO | ");
341         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
342                 plog_info("QINQ STRIP | ");
343         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
344                 plog_info("OUTER_IPV4_CKSUM | ");
345         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
346                 plog_info("MACSEC STRIP | ");
347         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
348                 plog_info("HEADER SPLIT | ");
349         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
350                 plog_info("VLAN FILTER | ");
351         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
352                 plog_info("VLAN EXTEND | ");
353         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
354                 plog_info("JUMBO FRAME | ");
355 #if defined(DEV_RX_OFFLOAD_CRC_STRIP)
356         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
357                 plog_info("CRC STRIP | ");
358 #endif
359 #if defined(DEV_RX_OFFLOAD_KEEP_CRC)
360         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)
361                 plog_info("KEEP CRC | ");
362 #endif
363         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
364                 plog_info("SCATTER | ");
365         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
366                 plog_info("TIMESTAMP | ");
367         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
368                 plog_info("SECURITY ");
369         plog_info("\n");
370
371         plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
372         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
373                 plog_info("VLAN INSERT | ");
374         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
375                 plog_info("IPV4 CKSUM | ");
376         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
377                 plog_info("UDP CKSUM | ");
378         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
379                 plog_info("TCP CKSUM | ");
380         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
381                 plog_info("SCTP CKSUM | ");
382         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
383                 plog_info("TCP TS0 | ");
384         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
385                 plog_info("UDP TSO | ");
386         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
387                 plog_info("OUTER IPV4 CKSUM | ");
388         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
389                 plog_info("QINQ INSERT | ");
390         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
391                 plog_info("VLAN TNL TSO | ");
392         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
393                 plog_info("GRE TNL TSO | ");
394         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
395                 plog_info("IPIP TNL TSO | ");
396         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
397                 plog_info("GENEVE TNL TSO | ");
398         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
399                 plog_info("MACSEC INSERT | ");
400         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
401                 plog_info("MT LOCKFREE | ");
402         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
403                 plog_info("MULTI SEG | ");
404         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
405                 plog_info("SECURITY | ");
406         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
407                 plog_info("UDP TNL TSO | ");
408         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
409                 plog_info("IP TNL TSO | ");
410         plog_info("\n");
411
412         plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
413         plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
414         plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
415         plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
416         plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
417 #endif
418 }
419
420 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
421 {
422         port_cfg->max_link_speed = UINT32_MAX;
423
424 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
425         // virtio and vmxnet3 reports fake max_link_speed
426         if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
427                 // Get link_speed from highest capability from the port
428                 // This will be used by gen and lat for extrapolation purposes
429                 // The negotiated link_speed (as reported by rte_eth_link_get
430                 // or rte_eth_link_get_nowait) might be reported too late
431                 // and might result in wrong exrapolation, and hence should not be used
432                 // for extrapolation purposes
433                 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
434                         port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
435                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
436                         port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
437                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
438                         port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
439                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
440                         port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
441                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
442                         port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
443                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
444                         port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
445                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
446                         port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
447                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
448                         port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
449                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
450                         port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
451                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
452                         port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
453                 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
454                         port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
455                 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
456                         port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
457
458         }
459 #endif
460 }
461
462 static void init_port(struct prox_port_cfg *port_cfg)
463 {
464         static char dummy_pool_name[] = "0_dummy";
465         struct rte_eth_link link;
466         uint8_t port_id;
467         int ret;
468
469         get_max_link_speed(port_cfg);
470         print_port_capa(port_cfg);
471         port_id = port_cfg - prox_port_cfg;
472         PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
473                    "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
474
475         if (port_cfg->n_rxq == 0) {
476                 /* not receiving on this port */
477                 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
478                 port_cfg->n_rxq = 1;
479                 uint32_t mbuf_size = TX_MBUF_SIZE;
480                 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
481                         mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
482
483                 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
484                           port_cfg->socket, port_cfg->n_rxd, mbuf_size);
485                 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
486                                                        0,
487                                                        sizeof(struct rte_pktmbuf_pool_private),
488                                                        rte_pktmbuf_pool_init, NULL,
489                                                        prox_pktmbuf_init, 0,
490                                                        port_cfg->socket, 0);
491                 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
492                            port_cfg->socket, port_cfg->n_rxd);
493                 dummy_pool_name[0]++;
494         } else {
495                 // Most pmd should now support setting mtu
496                 if (port_cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
497                         plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
498                         port_cfg->mtu = port_cfg->max_rx_pkt_len;
499                 }
500                 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
501                 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
502                 if (ret)
503                         plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
504
505                 if (port_cfg->n_txq == 0) {
506                         /* not sending on this port */
507                         plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
508                         port_cfg->n_txq = 1;
509                 }
510         }
511
512         if (port_cfg->n_rxq > 1)  {
513                 // Enable RSS if multiple receive queues
514                 port_cfg->port_conf.rxmode.mq_mode                      |= ETH_MQ_RX_RSS;
515                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key        = toeplitz_init_key;
516                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len    = TOEPLITZ_KEY_LEN;
517 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
518                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IP|ETH_RSS_UDP;
519 #else
520                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
521 #endif
522         }
523
524         // Make sure that the requested RSS offload is supported by the PMD
525 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
526         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
527 #endif
528         plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
529
530         // rxmode such as hw src strip
531 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
532 #if defined (DEV_RX_OFFLOAD_CRC_STRIP)
533         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
534 #endif
535 #if defined (DEV_RX_OFFLOAD_KEEP_CRC)
536         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_KEEP_CRC);
537 #endif
538         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
539         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
540 #else
541         if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
542                 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
543         }
544         if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
545                 port_cfg->port_conf.rxmode.jumbo_frame = 1;
546         }
547 #endif
548
549         // IPV4, UDP, SCTP Checksums
550 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
551         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
552         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
553         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
554 #else
555         if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
556                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
557                 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
558         }
559         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
560                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
561                 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
562         }
563 #endif
564         // Multi Segments
565 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
566         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
567 #else
568         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
569                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
570                 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
571         } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
572                 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
573         else
574                 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
575
576         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
577                 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
578         else
579                 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
580 #endif
581
582         // Refcount
583 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
584         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
585 #else
586         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
587                 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
588         else
589                 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
590 #endif
591
592         plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
593                   port_id, port_cfg->n_rxq, port_cfg->n_txq);
594
595         PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
596         PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
597
598         if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
599             !strcmp(port_cfg->short_name, "virtio") ||
600 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
601             !strcmp(port_cfg->short_name, "i40e") ||
602 #endif
603             !strcmp(port_cfg->short_name, "i40e_vf") ||
604             !strcmp(port_cfg->short_name, "avp") || /* Wind River */
605             !strcmp(port_cfg->driver_name, "") || /* NULL device */
606             !strcmp(port_cfg->short_name, "vmxnet3")) {
607                 port_cfg->port_conf.intr_conf.lsc = 0;
608                 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
609         }
610
611         if (port_cfg->lsc_set_explicitely) {
612                 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
613                 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
614         }
615         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
616                 if (port_cfg->n_txd < 512) {
617                         // Vmxnet3 driver requires minimum 512 tx descriptors
618                         plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
619                         port_cfg->n_txd = 512;
620                 }
621         }
622
623         ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
624                                     port_cfg->n_txq, &port_cfg->port_conf);
625         PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
626
627         if (port_cfg->port_conf.intr_conf.lsc) {
628                 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
629         }
630
631         plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
632
633         /* initialize TX queues first */
634         for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
635                 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
636                           queue_id, port_cfg->socket, port_cfg->n_txd);
637                 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
638                                              port_cfg->socket, &port_cfg->tx_conf);
639                 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
640         }
641
642         /* initialize RX queues */
643         for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
644                 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
645                           queue_id, port_id, port_cfg->socket,
646                           port_cfg->n_rxd, port_cfg->pool[queue_id]);
647                 ret = rte_eth_rx_queue_setup(port_id, queue_id,
648                                              port_cfg->n_rxd,
649                                              port_cfg->socket, &port_cfg->rx_conf,
650                                              port_cfg->pool[queue_id]);
651                 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
652         }
653
654         plog_info("\t\tStarting up port %u ...", port_id);
655         ret = rte_eth_dev_start(port_id);
656
657         PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
658         plog_info(" done: ");
659
660         /* Getting link status can be done without waiting if Link
661            State Interrupt is enabled since in that case, if the link
662            is recognized as being down, an interrupt will notify that
663            it has gone up. */
664         if (port_cfg->port_conf.intr_conf.lsc)
665                 rte_eth_link_get_nowait(port_id, &link);
666         else
667                 rte_eth_link_get(port_id, &link);
668
669         port_cfg->link_up = link.link_status;
670         port_cfg->link_speed = link.link_speed;
671
672         if (link.link_status) {
673                 plog_info("Link Up - speed %'u Mbps - %s\n",
674                           link.link_speed,
675                           (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
676                           "full-duplex" : "half-duplex");
677         }
678         else {
679                 plog_info("Link Down\n");
680         }
681
682         if (port_cfg->promiscuous) {
683                 rte_eth_promiscuous_enable(port_id);
684                 plog_info("\t\tport %u in promiscuous mode\n", port_id);
685         }
686
687         if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
688             strcmp(port_cfg->short_name, "i40e") &&
689             strcmp(port_cfg->short_name, "i40e_vf") &&
690             strcmp(port_cfg->short_name, "vmxnet3")) {
691                 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
692                         ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
693                         if (ret) {
694                                 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
695                         }
696                 }
697                 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
698                         ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
699                         if (ret) {
700                                 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
701                         }
702                 }
703         }
704         if (port_cfg->nb_mc_addr) {
705                 rte_eth_allmulticast_enable(port_id);
706                 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
707                         plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
708                         port_cfg->nb_mc_addr = 0;
709                         rte_eth_allmulticast_disable(port_id);
710                         plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
711                 } else {
712                         plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
713                         plog_info("\t\tport %u in multicast mode\n", port_id);
714                 }
715         }
716 }
717
718 void init_port_all(void)
719 {
720         uint8_t max_port_idx = prox_last_port_active() + 1;
721
722         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
723                 if (!prox_port_cfg[portid].active) {
724                         continue;
725                 }
726                 init_port(&prox_port_cfg[portid]);
727         }
728 }
729
730 void close_ports_atexit(void)
731 {
732         uint8_t max_port_idx = prox_last_port_active() + 1;
733
734         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
735                 if (!prox_port_cfg[portid].active) {
736                         continue;
737                 }
738                 rte_eth_dev_close(portid);
739         }
740 }
741
742 void init_port_addr(void)
743 {
744         struct prox_port_cfg *port_cfg;
745         int rc;
746
747         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
748                 if (!prox_port_cfg[port_id].active) {
749                         continue;
750                 }
751                 port_cfg = &prox_port_cfg[port_id];
752
753                 switch (port_cfg->type) {
754                 case PROX_PORT_MAC_HW:
755                         rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
756                         break;
757                 case PROX_PORT_MAC_RAND:
758                         eth_random_addr(port_cfg->eth_addr.addr_bytes);
759                         break;
760                 case PROX_PORT_MAC_SET:
761                         if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
762                                 plog_warn("port %u: failed to set mac address. Error = %d\n", port_id, rc);
763                         break;
764                 }
765         }
766 }
767
768 int port_is_active(uint8_t port_id)
769 {
770         if (port_id > PROX_MAX_PORTS) {
771                 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
772                 return 0;
773         }
774
775         struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
776         if (!port_cfg->active) {
777                 plog_info("Port %u is not active\n", port_id);
778                 return 0;
779         }
780         return 1;
781 }