PROX: fix minimum rx buffer size used within generator
[samplevnf.git] / VNFs / DPPD-PROX / prox_port_cfg.c
1 /*
2 // Copyright (c) 2010-2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <string.h>
18 #include <stdio.h>
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
21 #include <rte_mbuf.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
24 #else
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
26 #include <rte_dev.h>
27 #else
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
30 #endif
31 #endif
32 #endif
33
34 #include "prox_port_cfg.h"
35 #include "prox_globals.h"
36 #include "log.h"
37 #include "quit.h"
38 #include "defaults.h"
39 #include "toeplitz.h"
40 #include "defines.h"
41 #include "prox_cksum.h"
42 #include "stats_irq.h"
43 #include "prox_compat.h"
44
/* Global table of per-port configuration, PROX_MAX_PORTS entries, indexed by port id. */
45 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
/* Atomic counter incremented by lsc_cb() for every link-state-change event it receives. */
46 rte_atomic32_t lsc;
47
48 int prox_nb_active_ports(void)
49 {
50         int ret = 0;
51         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
52                 ret += prox_port_cfg[i].active;
53         }
54         return ret;
55 }
56
57 int prox_last_port_active(void)
58 {
59         int ret = -1;
60         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
61                 if (prox_port_cfg[i].active) {
62                         ret = i;
63                 }
64         }
65         return ret;
66 }
67
68 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
69 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
70         __attribute__((unused)) void *ret_param)
71 #else
72 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
73 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
74         __attribute__((unused)) void *ret_param)
75 #else
76 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
77 #endif
78 #endif
79 {
80         if (RTE_ETH_EVENT_INTR_LSC != type) {
81 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
82                 return -1;
83 #else
84                 return;
85 #endif
86         }
87
88         rte_atomic32_inc(&lsc);
89
90 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
91         return 0;
92 #endif
93 }
94
/*
 * Argument bundle handed to prox_pktmbuf_reinit() through its opaque
 * `arg` pointer.
 */
struct prox_pktmbuf_reinit_args {
        /* Pool owning the objects; supplies header_size and is forwarded to prox_pktmbuf_init(). */
        struct rte_mempool *mp;
        /* Forwarded unchanged as the opaque argument of prox_pktmbuf_init(). */
        struct lcore_cfg *lconf;
};
99
100 /* standard mbuf initialization procedure */
101 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
102 {
103         struct rte_mbuf *mbuf = _m;
104
105 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
106         mbuf->tx_offload = CALC_TX_OL(sizeof(struct ether_hdr), sizeof(struct ipv4_hdr));
107 #else
108         mbuf->pkt.vlan_macip.f.l2_len = sizeof(struct ether_hdr);
109         mbuf->pkt.vlan_macip.f.l3_len = sizeof(struct ipv4_hdr);
110 #endif
111
112         rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
113 }
114
115 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
116 {
117         struct prox_pktmbuf_reinit_args *init_args = arg;
118         struct rte_mbuf *m;
119         char* obj = start;
120
121         obj += init_args->mp->header_size;
122         m = (struct rte_mbuf*)obj;
123
124         prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
125 }
126
/*
 * Resolve one requested TX offload `flag` against the local `port_cfg`
 * (a struct prox_port_cfg * that must be in scope at the call site):
 *   - requested but listed in disabled_tx_offload: dropped from the request;
 *   - supported by the port: enabled in port_conf.txmode.offloads;
 *   - supported per queue only: enabled in tx_conf.offloads;
 *   - unsupported: dropped from the request.
 * Every outcome is logged. The body is wrapped in do { } while (0) so the
 * macro behaves as a single statement (safe inside un-braced if/else);
 * invoke it with a trailing semicolon.
 */
#define CONFIGURE_TX_OFFLOAD(flag)                                              \
        do {                                                                    \
                if (port_cfg->requested_tx_offload & (flag)) {                  \
                        if (port_cfg->disabled_tx_offload & (flag)) {           \
                                plog_info("\t\t%s disabled by configuration\n", #flag);\
                                port_cfg->requested_tx_offload &= ~(flag);      \
                        } else if (port_cfg->dev_info.tx_offload_capa & (flag)) {\
                                port_cfg->port_conf.txmode.offloads |= (flag);  \
                                plog_info("\t\t%s enabled on port\n", #flag);   \
                        } else if (port_cfg->dev_info.tx_queue_offload_capa & (flag)) {\
                                port_cfg->tx_conf.offloads |= (flag);           \
                                plog_info("\t\t%s enabled on queue\n", #flag);  \
                        } else {                                                \
                                port_cfg->requested_tx_offload &= ~(flag);      \
                                plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
                        }                                                       \
                } else {                                                        \
                        plog_info("\t\t%s disabled\n", #flag);                  \
                }                                                               \
        } while (0)
145
/*
 * Resolve one requested RX offload `flag` against the local `port_cfg`
 * (a struct prox_port_cfg * that must be in scope at the call site):
 *   - supported by the port: enabled in port_conf.rxmode.offloads;
 *   - supported per queue only: enabled in rx_conf.offloads;
 *   - unsupported: dropped from requested_rx_offload.
 * Every outcome is logged. The body is wrapped in do { } while (0) so the
 * macro behaves as a single statement (safe inside un-braced if/else);
 * invoke it with a trailing semicolon.
 */
#define CONFIGURE_RX_OFFLOAD(flag)                                              \
        do {                                                                    \
                if (port_cfg->requested_rx_offload & (flag)) {                  \
                        if (port_cfg->dev_info.rx_offload_capa & (flag)) {      \
                                port_cfg->port_conf.rxmode.offloads |= (flag);  \
                                plog_info("\t\t%s enabled on port\n", #flag);   \
                        } else if (port_cfg->dev_info.rx_queue_offload_capa & (flag)) {\
                                port_cfg->rx_conf.offloads |= (flag);           \
                                plog_info("\t\t%s enabled on queue\n", #flag);  \
                        } else {                                                \
                                port_cfg->requested_rx_offload &= ~(flag);      \
                                plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
                        }                                                       \
                } else {                                                        \
                        plog_info("\t\t%s disabled\n", #flag);                  \
                }                                                               \
        } while (0)
161
162
163 /* initialize rte devices and check the number of available ports */
164 void init_rte_dev(int use_dummy_devices)
165 {
166         uint8_t nb_ports, port_id_max;
167         int port_id_last;
168         struct rte_eth_dev_info dev_info;
169         const struct rte_pci_device *pci_dev;
170
171         nb_ports = rte_eth_dev_count();
172         /* get available ports configuration */
173         PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
174
175         if (use_dummy_devices) {
176 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
177                 nb_ports = prox_last_port_active() + 1;
178                 plog_info("Creating %u dummy devices\n", nb_ports);
179
180                 char port_name[32] = "0dummy_dev";
181                 for (uint32_t i = 0; i < nb_ports; ++i) {
182 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
183                         rte_vdev_init(port_name, "size=ETHER_MIN_LEN,copy=0");
184 #else
185                         eth_dev_null_create(port_name, 0, ETHER_MIN_LEN, 0);
186 #endif
187                         port_name[0]++;
188                 }
189 #else
190         PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
191 #endif
192         }
193         else if (prox_last_port_active() != -1) {
194                 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
195                 plog_info("\tDPDK has found %u ports\n", nb_ports);
196         }
197
198         if (nb_ports > PROX_MAX_PORTS) {
199                 plog_warn("\tWarning: I can deal with at most %u ports."
200                         " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
201
202                 nb_ports = PROX_MAX_PORTS;
203         }
204         port_id_max = nb_ports - 1;
205         port_id_last = prox_last_port_active();
206         PROX_PANIC(port_id_last > port_id_max,
207                    "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
208                    port_id_last, port_id_max);
209
210         /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
211         for (uint8_t port_id = 0; port_id < nb_ports; ++port_id) {
212                 /* skip ports that are not enabled */
213                 if (!prox_port_cfg[port_id].active) {
214                         continue;
215                 }
216                 plog_info("\tGetting info for rte dev %u\n", port_id);
217                 rte_eth_dev_info_get(port_id, &dev_info);
218                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
219                 port_cfg->socket = -1;
220
221                 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
222                 port_cfg->max_txq = dev_info.max_tx_queues;
223                 port_cfg->max_rxq = dev_info.max_rx_queues;
224                 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
225                 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
226
227                 strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
228                 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
229
230                 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
231                         strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
232                 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
233                         strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
234                 } else {
235                         strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
236                 }
237                 char *ptr;
238                 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
239                         *ptr = '\x0';
240                 }
241
242 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
243                 pci_dev = dev_info.pci_dev;
244 #else
245                 if (!dev_info.device)
246                         continue;
247                 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
248 #endif
249                 if (!pci_dev)
250                         continue;
251
252                 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
253                          "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
254                 /* Try to find the device's numa node */
255                 char buf[1024];
256                 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
257                 FILE* numa_node_fd = fopen(buf, "r");
258                 if (numa_node_fd) {
259                         if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
260                                 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
261                         }
262                         port_cfg->socket = strtol(buf, 0, 0);
263                         if (port_cfg->socket == -1) {
264                                 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
265                         }
266                         fclose(numa_node_fd);
267                 }
268
269                 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
270                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
271                         plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
272                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
273                 }
274                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
275                         plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
276                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
277                 }
278         }
279 }
280
281 /* Create rte ring-backed devices */
282 uint8_t init_rte_ring_dev(void)
283 {
284         uint8_t nb_ring_dev = 0;
285
286         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
287                 /* skip ports that are not enabled */
288                 if (!prox_port_cfg[port_id].active) {
289                         continue;
290                 }
291                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
292                 if (port_cfg->rx_ring[0] != '\0') {
293                         plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
294
295                         struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
296                         PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
297                         struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
298                         PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
299
300                         int ret = rte_eth_from_rings(port_cfg->name, &rx_ring, 1, &tx_ring, 1, rte_socket_id());
301                         PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
302
303                         port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
304
305                         nb_ring_dev++;
306                 }
307         }
308
309         return nb_ring_dev;
310 }
311
312 static void print_port_capa(struct prox_port_cfg *port_cfg)
313 {
314         uint8_t port_id;
315
316         port_id = port_cfg - prox_port_cfg;
317         plog_info("\t*** Initializing port %u ***\n", port_id);
318         plog_info("\t\tPort name is set to %s\n", port_cfg->name);
319         plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
320         plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
321 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
322         plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
323 #endif
324         if (port_cfg->max_link_speed != UINT32_MAX) {
325                 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
326         }
327
328 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
329         plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
330         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
331                 plog_info("VLAN STRIP | ");
332         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
333                 plog_info("IPV4 CKSUM | ");
334         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
335                 plog_info("UDP CKSUM | ");
336         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
337                 plog_info("TCP CKSUM | ");
338         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
339                 plog_info("TCP LRO | ");
340         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
341                 plog_info("QINQ STRIP | ");
342         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
343                 plog_info("OUTER_IPV4_CKSUM | ");
344         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
345                 plog_info("MACSEC STRIP | ");
346         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
347                 plog_info("HEADER SPLIT | ");
348         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
349                 plog_info("VLAN FILTER | ");
350         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
351                 plog_info("VLAN EXTEND | ");
352         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
353                 plog_info("JUMBO FRAME | ");
354         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
355                 plog_info("CRC STRIP | ");
356         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
357                 plog_info("SCATTER | ");
358         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
359                 plog_info("TIMESTAMP | ");
360         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
361                 plog_info("SECURITY ");
362         plog_info("\n");
363
364         plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
365         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
366                 plog_info("VLAN INSERT | ");
367         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
368                 plog_info("IPV4 CKSUM | ");
369         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
370                 plog_info("UDP CKSUM | ");
371         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
372                 plog_info("TCP CKSUM | ");
373         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
374                 plog_info("SCTP CKSUM | ");
375         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
376                 plog_info("TCP TS0 | ");
377         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
378                 plog_info("UDP TSO | ");
379         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
380                 plog_info("OUTER IPV4 CKSUM | ");
381         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
382                 plog_info("QINQ INSERT | ");
383         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
384                 plog_info("VLAN TNL TSO | ");
385         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
386                 plog_info("GRE TNL TSO | ");
387         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
388                 plog_info("IPIP TNL TSO | ");
389         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
390                 plog_info("GENEVE TNL TSO | ");
391         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
392                 plog_info("MACSEC INSERT | ");
393         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
394                 plog_info("MT LOCKFREE | ");
395         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
396                 plog_info("MULTI SEG | ");
397         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
398                 plog_info("SECURITY | ");
399         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
400                 plog_info("UDP TNL TSO | ");
401         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
402                 plog_info("IP TNL TSO | ");
403         plog_info("\n");
404
405         plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
406         plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
407         plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
408         plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
409         plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
410 #endif
411 }
412
413 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
414 {
415         port_cfg->max_link_speed = UINT32_MAX;
416
417 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
418         // virtio and vmxnet3 reports fake max_link_speed
419         if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
420                 // Get link_speed from highest capability from the port
421                 // This will be used by gen and lat for extrapolation purposes
422                 // The negotiated link_speed (as reported by rte_eth_link_get
423                 // or rte_eth_link_get_nowait) might be reported too late
424                 // and might result in wrong exrapolation, and hence should not be used
425                 // for extrapolation purposes
426                 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
427                         port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
428                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
429                         port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
430                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
431                         port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
432                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
433                         port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
434                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
435                         port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
436                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
437                         port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
438                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
439                         port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
440                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
441                         port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
442                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
443                         port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
444                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
445                         port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
446                 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
447                         port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
448                 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
449                         port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
450
451         }
452 #endif
453 }
454
455 static void init_port(struct prox_port_cfg *port_cfg)
456 {
457         static char dummy_pool_name[] = "0_dummy";
458         struct rte_eth_link link;
459         uint8_t port_id;
460         int ret;
461
462         get_max_link_speed(port_cfg);
463         print_port_capa(port_cfg);
464         port_id = port_cfg - prox_port_cfg;
465         PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
466                    "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
467
468         if (port_cfg->n_rxq == 0) {
469                 /* not receiving on this port */
470                 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
471                 port_cfg->n_rxq = 1;
472                 uint32_t mbuf_size = TX_MBUF_SIZE;
473                 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
474                         mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
475
476                 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
477                           port_cfg->socket, port_cfg->n_rxd, mbuf_size);
478                 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
479                                                        0,
480                                                        sizeof(struct rte_pktmbuf_pool_private),
481                                                        rte_pktmbuf_pool_init, NULL,
482                                                        prox_pktmbuf_init, 0,
483                                                        port_cfg->socket, 0);
484                 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
485                            port_cfg->socket, port_cfg->n_rxd);
486                 dummy_pool_name[0]++;
487         } else {
488                 // Most pmd should now support setting mtu
489                 if (port_cfg->mtu + ETHER_HDR_LEN + ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
490                         plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
491                         port_cfg->mtu = port_cfg->max_rx_pkt_len;
492                 }
493                 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
494                 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
495                 if (ret)
496                         plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
497
498                 if (port_cfg->n_txq == 0) {
499                         /* not sending on this port */
500                         plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
501                         port_cfg->n_txq = 1;
502                 }
503         }
504
505         if (port_cfg->n_rxq > 1)  {
506                 // Enable RSS if multiple receive queues
507                 port_cfg->port_conf.rxmode.mq_mode                      |= ETH_MQ_RX_RSS;
508                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key        = toeplitz_init_key;
509                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len    = TOEPLITZ_KEY_LEN;
510 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
511                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IP|ETH_RSS_UDP;
512 #else
513                 port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
514 #endif
515         }
516
517         // Make sure that the requested RSS offload is supported by the PMD
518 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
519         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
520 #endif
521         plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP);
522
523         // rxmode such as hw src strip
524 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
525         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
526         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
527         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
528 #else
529         if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
530                 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
531         }
532         if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
533                 port_cfg->port_conf.rxmode.jumbo_frame = 1;
534         }
535 #endif
536
537         // IPV4, UDP, SCTP Checksums
538 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
539         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
540         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
541         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
542 #else
543         if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
544                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
545                 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
546         }
547         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
548                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
549                 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
550         }
551 #endif
552         // Multi Segments
553 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
554         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
555 #else
556         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
557                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
558                 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
559         } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
560                 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
561         else
562                 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
563
564         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
565                 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
566         else
567                 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
568 #endif
569
570         // Refcount
571 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
572         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
573 #else
574         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
575                 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
576         else
577                 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
578 #endif
579
580         plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
581                   port_id, port_cfg->n_rxq, port_cfg->n_txq);
582
583         PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
584         PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
585
586         if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
587             !strcmp(port_cfg->short_name, "virtio") ||
588 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
589             !strcmp(port_cfg->short_name, "i40e") ||
590 #endif
591             !strcmp(port_cfg->short_name, "i40e_vf") ||
592             !strcmp(port_cfg->short_name, "avp") || /* Wind River */
593             !strcmp(port_cfg->driver_name, "") || /* NULL device */
594             !strcmp(port_cfg->short_name, "vmxnet3")) {
595                 port_cfg->port_conf.intr_conf.lsc = 0;
596                 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
597         }
598
599         if (port_cfg->lsc_set_explicitely) {
600                 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
601                 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
602         }
603         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
604                 if (port_cfg->n_txd < 512) {
605                         // Vmxnet3 driver requires minimum 512 tx descriptors
606                         plog_info("\t\tNumber of TX descriptors is set to 512 (minimum required for vmxnet3\n");
607                         port_cfg->n_txd = 512;
608                 }
609         }
610
611         ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
612                                     port_cfg->n_txq, &port_cfg->port_conf);
613         PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
614
615         if (port_cfg->port_conf.intr_conf.lsc) {
616                 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
617         }
618
619         plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
620
621         /* initialize TX queues first */
622         for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
623                 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
624                           queue_id, port_cfg->socket, port_cfg->n_txd);
625                 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
626                                              port_cfg->socket, &port_cfg->tx_conf);
627                 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
628         }
629
630         /* initialize RX queues */
631         for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
632                 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
633                           queue_id, port_id, port_cfg->socket,
634                           port_cfg->n_rxd, port_cfg->pool[queue_id]);
635                 ret = rte_eth_rx_queue_setup(port_id, queue_id,
636                                              port_cfg->n_rxd,
637                                              port_cfg->socket, &port_cfg->rx_conf,
638                                              port_cfg->pool[queue_id]);
639                 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
640         }
641
642         plog_info("\t\tStarting up port %u ...", port_id);
643         ret = rte_eth_dev_start(port_id);
644
645         PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
646         plog_info(" done: ");
647
648         /* Getting link status can be done without waiting if Link
649            State Interrupt is enabled since in that case, if the link
650            is recognized as being down, an interrupt will notify that
651            it has gone up. */
652         if (port_cfg->port_conf.intr_conf.lsc)
653                 rte_eth_link_get_nowait(port_id, &link);
654         else
655                 rte_eth_link_get(port_id, &link);
656
657         port_cfg->link_up = link.link_status;
658         port_cfg->link_speed = link.link_speed;
659
660         if (link.link_status) {
661                 plog_info("Link Up - speed %'u Mbps - %s\n",
662                           link.link_speed,
663                           (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
664                           "full-duplex" : "half-duplex");
665         }
666         else {
667                 plog_info("Link Down\n");
668         }
669
670         if (port_cfg->promiscuous) {
671                 rte_eth_promiscuous_enable(port_id);
672                 plog_info("\t\tport %u in promiscuous mode\n", port_id);
673         }
674
675         if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
676             strcmp(port_cfg->short_name, "i40e") &&
677             strcmp(port_cfg->short_name, "i40e_vf") &&
678             strcmp(port_cfg->short_name, "vmxnet3")) {
679                 for (uint8_t i = 0; i < 16; ++i) {
680                         ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
681                         if (ret) {
682                                 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
683                         }
684                         ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
685                         if (ret) {
686                                 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
687                         }
688                 }
689         }
690 }
691
692 void init_port_all(void)
693 {
694         uint8_t max_port_idx = prox_last_port_active() + 1;
695
696         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
697                 if (!prox_port_cfg[portid].active) {
698                         continue;
699                 }
700                 init_port(&prox_port_cfg[portid]);
701         }
702 }
703
704 void close_ports_atexit(void)
705 {
706         uint8_t max_port_idx = prox_last_port_active() + 1;
707
708         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
709                 if (!prox_port_cfg[portid].active) {
710                         continue;
711                 }
712                 rte_eth_dev_close(portid);
713         }
714 }
715
716 void init_port_addr(void)
717 {
718         struct prox_port_cfg *port_cfg;
719
720         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
721                 if (!prox_port_cfg[port_id].active) {
722                         continue;
723                 }
724                 port_cfg = &prox_port_cfg[port_id];
725
726                 switch (port_cfg->type) {
727                 case PROX_PORT_MAC_HW:
728                         rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
729                         break;
730                 case PROX_PORT_MAC_RAND:
731                         eth_random_addr(port_cfg->eth_addr.addr_bytes);
732                         break;
733                 case PROX_PORT_MAC_SET:
734                         break;
735                 }
736         }
737 }
738
739 int port_is_active(uint8_t port_id)
740 {
741         if (port_id > PROX_MAX_PORTS) {
742                 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
743                 return 0;
744         }
745
746         struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
747         if (!port_cfg->active) {
748                 plog_info("Port %u is not active\n", port_id);
749                 return 0;
750         }
751         return 1;
752 }