bf7c4cb7f1cae1789a11d3cb4c7798229fb55d7b
[samplevnf.git] / VNFs / DPPD-PROX / prox_port_cfg.c
1 /*
2 // Copyright (c) 2010-2020 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <string.h>
18 #include <stdio.h>
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
21 #include <rte_mbuf.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
24 #else
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
26 #include <rte_dev.h>
27 #else
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
30 #endif
31 #endif
32 #endif
33
34 #include <sys/ioctl.h>
35 #include <net/if.h>
36
37 #include "prox_port_cfg.h"
38 #include "prox_globals.h"
39 #include "log.h"
40 #include "quit.h"
41 #include "defaults.h"
42 #include "toeplitz.h"
43 #include "defines.h"
44 #include "prox_cksum.h"
45 #include "stats_irq.h"
46 #include "prox_compat.h"
47 #include "rte_ethdev.h"
48 #include "lconf.h"
49
/* Per-port configuration table with PROX_MAX_PORTS entries; the code in this file indexes it by port id. */
50 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
51
/* Counter incremented by lsc_cb() each time it sees an RTE_ETH_EVENT_INTR_LSC event. */
52 rte_atomic32_t lsc;
53
54 int prox_nb_active_ports(void)
55 {
56         int ret = 0;
57         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
58                 ret += prox_port_cfg[i].active;
59         }
60         return ret;
61 }
62
63 int prox_last_port_active(void)
64 {
65         int ret = -1;
66         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
67                 if (prox_port_cfg[i].active) {
68                         ret = i;
69                 }
70         }
71         return ret;
72 }
73
74 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
75 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
76         __attribute__((unused)) void *ret_param)
77 #else
78 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
79 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
80         __attribute__((unused)) void *ret_param)
81 #else
82 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
83 #endif
84 #endif
85 {
86         if (RTE_ETH_EVENT_INTR_LSC != type) {
87 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
88                 return -1;
89 #else
90                 return;
91 #endif
92         }
93
94         rte_atomic32_inc(&lsc);
95
96 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
97         return 0;
98 #endif
99 }
100
/* Argument bundle that prox_pktmbuf_reinit() receives through its 'arg' pointer. */
101 struct prox_pktmbuf_reinit_args {
102         struct rte_mempool *mp; /* pool whose header_size is read; also passed on to prox_pktmbuf_init() */
103         struct lcore_cfg   *lconf; /* forwarded as the opaque argument of prox_pktmbuf_init() */
104 };
105
106 /* standard mbuf initialization procedure */
107 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
108 {
109         struct rte_mbuf *mbuf = _m;
110
111 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
112         mbuf->tx_offload = CALC_TX_OL(sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr));
113 #else
114         mbuf->pkt.vlan_macip.f.l2_len = sizeof(prox_rte_ether_hdr);
115         mbuf->pkt.vlan_macip.f.l3_len = sizeof(prox_rte_ipv4_hdr);
116 #endif
117
118         rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
119 }
120
121 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
122 {
123         struct prox_pktmbuf_reinit_args *init_args = arg;
124         struct rte_mbuf *m;
125         char* obj = start;
126
127         obj += init_args->mp->header_size;
128         m = (struct rte_mbuf*)obj;
129
130         prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
131 }
132
/*
 * Resolve one requested TX offload for the 'port_cfg' variable in scope at
 * the expansion site:
 *   - not requested                      -> logged as disabled
 *   - requested but in disabled_tx_offload -> dropped from the request
 *   - supported at port level            -> enabled in port_conf.txmode.offloads
 *   - supported at queue level           -> enabled in tx_conf.offloads
 *   - otherwise                          -> dropped from the request
 * 'flag' is parenthesized everywhere so that compound expressions such as
 * (A | B) are tested and cleared as a whole. The expansion stays a plain
 * if/else statement so existing call sites keep compiling unchanged.
 */
#define CONFIGURE_TX_OFFLOAD(flag)                                              \
        if (port_cfg->requested_tx_offload & (flag)) {                          \
                if (port_cfg->disabled_tx_offload & (flag)) {                   \
                        plog_info("\t\t%s disabled by configuration\n", #flag); \
                        port_cfg->requested_tx_offload &= ~(flag);              \
                } else if (port_cfg->dev_info.tx_offload_capa & (flag)) {       \
                        port_cfg->port_conf.txmode.offloads |= (flag);          \
                        plog_info("\t\t%s enabled on port\n", #flag);           \
                } else if (port_cfg->dev_info.tx_queue_offload_capa & (flag)) { \
                        port_cfg->tx_conf.offloads |= (flag);                   \
                        plog_info("\t\t%s enabled on queue\n", #flag);          \
                } else {                                                        \
                        port_cfg->requested_tx_offload &= ~(flag);              \
                        plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag); \
                }                                                               \
        } else {                                                                \
                plog_info("\t\t%s disabled\n", #flag);                          \
        }
151
/*
 * Resolve one requested RX offload for the 'port_cfg' variable in scope at
 * the expansion site:
 *   - not requested            -> logged as disabled
 *   - supported at port level  -> enabled in port_conf.rxmode.offloads
 *   - supported at queue level -> enabled in rx_conf.offloads
 *   - otherwise                -> dropped from requested_rx_offload
 * 'flag' is parenthesized everywhere so that compound expressions such as
 * (A | B) are tested and cleared as a whole. The expansion stays a plain
 * if/else statement so existing call sites keep compiling unchanged.
 */
#define CONFIGURE_RX_OFFLOAD(flag)                                              \
        if (port_cfg->requested_rx_offload & (flag)) {                          \
                if (port_cfg->dev_info.rx_offload_capa & (flag)) {              \
                        port_cfg->port_conf.rxmode.offloads |= (flag);          \
                        plog_info("\t\t%s enabled on port\n", #flag);           \
                } else if (port_cfg->dev_info.rx_queue_offload_capa & (flag)) { \
                        port_cfg->rx_conf.offloads |= (flag);                   \
                        plog_info("\t\t%s enabled on queue\n", #flag);          \
                } else {                                                        \
                        port_cfg->requested_rx_offload &= ~(flag);              \
                        plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag); \
                }                                                               \
        } else {                                                                \
                plog_info("\t\t%s disabled\n", #flag);                          \
        }
167
/*
 * Build the IPv4 netmask for a prefix length and convert it with
 * rte_cpu_to_be_32().
 *
 *   prefix 0     -> 0
 *   prefix 1..31 -> the top 'prefix' bits set
 *   prefix >= 32 -> all bits set (values above 32 are clamped)
 *
 * The shift is done on an unsigned 32-bit value: shifting a signed 1 into
 * the sign bit (prefix 1) or by a negative count (prefix > 32) is undefined
 * behavior in C.
 */
static inline uint32_t get_netmask(uint8_t prefix)
{
        if (prefix == 0)
                return 0;
        if (prefix >= 32)
                return rte_cpu_to_be_32(UINT32_MAX);

        return rte_cpu_to_be_32(~((UINT32_C(1) << (32 - prefix)) - 1));
}
175
176 static void set_ip_address(char *devname, uint32_t ip, uint8_t prefix)
177 {
178         struct ifreq ifreq;
179         struct sockaddr_in in_addr;
180         int fd, rc;
181         uint32_t netmask = get_netmask(prefix);
182         plog_info("Setting netmask to %x\n", netmask);
183         uint32_t ip_cpu = rte_be_to_cpu_32(ip);
184
185         fd = socket(AF_INET, SOCK_DGRAM, 0);
186
187         memset(&ifreq, 0, sizeof(struct ifreq));
188         memset(&in_addr, 0, sizeof(struct sockaddr_in));
189
190         in_addr.sin_family = AF_INET;
191         in_addr.sin_addr = *(struct in_addr *)&ip_cpu;
192
193         prox_strncpy(ifreq.ifr_name, devname, IFNAMSIZ);
194         ifreq.ifr_addr = *(struct sockaddr *)&in_addr;
195         rc = ioctl(fd, SIOCSIFADDR, &ifreq);
196         PROX_PANIC(rc < 0, "Failed to set IP address %x on device %s: error = %d (%s)\n", ip_cpu, devname, errno, strerror(errno));
197
198         in_addr.sin_addr = *(struct in_addr *)&netmask;
199         ifreq.ifr_netmask = *(struct sockaddr *)&in_addr;
200         rc = ioctl(fd, SIOCSIFNETMASK, &ifreq);
201         PROX_PANIC(rc < 0, "Failed to set netmask %x (prefix %d) on device %s: error = %d (%s)\n", netmask, prefix, devname, errno, strerror(errno));
202         close(fd);
203 }
204
205 /* initialize rte devices and check the number of available ports */
206 void init_rte_dev(int use_dummy_devices)
207 {
208         uint8_t nb_ports, port_id_max;
209         int port_id_last, rc = 0;
210         struct rte_eth_dev_info dev_info;
211         const struct rte_pci_device *pci_dev;
212
213         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
214                 if (prox_port_cfg[port_id].active && (prox_port_cfg[port_id].virtual == 0) && (port_id >= prox_rte_eth_dev_count_avail())) {
215                         PROX_PANIC(1, "port %u used but only %u available\n", port_id, prox_rte_eth_dev_count_avail());
216                 }
217         }
218         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
219                 if (!prox_port_cfg[port_id].active) {
220                         continue;
221                 }
222                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
223
224                 prox_port_cfg[port_id].n_vlans = 0;
225                 while ((prox_port_cfg[port_id].n_vlans < PROX_MAX_VLAN_TAGS) && (prox_port_cfg[port_id].vlan_tags[prox_port_cfg[port_id].n_vlans])) {
226                         prox_port_cfg[port_id].n_vlans++;
227                 }
228
229                 if (port_cfg->vdev[0]) {
230                         char name[MAX_NAME_BUFFER_SIZE], tap[MAX_NAME_SIZE];
231                         snprintf(tap, MAX_NAME_SIZE, "net_tap%d", port_id);
232 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
233                         snprintf(name, MAX_NAME_BUFFER_SIZE, "iface=%s", port_cfg->vdev);
234                         rc = rte_vdev_init(tap, name);
235 #else
236                         PROX_PANIC(1, "vdev not supported in DPDK < 17.05\n");
237 #endif
238                         PROX_PANIC(rc != 0, "Unable to create device %s %s\n", "net tap", port_cfg->vdev);
239                         int vdev_port_id = prox_rte_eth_dev_count_avail() - 1;
240                         PROX_PANIC(vdev_port_id >= PROX_MAX_PORTS, "Too many port defined %d >= %d\n", vdev_port_id, PROX_MAX_PORTS);
241                         plog_info("\tCreating device %s, port %d\n", port_cfg->vdev, vdev_port_id);
242                         prox_port_cfg[vdev_port_id].is_vdev = 1;
243                         prox_port_cfg[vdev_port_id].active = 1;
244                         prox_port_cfg[vdev_port_id].dpdk_mapping = port_id;
245                         prox_port_cfg[vdev_port_id].n_txq = 1;
246                         prox_port_cfg[vdev_port_id].n_vlans = prox_port_cfg[port_id].n_vlans;
247
248                         for (uint32_t tag_id = 0; tag_id < prox_port_cfg[port_id].n_vlans; tag_id++) {
249                                 prox_port_cfg[vdev_port_id].vlan_tags[tag_id] = prox_port_cfg[port_id].vlan_tags[tag_id];
250                                 char command[1024];
251                                 snprintf(prox_port_cfg[vdev_port_id].names[tag_id], MAX_NAME_BUFFER_SIZE, "%s_%d", port_cfg->vdev, prox_port_cfg[port_id].vlan_tags[tag_id]);
252                                 sprintf(command, "ip link add link %s name %s type vlan id %d", port_cfg->vdev, prox_port_cfg[vdev_port_id].names[tag_id], prox_port_cfg[port_id].vlan_tags[tag_id]);
253                                 system(command);
254                                 plog_info("\tRunning %s\n", command);
255                                 plog_info("\tUsing vlan tag %d - added device %s\n", prox_port_cfg[port_id].vlan_tags[tag_id], prox_port_cfg[vdev_port_id].names[tag_id]);
256                         }
257                         if (prox_port_cfg[port_id].n_vlans == 0) {
258                                 strncpy(prox_port_cfg[vdev_port_id].names[0], port_cfg->vdev, MAX_NAME_SIZE);
259                                 prox_port_cfg[vdev_port_id].n_vlans = 1;
260                                 prox_port_cfg[vdev_port_id].vlan_tags[0] = 0;
261                         }
262
263                         prox_port_cfg[port_id].dpdk_mapping = vdev_port_id;
264                         uint32_t i = 0;
265                         while ((i < PROX_MAX_VLAN_TAGS) && (prox_port_cfg[port_id].ip_addr[i].ip)) {
266                                 prox_port_cfg[vdev_port_id].ip_addr[i].ip = prox_port_cfg[port_id].ip_addr[i].ip;
267                                 prox_port_cfg[vdev_port_id].ip_addr[i].prefix = prox_port_cfg[port_id].ip_addr[i].prefix;
268                                 i++;
269                         }
270                         prox_port_cfg[vdev_port_id].type = prox_port_cfg[port_id].type;
271                         if (prox_port_cfg[vdev_port_id].type == PROX_PORT_MAC_HW) {
272                                 // If DPDK port MAC set to HW, then make sure the vdev has the same MAC as DPDK port
273                                 prox_port_cfg[vdev_port_id].type = PROX_PORT_MAC_SET;
274                                 rte_eth_macaddr_get(port_id, &prox_port_cfg[vdev_port_id].eth_addr);
275                                 plog_info("\tDPDK port %d MAC address pre-configured to MAC from port %d: "MAC_BYTES_FMT"\n",
276                                         vdev_port_id, port_id, MAC_BYTES(prox_port_cfg[vdev_port_id].eth_addr.addr_bytes));
277                         } else
278                                 memcpy(&prox_port_cfg[vdev_port_id].eth_addr, &prox_port_cfg[port_id].eth_addr, sizeof(prox_port_cfg[port_id].eth_addr));
279                 }
280                 if (prox_port_cfg[port_id].n_vlans == 0) {
281                         prox_port_cfg[port_id].n_vlans = 1;
282                         prox_port_cfg[port_id].vlan_tags[0] = 0;
283                 }
284         }
285         nb_ports = prox_rte_eth_dev_count_avail();
286         /* get available ports configuration */
287         PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
288
289         if (use_dummy_devices) {
290 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
291                 nb_ports = prox_last_port_active() + 1;
292                 plog_info("Creating %u dummy devices\n", nb_ports);
293
294                 char port_name[32] = "0dummy_dev";
295                 for (uint32_t i = 0; i < nb_ports; ++i) {
296 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
297                         rte_vdev_init(port_name, "size=64,copy=0");
298 #else
299                         eth_dev_null_create(port_name, 0, PROX_RTE_ETHER_MIN_LEN, 0);
300 #endif
301                         port_name[0]++;
302                 }
303 #else
304         PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
305 #endif
306         }
307         else if (prox_last_port_active() != -1) {
308                 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
309                 plog_info("\tDPDK has found %u ports\n", nb_ports);
310         }
311
312         if (nb_ports > PROX_MAX_PORTS) {
313                 plog_warn("\tWarning: I can deal with at most %u ports."
314                         " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
315
316                 nb_ports = PROX_MAX_PORTS;
317         }
318
319 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
320         port_id_max = -1;
321         uint16_t id;
322         RTE_ETH_FOREACH_DEV(id) {
323                 char name[256];
324                 rte_eth_dev_get_name_by_port(id, name);
325                 plog_info("\tFound DPDK port id %u %s\n", id, name);
326                 if (id >= PROX_MAX_PORTS) {
327                         plog_warn("\tWarning: I can deal with at most %u ports."
328                                  " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
329                 } else {
330                         prox_port_cfg[id].available = 1;
331                         if (id > port_id_max)
332                                 port_id_max = id;
333                 }
334         }
335 #else
336         port_id_max = nb_ports - 1;
337 #endif
338
339         port_id_last = prox_last_port_active();
340         PROX_PANIC(port_id_last > port_id_max,
341                    "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
342                    port_id_last, port_id_max);
343
344         /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
345 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
346         for (uint8_t port_id = 0; port_id <= port_id_last; ++port_id) {
347 #else
348         for (uint8_t port_id = 0; port_id <= nb_ports; ++port_id) {
349 #endif
350                 /* skip ports that are not enabled */
351                 if (!prox_port_cfg[port_id].active) {
352                         continue;
353 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
354                 } else if (prox_port_cfg[port_id].available == 0) {
355                         PROX_PANIC(1, "port %u enabled but not available\n", port_id);
356 #endif
357                 }
358                 plog_info("\tGetting info for rte dev %u\n", port_id);
359                 rte_eth_dev_info_get(port_id, &dev_info);
360                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
361                 port_cfg->socket = -1;
362
363                 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
364                 port_cfg->max_txq = dev_info.max_tx_queues;
365                 port_cfg->max_rxq = dev_info.max_rx_queues;
366                 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
367                 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
368                 port_cfg->min_tx_desc = dev_info.tx_desc_lim.nb_min;
369                 port_cfg->max_tx_desc = dev_info.tx_desc_lim.nb_max;
370                 port_cfg->min_rx_desc = dev_info.rx_desc_lim.nb_min;
371                 port_cfg->max_rx_desc = dev_info.rx_desc_lim.nb_max;
372
373                 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
374                 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
375                 plog_info("\tPort %u : %d<=nb_tx_desc<=%d %d<=nb_rx_desc<=%d\n", port_id, port_cfg->min_tx_desc, port_cfg->max_tx_desc, port_cfg->min_rx_desc, port_cfg->max_rx_desc);
376
377                 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
378                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
379                 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
380                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
381                 } else {
382                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
383                 }
384                 char *ptr;
385                 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
386                         *ptr = '\x0';
387                 }
388
389 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
390                 pci_dev = dev_info.pci_dev;
391 #else
392                 if (!dev_info.device)
393                         continue;
394                 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
395 #endif
396                 if (!pci_dev)
397                         continue;
398
399                 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
400                          "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
401                 /* Try to find the device's numa node */
402                 char buf[1024];
403                 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
404                 FILE* numa_node_fd = fopen(buf, "r");
405                 if (numa_node_fd) {
406                         if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
407                                 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
408                         }
409                         port_cfg->socket = strtol(buf, 0, 0);
410                         if (port_cfg->socket == -1) {
411                                 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
412                         }
413                         fclose(numa_node_fd);
414                 }
415
416                 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
417                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) {
418                         plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
419                         port_cfg->disabled_tx_offload |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
420                 }
421                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
422                         plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
423                         port_cfg->disabled_tx_offload |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
424                 }
425                 // Some OVS versions reports that they support UDP offload and no IPv4 offload, but fails when UDP offload is enabled
426                 if ((!strcmp(port_cfg->short_name, "virtio")) &&
427                         ((port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) == 0) &&
428                         (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
429                         plog_info("\t\tDisabling UDP cksum on virtio\n");
430                         port_cfg->disabled_tx_offload |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
431                 }
432         }
433 }
434
435 /* Create rte ring-backed devices */
436 uint8_t init_rte_ring_dev(void)
437 {
438         uint8_t nb_ring_dev = 0;
439
440         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
441                 /* skip ports that are not enabled */
442                 if (!prox_port_cfg[port_id].active) {
443                         continue;
444                 }
445                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
446                 if (port_cfg->rx_ring[0] != '\0') {
447                         plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
448
449                         struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
450                         PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
451                         struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
452                         PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
453
454                         int ret = rte_eth_from_rings(port_cfg->names[0], &rx_ring, 1, &tx_ring, 1, rte_socket_id());
455                         PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
456
457                         port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
458
459                         nb_ring_dev++;
460                 }
461         }
462
463         return nb_ring_dev;
464 }
465
466 static void print_port_capa(struct prox_port_cfg *port_cfg)
467 {
468         uint8_t port_id;
469
470         port_id = port_cfg - prox_port_cfg;
471         plog_info("\t*** Initializing port %u ***\n", port_id);
472         plog_info("\t\tPort name is set to %s\n", port_cfg->names[0]);
473         plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
474         plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
475 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
476         plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
477 #endif
478         if (port_cfg->max_link_speed != UINT32_MAX) {
479                 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
480         }
481
482 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
483         plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
484         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
485                 plog_info("VLAN STRIP | ");
486         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM)
487                 plog_info("IPV4 CKSUM | ");
488         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_UDP_CKSUM)
489                 plog_info("UDP CKSUM | ");
490         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_CKSUM)
491                 plog_info("TCP CKSUM | ");
492         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO)
493                 plog_info("TCP LRO | ");
494         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_QINQ_STRIP)
495                 plog_info("QINQ STRIP | ");
496         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM)
497                 plog_info("OUTER_IPV4_CKSUM | ");
498         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_MACSEC_STRIP)
499                 plog_info("MACSEC STRIP | ");
500 #if defined(RTE_ETH_RX_OFFLOAD_HEADER_SPLIT)
501         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_HEADER_SPLIT)
502                 plog_info("HEADER SPLIT | ");
503 #endif
504         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_FILTER)
505                 plog_info("VLAN FILTER | ");
506         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND)
507                 plog_info("VLAN EXTEND | ");
508         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_JUMBO_FRAME)
509                 plog_info("JUMBO FRAME | ");
510 #if defined(RTE_ETH_RX_OFFLOAD_CRC_STRIP)
511         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_CRC_STRIP)
512                 plog_info("CRC STRIP | ");
513 #endif
514 #if defined(RTE_ETH_RX_OFFLOAD_KEEP_CRC)
515         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
516                 plog_info("KEEP CRC | ");
517 #endif
518         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER)
519                 plog_info("SCATTER | ");
520         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
521                 plog_info("TIMESTAMP | ");
522         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SECURITY)
523                 plog_info("SECURITY ");
524         plog_info("\n");
525
526         plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
527         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VLAN_INSERT)
528                 plog_info("VLAN INSERT | ");
529         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)
530                 plog_info("IPV4 CKSUM | ");
531         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)
532                 plog_info("UDP CKSUM | ");
533         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)
534                 plog_info("TCP CKSUM | ");
535         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM)
536                 plog_info("SCTP CKSUM | ");
537         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO)
538                 plog_info("TCP TS0 | ");
539         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_TSO)
540                 plog_info("UDP TSO | ");
541         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM)
542                 plog_info("OUTER IPV4 CKSUM | ");
543         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_QINQ_INSERT)
544                 plog_info("QINQ INSERT | ");
545         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO)
546                 plog_info("VLAN TNL TSO | ");
547         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO)
548                 plog_info("GRE TNL TSO | ");
549         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPIP_TNL_TSO)
550                 plog_info("IPIP TNL TSO | ");
551         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO)
552                 plog_info("GENEVE TNL TSO | ");
553         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MACSEC_INSERT)
554                 plog_info("MACSEC INSERT | ");
555         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MT_LOCKFREE)
556                 plog_info("MT LOCKFREE | ");
557         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
558                 plog_info("MULTI SEG | ");
559         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SECURITY)
560                 plog_info("SECURITY | ");
561         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO)
562                 plog_info("UDP TNL TSO | ");
563         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IP_TNL_TSO)
564                 plog_info("IP TNL TSO | ");
565         plog_info("\n");
566
567         plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
568         plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
569         plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
570         plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
571         plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
572 #endif
573 }
574
575 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
576 {
577         port_cfg->max_link_speed = UINT32_MAX;
578
579 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
580         // virtio and vmxnet3 reports fake max_link_speed
581         if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
582                 // Get link_speed from highest capability from the port
583                 // This will be used by gen and lat for extrapolation purposes
584                 // The negotiated link_speed (as reported by rte_eth_link_get
585                 // or rte_eth_link_get_nowait) might be reported too late
586                 // and might result in wrong exrapolation, and hence should not be used
587                 // for extrapolation purposes
588                 if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_100G)
589                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_100G;
590                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_56G)
591                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_56G;
592                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_50G)
593                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_50G;
594                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_40G)
595                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_40G;
596                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_25G)
597                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_25G;
598                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_20G)
599                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_20G;
600                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_10G)
601                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_10G;
602                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_5G)
603                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_5G;
604                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_2_5G)
605                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_2_5G;
606                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_1G)
607                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_1G;
608                 else if (port_cfg->dev_info.speed_capa & (RTE_ETH_LINK_SPEED_100M_HD | RTE_ETH_LINK_SPEED_100M))
609                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_100M;
610                 else if (port_cfg->dev_info.speed_capa & (RTE_ETH_LINK_SPEED_10M_HD | RTE_ETH_LINK_SPEED_10M))
611                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_10M;
612
613         }
614 #endif
615 }
616
617 static void init_port(struct prox_port_cfg *port_cfg)
618 {
619         static char dummy_pool_name[] = "0_dummy";
620         struct rte_eth_link link;
621         uint8_t port_id;
622         int ret;
623
624         get_max_link_speed(port_cfg);
625         print_port_capa(port_cfg);
626         port_id = port_cfg - prox_port_cfg;
627         PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
628                    "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
629
630         if (port_cfg->n_rxq == 0) {
631                 /* not receiving on this port */
632                 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
633                 port_cfg->n_rxq = 1;
634                 uint32_t mbuf_size = TX_MBUF_SIZE;
635                 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
636                         mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
637
638                 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
639                           port_cfg->socket, port_cfg->n_rxd, mbuf_size);
640                 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
641                                                        0,
642                                                        sizeof(struct rte_pktmbuf_pool_private),
643                                                        rte_pktmbuf_pool_init, NULL,
644                                                        prox_pktmbuf_init, 0,
645                                                        port_cfg->socket, 0);
646                 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
647                            port_cfg->socket, port_cfg->n_rxd);
648                 dummy_pool_name[0]++;
649         } else {
650                 // Most pmd should now support setting mtu
651                 if (port_cfg->mtu + PROX_RTE_ETHER_HDR_LEN + PROX_RTE_ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
652                         plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
653                         port_cfg->mtu = port_cfg->max_rx_pkt_len;
654                 }
655                 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
656                 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
657                 if (ret)
658                         plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
659
660                 if (port_cfg->n_txq == 0) {
661                         /* not sending on this port */
662                         plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
663                         port_cfg->n_txq = 1;
664                 }
665         }
666
667         if (port_cfg->n_rxq > 1)  {
668                 // Enable RSS if multiple receive queues
669                 if (strcmp(port_cfg->short_name, "virtio")) {
670                         port_cfg->port_conf.rxmode.mq_mode                      |= RTE_ETH_MQ_RX_RSS;
671                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key        = toeplitz_init_key;
672                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len    = TOEPLITZ_KEY_LEN;
673 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
674                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = RTE_ETH_RSS_IP|RTE_ETH_RSS_UDP;
675 #else
676                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = RTE_ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
677 #endif
678                 }
679         }
680
681         // Make sure that the requested RSS offload is supported by the PMD
682 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
683         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
684 #endif
685         if (strcmp(port_cfg->short_name, "virtio")) {
686                 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, RTE_ETH_RSS_IP|RTE_ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
687         } else {
688                 plog_info("\t\t Not enabling RSS on virtio port");
689         }
690
691         // rxmode such as hw src strip
692 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
693 #if defined (RTE_ETH_RX_OFFLOAD_CRC_STRIP)
694         CONFIGURE_RX_OFFLOAD(RTE_ETH_RX_OFFLOAD_CRC_STRIP);
695 #endif
696 #if defined (RTE_ETH_RX_OFFLOAD_KEEP_CRC)
697         CONFIGURE_RX_OFFLOAD(RTE_ETH_RX_OFFLOAD_KEEP_CRC);
698 #endif
699         CONFIGURE_RX_OFFLOAD(RTE_ETH_RX_OFFLOAD_JUMBO_FRAME);
700         CONFIGURE_RX_OFFLOAD(RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
701 #else
702         if (port_cfg->requested_rx_offload & RTE_ETH_RX_OFFLOAD_CRC_STRIP) {
703                 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
704         }
705         if (port_cfg->requested_rx_offload & RTE_ETH_RX_OFFLOAD_JUMBO_FRAME) {
706                 port_cfg->port_conf.rxmode.jumbo_frame = 1;
707         }
708 #endif
709
710         // IPV4, UDP, SCTP Checksums
711 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
712         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_IPV4_CKSUM);
713         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_UDP_CKSUM);
714         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_VLAN_INSERT);
715 #else
716         if ((port_cfg->dev_info.tx_offload_capa & (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) == 0) {
717                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
718                 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
719         }
720         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
721                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
722                 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
723         }
724 #endif
725         // Multi Segments
726 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
727         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_MULTI_SEGS);
728 #else
729         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
730                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
731                 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
732         } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
733                 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
734         else
735                 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
736
737         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
738                 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
739         else
740                 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
741 #endif
742
743         // Refcount
744 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
745         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE);
746 #else
747         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
748                 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
749         else
750                 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
751 #endif
752
753         plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
754                   port_id, port_cfg->n_rxq, port_cfg->n_txq);
755
756         PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
757         PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
758
759         if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
760             !strcmp(port_cfg->short_name, "virtio") ||
761 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
762             !strcmp(port_cfg->short_name, "i40e") ||
763 #endif
764             !strcmp(port_cfg->short_name, "i40e_vf") ||
765             !strcmp(port_cfg->short_name, "avp") || /* Wind River */
766             !strcmp(port_cfg->driver_name, "") || /* NULL device */
767             !strcmp(port_cfg->short_name, "vmxnet3")) {
768                 port_cfg->port_conf.intr_conf.lsc = 0;
769                 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
770         }
771
772         if (port_cfg->lsc_set_explicitely) {
773                 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
774                 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
775         }
776         if (port_cfg->n_txd < port_cfg->min_tx_desc) {
777                 plog_info("\t\tNumber of TX descriptors is set to %d (minimum required for %s\n", port_cfg->min_tx_desc, port_cfg->short_name);
778                 port_cfg->n_txd = port_cfg->min_tx_desc;
779         }
780
781         if (port_cfg->n_rxd < port_cfg->min_rx_desc) {
782                 plog_info("\t\tNumber of RX descriptors is set to %d (minimum required for %s\n", port_cfg->min_rx_desc, port_cfg->short_name);
783                 port_cfg->n_rxd = port_cfg->min_rx_desc;
784         }
785
786         if (port_cfg->n_txd > port_cfg->max_tx_desc) {
787                 plog_info("\t\tNumber of TX descriptors is set to %d (maximum required for %s\n", port_cfg->max_tx_desc, port_cfg->short_name);
788                 port_cfg->n_txd = port_cfg->max_tx_desc;
789         }
790
791         if (port_cfg->n_rxd > port_cfg->max_rx_desc) {
792                 plog_info("\t\tNumber of RX descriptors is set to %d (maximum required for %s\n", port_cfg->max_rx_desc, port_cfg->short_name);
793                 port_cfg->n_rxd = port_cfg->max_rx_desc;
794         }
795
796         ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
797                                     port_cfg->n_txq, &port_cfg->port_conf);
798         PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
799
800         if (port_cfg->port_conf.intr_conf.lsc) {
801                 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
802         }
803
804         plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
805
806         /* initialize TX queues first */
807         for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
808                 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
809                           queue_id, port_cfg->socket, port_cfg->n_txd);
810                 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
811                                              port_cfg->socket, &port_cfg->tx_conf);
812                 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
813         }
814
815         /* initialize RX queues */
816         for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
817                 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
818                           queue_id, port_id, port_cfg->socket,
819                           port_cfg->n_rxd, port_cfg->pool[queue_id]);
820                 ret = rte_eth_rx_queue_setup(port_id, queue_id,
821                                              port_cfg->n_rxd,
822                                              port_cfg->socket, &port_cfg->rx_conf,
823                                              port_cfg->pool[queue_id]);
824                 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
825         }
826
827         plog_info("\t\tStarting up port %u ...", port_id);
828         ret = rte_eth_dev_start(port_id);
829
830         PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
831         plog_info(" done: ");
832
833         if (prox_port_cfg[port_id].is_vdev) {
834                 for (int vlan_id = 0; vlan_id < prox_port_cfg[port_id].n_vlans; vlan_id++) {
835                         set_ip_address(prox_port_cfg[port_id].names[vlan_id], prox_port_cfg[port_id].ip_addr[vlan_id].ip, prox_port_cfg[port_id].ip_addr[vlan_id].prefix);
836                 }
837         }
838         /* Getting link status can be done without waiting if Link
839            State Interrupt is enabled since in that case, if the link
840            is recognized as being down, an interrupt will notify that
841            it has gone up. */
842         if (port_cfg->port_conf.intr_conf.lsc)
843                 rte_eth_link_get_nowait(port_id, &link);
844         else
845                 rte_eth_link_get(port_id, &link);
846
847         port_cfg->link_up = link.link_status;
848         port_cfg->link_speed = link.link_speed;
849
850         if (link.link_status) {
851                 plog_info("Link Up - speed %'u Mbps - %s\n",
852                           link.link_speed,
853                           (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
854                           "full-duplex" : "half-duplex");
855         }
856         else {
857                 plog_info("Link Down\n");
858         }
859
860         if (port_cfg->promiscuous) {
861                 rte_eth_promiscuous_enable(port_id);
862                 plog_info("\t\tport %u in promiscuous mode\n", port_id);
863         }
864
865         if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
866             strcmp(port_cfg->short_name, "i40e") &&
867             strcmp(port_cfg->short_name, "i40e_vf") &&
868             strcmp(port_cfg->short_name, "vmxnet3")) {
869                 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
870                         ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
871                         if (ret) {
872                                 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
873                         }
874                 }
875                 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
876                         ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
877                         if (ret) {
878                                 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
879                         }
880                 }
881         }
882         if (port_cfg->nb_mc_addr) {
883                 rte_eth_allmulticast_enable(port_id);
884                 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
885                         plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
886                         port_cfg->nb_mc_addr = 0;
887                         rte_eth_allmulticast_disable(port_id);
888                         plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
889                 } else {
890                         plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
891                         plog_info("\t\tport %u in multicast mode\n", port_id);
892                 }
893         }
894 }
895
896 void init_port_all(void)
897 {
898         enum rte_proc_type_t proc_type;
899         proc_type = rte_eal_process_type();
900         if (proc_type == RTE_PROC_SECONDARY) {
901                 plog_info("\tSkipping port initialization as secondary process\n");
902                 return;
903         }
904         uint8_t max_port_idx = prox_last_port_active() + 1;
905
906         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
907                 if (!prox_port_cfg[portid].active) {
908                         continue;
909                 }
910                 init_port(&prox_port_cfg[portid]);
911         }
912 }
913
914 void close_ports_atexit(void)
915 {
916         uint8_t max_port_idx = prox_last_port_active() + 1;
917
918         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
919                 if (!prox_port_cfg[portid].active) {
920                         continue;
921                 }
922                 plog_info("Closing port %u\n", portid);
923                 rte_eth_dev_close(portid);
924         }
925
926         if (lcore_cfg == NULL)
927                 return;
928
929         struct lcore_cfg *lconf = NULL;
930         struct task_args *targ;
931         while (core_targ_next(&lconf, &targ, 0) == 0) {
932                 if (targ->pool) {
933                         rte_mempool_free(targ->pool);
934                         plog_info("freeing pool %p\n", targ->pool);
935                         targ->pool = NULL;
936                 }
937         }
938 }
939
940 void init_port_addr(void)
941 {
942         struct prox_port_cfg *port_cfg;
943         enum rte_proc_type_t proc_type;
944         int rc;
945
946         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
947                 if (!prox_port_cfg[port_id].active) {
948                         continue;
949                 }
950                 port_cfg = &prox_port_cfg[port_id];
951
952                 switch (port_cfg->type) {
953                 case PROX_PORT_MAC_HW:
954                         rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
955                         break;
956                 case PROX_PORT_MAC_RAND:
957                         prox_rte_eth_random_addr(port_cfg->eth_addr.addr_bytes);
958                         break;
959                 case PROX_PORT_MAC_SET:
960                         proc_type = rte_eal_process_type();
961                         if (proc_type == RTE_PROC_SECONDARY) {
962                                 plog_warn("\tport %u: unable to change port mac address as secondary process\n", port_id);
963                         } else if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
964                                 plog_warn("\tport %u: failed to set mac address. Error = %d\n", port_id, rc);
965                         else
966                                 plog_info("Setting MAC to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
967                         break;
968                 }
969         }
970 }
971
972 int port_is_active(uint8_t port_id)
973 {
974         if (port_id > PROX_MAX_PORTS) {
975                 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
976                 return 0;
977         }
978
979         struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
980         if (!port_cfg->active) {
981                 plog_info("Port %u is not active\n", port_id);
982                 return 0;
983         }
984         return 1;
985 }