Report failure when tap port is not mapped to real dpdk port.
[samplevnf.git] / VNFs / DPPD-PROX / prox_port_cfg.c
1 /*
2 // Copyright (c) 2010-2020 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <string.h>
18 #include <stdio.h>
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
21 #include <rte_mbuf.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
24 #else
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
26 #include <rte_dev.h>
27 #else
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
30 #endif
31 #endif
32 #endif
33
34 #include <sys/ioctl.h>
35 #include <net/if.h>
36
37 #include "prox_port_cfg.h"
38 #include "prox_globals.h"
39 #include "log.h"
40 #include "quit.h"
41 #include "defaults.h"
42 #include "toeplitz.h"
43 #include "defines.h"
44 #include "prox_cksum.h"
45 #include "stats_irq.h"
46 #include "prox_compat.h"
47 #include "rte_ethdev.h"
48 #include "lconf.h"
49
/* Per-port configuration table; functions in this file index it by port id. */
50 struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];
51
/* Counter incremented by lsc_cb() for every RTE_ETH_EVENT_INTR_LSC event. */
52 rte_atomic32_t lsc;
53
54 int prox_nb_active_ports(void)
55 {
56         int ret = 0;
57         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
58                 ret += prox_port_cfg[i].active;
59         }
60         return ret;
61 }
62
63 int prox_last_port_active(void)
64 {
65         int ret = -1;
66         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
67                 if (prox_port_cfg[i].active) {
68                         ret = i;
69                 }
70         }
71         return ret;
72 }
73
74 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
75 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
76         __attribute__((unused)) void *ret_param)
77 #else
78 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
79 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
80         __attribute__((unused)) void *ret_param)
81 #else
82 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
83 #endif
84 #endif
85 {
86         if (RTE_ETH_EVENT_INTR_LSC != type) {
87 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
88                 return -1;
89 #else
90                 return;
91 #endif
92         }
93
94         rte_atomic32_inc(&lsc);
95
96 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
97         return 0;
98 #endif
99 }
100
/*
 * Argument bundle that prox_pktmbuf_reinit() receives through its opaque
 * `arg` pointer.
 */
101 struct prox_pktmbuf_reinit_args {
102         struct rte_mempool *mp;      /* pool whose header_size locates the mbuf inside each object */
103         struct lcore_cfg   *lconf;   /* forwarded as the opaque argument of prox_pktmbuf_init() */
104 };
105
106 /* standard mbuf initialization procedure */
107 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
108 {
109         struct rte_mbuf *mbuf = _m;
110
111 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
112         mbuf->tx_offload = CALC_TX_OL(sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr));
113 #else
114         mbuf->pkt.vlan_macip.f.l2_len = sizeof(prox_rte_ether_hdr);
115         mbuf->pkt.vlan_macip.f.l3_len = sizeof(prox_rte_ipv4_hdr);
116 #endif
117
118         rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
119 }
120
121 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
122 {
123         struct prox_pktmbuf_reinit_args *init_args = arg;
124         struct rte_mbuf *m;
125         char* obj = start;
126
127         obj += init_args->mp->header_size;
128         m = (struct rte_mbuf*)obj;
129
130         prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
131 }
132
/*
 * CONFIGURE_TX_OFFLOAD(flag): resolve one requested TX offload.
 *
 * Expands in a scope where `port_cfg` points to the port being configured.
 * If the offload is requested it is, in order of precedence:
 *   - dropped from the request when listed in disabled_tx_offload,
 *   - enabled port-wide when the device's tx_offload_capa has it,
 *   - enabled per queue when tx_queue_offload_capa has it,
 *   - dropped from the request otherwise.
 * Every outcome is logged with the flag's spelling.
 *
 * The argument is parenthesized at each use so that compound expressions
 * such as (A | B) are tested and cleared as a whole.
 * The expansion is an if/else statement: do not use it as the unbraced body
 * of another if.
 */
#define CONFIGURE_TX_OFFLOAD(flag)                                           \
	if (port_cfg->requested_tx_offload & (flag))                            {\
		if (port_cfg->disabled_tx_offload & (flag))                     {\
			plog_info("\t\t%s disabled by configuration\n", #flag);\
			port_cfg->requested_tx_offload &= ~(flag);\
		} else if (port_cfg->dev_info.tx_offload_capa & (flag)) {\
			port_cfg->port_conf.txmode.offloads |= (flag);\
			plog_info("\t\t%s enabled on port\n", #flag);\
		} else if (port_cfg->dev_info.tx_queue_offload_capa & (flag)) {\
			port_cfg->tx_conf.offloads |= (flag);\
			plog_info("\t\t%s enabled on queue\n", #flag);\
		} else {\
			port_cfg->requested_tx_offload &= ~(flag);\
			plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
		}\
	} else {\
		plog_info("\t\t%s disabled\n", #flag);\
	}
151
/*
 * CONFIGURE_RX_OFFLOAD(flag): resolve one requested RX offload.
 *
 * Expands in a scope where `port_cfg` points to the port being configured.
 * If the offload is requested it is enabled port-wide when rx_offload_capa
 * has it, else per queue when rx_queue_offload_capa has it, else removed
 * from the request. Every outcome is logged with the flag's spelling.
 *
 * The argument is parenthesized at each use so that compound expressions
 * such as (A | B) are tested and cleared as a whole.
 * The expansion is an if/else statement: do not use it as the unbraced body
 * of another if.
 */
#define CONFIGURE_RX_OFFLOAD(flag)                                           \
	if (port_cfg->requested_rx_offload & (flag))                            {\
		if (port_cfg->dev_info.rx_offload_capa & (flag)) {\
			port_cfg->port_conf.rxmode.offloads |= (flag);\
			plog_info("\t\t%s enabled on port\n", #flag);\
		} else if (port_cfg->dev_info.rx_queue_offload_capa & (flag)) {\
			port_cfg->rx_conf.offloads |= (flag);\
			plog_info("\t\t%s enabled on queue\n", #flag);\
		} else {\
			port_cfg->requested_rx_offload &= ~(flag);\
			plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
		}\
	} else {\
		plog_info("\t\t%s disabled\n", #flag);\
	}
167
/*
 * Build the IPv4 netmask for a prefix length.
 *
 * prefix - number of leading one bits (0..32); values above 32 are treated
 *          as 32 instead of producing an invalid shift count.
 *
 * Returns 0 for prefix 0, otherwise the mask converted with
 * rte_cpu_to_be_32().
 */
static inline uint32_t get_netmask(uint8_t prefix)
{
	if (prefix == 0)
		return 0;
	if (prefix >= 32)
		return rte_cpu_to_be_32(UINT32_MAX);

	/* Unsigned arithmetic: a signed 1 << 31 (prefix == 1) would overflow. */
	return rte_cpu_to_be_32(~((UINT32_C(1) << (32 - prefix)) - 1));
}
175
176 static void set_ip_address(char *devname, uint32_t ip, uint8_t prefix)
177 {
178         struct ifreq ifreq;
179         struct sockaddr_in in_addr;
180         int fd, rc;
181         uint32_t netmask = get_netmask(prefix);
182         plog_info("Setting netmask to %x\n", netmask);
183         uint32_t ip_cpu = rte_be_to_cpu_32(ip);
184
185         fd = socket(AF_INET, SOCK_DGRAM, 0);
186
187         memset(&ifreq, 0, sizeof(struct ifreq));
188         memset(&in_addr, 0, sizeof(struct sockaddr_in));
189
190         in_addr.sin_family = AF_INET;
191         in_addr.sin_addr = *(struct in_addr *)&ip_cpu;
192
193         strncpy(ifreq.ifr_name, devname, IFNAMSIZ);
194         ifreq.ifr_addr = *(struct sockaddr *)&in_addr;
195         rc = ioctl(fd, SIOCSIFADDR, &ifreq);
196         PROX_PANIC(rc < 0, "Failed to set IP address %x on device %s: error = %d (%s)\n", ip_cpu, devname, errno, strerror(errno));
197
198         in_addr.sin_addr = *(struct in_addr *)&netmask;
199         ifreq.ifr_netmask = *(struct sockaddr *)&in_addr;
200         rc = ioctl(fd, SIOCSIFNETMASK, &ifreq);
201         PROX_PANIC(rc < 0, "Failed to set netmask %x (prefix %d) on device %s: error = %d (%s)\n", netmask, prefix, devname, errno, strerror(errno));
202         close(fd);
203 }
204
205 /* initialize rte devices and check the number of available ports */
206 void init_rte_dev(int use_dummy_devices)
207 {
208         uint8_t nb_ports, port_id_max;
209         int port_id_last, rc = 0;
210         struct rte_eth_dev_info dev_info;
211         const struct rte_pci_device *pci_dev;
212
213         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
214                 if (prox_port_cfg[port_id].active && (prox_port_cfg[port_id].virtual == 0) && (port_id >= prox_rte_eth_dev_count_avail())) {
215                         PROX_PANIC(1, "port %u used but only %u available\n", port_id, prox_rte_eth_dev_count_avail());
216                 }
217         }
218         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
219                 if (!prox_port_cfg[port_id].active) {
220                         continue;
221                 }
222                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
223
224                 prox_port_cfg[port_id].n_vlans = 0;
225                 while ((prox_port_cfg[port_id].n_vlans < PROX_MAX_VLAN_TAGS) && (prox_port_cfg[port_id].vlan_tags[prox_port_cfg[port_id].n_vlans])) {
226                         prox_port_cfg[port_id].n_vlans++;
227                 }
228
229                 if (port_cfg->vdev[0]) {
230                         char name[MAX_NAME_SIZE], tap[MAX_NAME_SIZE];
231                         snprintf(tap, MAX_NAME_SIZE, "net_tap%d", port_id);
232 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
233                         snprintf(name, MAX_NAME_SIZE, "iface=%s", port_cfg->vdev);
234                         rc = rte_vdev_init(tap, name);
235 #else
236                         PROX_PANIC(1, "vdev not supported in DPDK < 17.05\n");
237 #endif
238                         PROX_PANIC(rc != 0, "Unable to create device %s %s\n", "net tap", port_cfg->vdev);
239                         int vdev_port_id = prox_rte_eth_dev_count_avail() - 1;
240                         PROX_PANIC(vdev_port_id >= PROX_MAX_PORTS, "Too many port defined %d >= %d\n", vdev_port_id, PROX_MAX_PORTS);
241                         plog_info("\tCreating device %s, port %d\n", port_cfg->vdev, vdev_port_id);
242                         prox_port_cfg[vdev_port_id].is_vdev = 1;
243                         prox_port_cfg[vdev_port_id].active = 1;
244                         prox_port_cfg[vdev_port_id].dpdk_mapping = port_id;
245                         prox_port_cfg[vdev_port_id].n_txq = 1;
246                         prox_port_cfg[vdev_port_id].n_vlans = prox_port_cfg[port_id].n_vlans;
247
248                         for (uint32_t tag_id = 0; tag_id < prox_port_cfg[port_id].n_vlans; tag_id++) {
249                                 prox_port_cfg[vdev_port_id].vlan_tags[tag_id] = prox_port_cfg[port_id].vlan_tags[tag_id];
250                                 char command[1024];
251                                 snprintf(prox_port_cfg[vdev_port_id].names[tag_id], MAX_NAME_SIZE, "%s_%d", port_cfg->vdev, prox_port_cfg[port_id].vlan_tags[tag_id]);
252                                 sprintf(command, "ip link add link %s name %s type vlan id %d", port_cfg->vdev, prox_port_cfg[vdev_port_id].names[tag_id], prox_port_cfg[port_id].vlan_tags[tag_id]);
253                                 system(command);
254                                 plog_info("\tRunning %s\n", command);
255                                 plog_info("\tUsing vlan tag %d - added device %s\n", prox_port_cfg[port_id].vlan_tags[tag_id], prox_port_cfg[vdev_port_id].names[tag_id]);
256                         }
257                         if (prox_port_cfg[port_id].n_vlans == 0) {
258                                 strncpy(prox_port_cfg[vdev_port_id].names[0], port_cfg->vdev, MAX_NAME_SIZE);
259                                 prox_port_cfg[vdev_port_id].n_vlans = 1;
260                                 prox_port_cfg[vdev_port_id].vlan_tags[0] = 0;
261                         }
262
263                         prox_port_cfg[port_id].dpdk_mapping = vdev_port_id;
264                         uint32_t i = 0;
265                         while ((i < PROX_MAX_VLAN_TAGS) && (prox_port_cfg[port_id].ip_addr[i].ip)) {
266                                 prox_port_cfg[vdev_port_id].ip_addr[i].ip = prox_port_cfg[port_id].ip_addr[i].ip;
267                                 prox_port_cfg[vdev_port_id].ip_addr[i].prefix = prox_port_cfg[port_id].ip_addr[i].prefix;
268                                 i++;
269                         }
270                         prox_port_cfg[vdev_port_id].type = prox_port_cfg[port_id].type;
271                         if (prox_port_cfg[vdev_port_id].type == PROX_PORT_MAC_HW) {
272                                 // If DPDK port MAC set to HW, then make sure the vdev has the same MAC as DPDK port
273                                 prox_port_cfg[vdev_port_id].type = PROX_PORT_MAC_SET;
274                                 rte_eth_macaddr_get(port_id, &prox_port_cfg[vdev_port_id].eth_addr);
275                                 plog_info("\tDPDK port %d MAC address pre-configured to MAC from port %d: "MAC_BYTES_FMT"\n",
276                                         vdev_port_id, port_id, MAC_BYTES(prox_port_cfg[vdev_port_id].eth_addr.addr_bytes));
277                         } else
278                                 memcpy(&prox_port_cfg[vdev_port_id].eth_addr, &prox_port_cfg[port_id].eth_addr, sizeof(prox_port_cfg[port_id].eth_addr));
279                 }
280                 if (prox_port_cfg[port_id].n_vlans == 0) {
281                         prox_port_cfg[port_id].n_vlans = 1;
282                         prox_port_cfg[port_id].vlan_tags[0] = 0;
283                 }
284         }
285         nb_ports = prox_rte_eth_dev_count_avail();
286         /* get available ports configuration */
287         PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
288
289         if (use_dummy_devices) {
290 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
291                 nb_ports = prox_last_port_active() + 1;
292                 plog_info("Creating %u dummy devices\n", nb_ports);
293
294                 char port_name[32] = "0dummy_dev";
295                 for (uint32_t i = 0; i < nb_ports; ++i) {
296 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
297                         rte_vdev_init(port_name, "size=64,copy=0");
298 #else
299                         eth_dev_null_create(port_name, 0, PROX_RTE_ETHER_MIN_LEN, 0);
300 #endif
301                         port_name[0]++;
302                 }
303 #else
304         PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
305 #endif
306         }
307         else if (prox_last_port_active() != -1) {
308                 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
309                 plog_info("\tDPDK has found %u ports\n", nb_ports);
310         }
311
312         if (nb_ports > PROX_MAX_PORTS) {
313                 plog_warn("\tWarning: I can deal with at most %u ports."
314                         " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
315
316                 nb_ports = PROX_MAX_PORTS;
317         }
318
319 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
320         port_id_max = -1;
321         uint16_t id;
322         RTE_ETH_FOREACH_DEV(id) {
323                 char name[256];
324                 rte_eth_dev_get_name_by_port(id, name);
325                 plog_info("\tFound DPDK port id %u %s\n", id, name);
326                 if (id >= PROX_MAX_PORTS) {
327                         plog_warn("\tWarning: I can deal with at most %u ports."
328                                  " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
329                 } else {
330                         prox_port_cfg[id].available = 1;
331                         if (id > port_id_max)
332                                 port_id_max = id;
333                 }
334         }
335 #else
336         port_id_max = nb_ports - 1;
337 #endif
338
339         port_id_last = prox_last_port_active();
340         PROX_PANIC(port_id_last > port_id_max,
341                    "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
342                    port_id_last, port_id_max);
343
344         /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
345 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
346         for (uint8_t port_id = 0; port_id <= port_id_last; ++port_id) {
347 #else
348         for (uint8_t port_id = 0; port_id <= nb_ports; ++port_id) {
349 #endif
350                 /* skip ports that are not enabled */
351                 if (!prox_port_cfg[port_id].active) {
352                         continue;
353 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
354                 } else if (prox_port_cfg[port_id].available == 0) {
355                         PROX_PANIC(1, "port %u enabled but not available\n", port_id);
356 #endif
357                 }
358                 plog_info("\tGetting info for rte dev %u\n", port_id);
359                 rte_eth_dev_info_get(port_id, &dev_info);
360                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
361                 port_cfg->socket = -1;
362
363                 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
364                 port_cfg->max_txq = dev_info.max_tx_queues;
365                 port_cfg->max_rxq = dev_info.max_rx_queues;
366                 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
367                 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
368                 port_cfg->min_tx_desc = dev_info.tx_desc_lim.nb_min;
369                 port_cfg->max_tx_desc = dev_info.tx_desc_lim.nb_max;
370                 port_cfg->min_rx_desc = dev_info.rx_desc_lim.nb_min;
371                 port_cfg->max_rx_desc = dev_info.rx_desc_lim.nb_max;
372
373                 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
374                 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
375                 plog_info("\tPort %u : %d<=nb_tx_desc<=%d %d<=nb_rx_desc<=%d\n", port_id, port_cfg->min_tx_desc, port_cfg->max_tx_desc, port_cfg->min_rx_desc, port_cfg->max_rx_desc);
376
377                 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
378                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
379                 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
380                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
381                 } else {
382                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
383                 }
384                 char *ptr;
385                 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
386                         *ptr = '\x0';
387                 }
388
389 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
390                 pci_dev = dev_info.pci_dev;
391 #else
392                 if (!dev_info.device)
393                         continue;
394                 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
395 #endif
396                 if (!pci_dev)
397                         continue;
398
399                 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
400                          "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
401                 /* Try to find the device's numa node */
402                 char buf[1024];
403                 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
404                 FILE* numa_node_fd = fopen(buf, "r");
405                 if (numa_node_fd) {
406                         if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
407                                 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
408                         }
409                         port_cfg->socket = strtol(buf, 0, 0);
410                         if (port_cfg->socket == -1) {
411                                 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
412                         }
413                         fclose(numa_node_fd);
414                 }
415
416                 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
417                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
418                         plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
419                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_IPV4_CKSUM;
420                 }
421                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
422                         plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
423                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
424                 }
425                 // Some OVS versions reports that they support UDP offload and no IPv4 offload, but fails when UDP offload is enabled
426                 if ((!strcmp(port_cfg->short_name, "virtio")) &&
427                         ((port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) == 0) &&
428                         (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
429                         plog_info("\t\tDisabling UDP cksum on virtio\n");
430                         port_cfg->disabled_tx_offload |= DEV_TX_OFFLOAD_UDP_CKSUM;
431                 }
432         }
433 }
434
435 /* Create rte ring-backed devices */
436 uint8_t init_rte_ring_dev(void)
437 {
438         uint8_t nb_ring_dev = 0;
439
440         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
441                 /* skip ports that are not enabled */
442                 if (!prox_port_cfg[port_id].active) {
443                         continue;
444                 }
445                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
446                 if (port_cfg->rx_ring[0] != '\0') {
447                         plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
448
449                         struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
450                         PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
451                         struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
452                         PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
453
454                         int ret = rte_eth_from_rings(port_cfg->names[0], &rx_ring, 1, &tx_ring, 1, rte_socket_id());
455                         PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
456
457                         port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
458
459                         nb_ring_dev++;
460                 }
461         }
462
463         return nb_ring_dev;
464 }
465
466 static void print_port_capa(struct prox_port_cfg *port_cfg)
467 {
468         uint8_t port_id;
469
470         port_id = port_cfg - prox_port_cfg;
471         plog_info("\t*** Initializing port %u ***\n", port_id);
472         plog_info("\t\tPort name is set to %s\n", port_cfg->names[0]);
473         plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
474         plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
475 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
476         plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
477 #endif
478         if (port_cfg->max_link_speed != UINT32_MAX) {
479                 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
480         }
481
482 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
483         plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
484         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP)
485                 plog_info("VLAN STRIP | ");
486         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
487                 plog_info("IPV4 CKSUM | ");
488         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
489                 plog_info("UDP CKSUM | ");
490         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
491                 plog_info("TCP CKSUM | ");
492         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
493                 plog_info("TCP LRO | ");
494         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP)
495                 plog_info("QINQ STRIP | ");
496         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
497                 plog_info("OUTER_IPV4_CKSUM | ");
498         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_MACSEC_STRIP)
499                 plog_info("MACSEC STRIP | ");
500         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_HEADER_SPLIT)
501                 plog_info("HEADER SPLIT | ");
502         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER)
503                 plog_info("VLAN FILTER | ");
504         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_EXTEND)
505                 plog_info("VLAN EXTEND | ");
506         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME)
507                 plog_info("JUMBO FRAME | ");
508 #if defined(DEV_RX_OFFLOAD_CRC_STRIP)
509         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP)
510                 plog_info("CRC STRIP | ");
511 #endif
512 #if defined(DEV_RX_OFFLOAD_KEEP_CRC)
513         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC)
514                 plog_info("KEEP CRC | ");
515 #endif
516         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER)
517                 plog_info("SCATTER | ");
518         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)
519                 plog_info("TIMESTAMP | ");
520         if (port_cfg->dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY)
521                 plog_info("SECURITY ");
522         plog_info("\n");
523
524         plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
525         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)
526                 plog_info("VLAN INSERT | ");
527         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)
528                 plog_info("IPV4 CKSUM | ");
529         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)
530                 plog_info("UDP CKSUM | ");
531         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)
532                 plog_info("TCP CKSUM | ");
533         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)
534                 plog_info("SCTP CKSUM | ");
535         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
536                 plog_info("TCP TS0 | ");
537         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)
538                 plog_info("UDP TSO | ");
539         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
540                 plog_info("OUTER IPV4 CKSUM | ");
541         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT)
542                 plog_info("QINQ INSERT | ");
543         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO)
544                 plog_info("VLAN TNL TSO | ");
545         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GRE_TNL_TSO)
546                 plog_info("GRE TNL TSO | ");
547         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPIP_TNL_TSO)
548                 plog_info("IPIP TNL TSO | ");
549         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_GENEVE_TNL_TSO)
550                 plog_info("GENEVE TNL TSO | ");
551         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MACSEC_INSERT)
552                 plog_info("MACSEC INSERT | ");
553         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MT_LOCKFREE)
554                 plog_info("MT LOCKFREE | ");
555         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MULTI_SEGS)
556                 plog_info("MULTI SEG | ");
557         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY)
558                 plog_info("SECURITY | ");
559         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TNL_TSO)
560                 plog_info("UDP TNL TSO | ");
561         if (port_cfg->dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IP_TNL_TSO)
562                 plog_info("IP TNL TSO | ");
563         plog_info("\n");
564
565         plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
566         plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
567         plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
568         plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
569         plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
570 #endif
571 }
572
573 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
574 {
575         port_cfg->max_link_speed = UINT32_MAX;
576
577 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
578         // virtio and vmxnet3 reports fake max_link_speed
579         if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
580                 // Get link_speed from highest capability from the port
581                 // This will be used by gen and lat for extrapolation purposes
582                 // The negotiated link_speed (as reported by rte_eth_link_get
583                 // or rte_eth_link_get_nowait) might be reported too late
584                 // and might result in wrong exrapolation, and hence should not be used
585                 // for extrapolation purposes
586                 if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_100G)
587                         port_cfg->max_link_speed = ETH_SPEED_NUM_100G;
588                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_56G)
589                         port_cfg->max_link_speed = ETH_SPEED_NUM_56G;
590                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_50G)
591                         port_cfg->max_link_speed = ETH_SPEED_NUM_50G;
592                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_40G)
593                         port_cfg->max_link_speed = ETH_SPEED_NUM_40G;
594                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_25G)
595                         port_cfg->max_link_speed = ETH_SPEED_NUM_25G;
596                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_20G)
597                         port_cfg->max_link_speed = ETH_SPEED_NUM_20G;
598                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_10G)
599                         port_cfg->max_link_speed = ETH_SPEED_NUM_10G;
600                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_5G)
601                         port_cfg->max_link_speed = ETH_SPEED_NUM_5G;
602                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_2_5G)
603                         port_cfg->max_link_speed = ETH_SPEED_NUM_2_5G;
604                 else if (port_cfg->dev_info.speed_capa & ETH_LINK_SPEED_1G)
605                         port_cfg->max_link_speed = ETH_SPEED_NUM_1G;
606                 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_100M_HD | ETH_LINK_SPEED_100M))
607                         port_cfg->max_link_speed = ETH_SPEED_NUM_100M;
608                 else if (port_cfg->dev_info.speed_capa & (ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M))
609                         port_cfg->max_link_speed = ETH_SPEED_NUM_10M;
610
611         }
612 #endif
613 }
614
615 static void init_port(struct prox_port_cfg *port_cfg)
616 {
617         static char dummy_pool_name[] = "0_dummy";
618         struct rte_eth_link link;
619         uint8_t port_id;
620         int ret;
621
622         get_max_link_speed(port_cfg);
623         print_port_capa(port_cfg);
624         port_id = port_cfg - prox_port_cfg;
625         PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
626                    "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
627
628         if (port_cfg->n_rxq == 0) {
629                 /* not receiving on this port */
630                 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
631                 port_cfg->n_rxq = 1;
632                 uint32_t mbuf_size = TX_MBUF_SIZE;
633                 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
634                         mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
635
636                 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
637                           port_cfg->socket, port_cfg->n_rxd, mbuf_size);
638                 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
639                                                        0,
640                                                        sizeof(struct rte_pktmbuf_pool_private),
641                                                        rte_pktmbuf_pool_init, NULL,
642                                                        prox_pktmbuf_init, 0,
643                                                        port_cfg->socket, 0);
644                 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
645                            port_cfg->socket, port_cfg->n_rxd);
646                 dummy_pool_name[0]++;
647         } else {
648                 // Most pmd should now support setting mtu
649                 if (port_cfg->mtu + PROX_RTE_ETHER_HDR_LEN + PROX_RTE_ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
650                         plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
651                         port_cfg->mtu = port_cfg->max_rx_pkt_len;
652                 }
653                 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
654                 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
655                 if (ret)
656                         plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
657
658                 if (port_cfg->n_txq == 0) {
659                         /* not sending on this port */
660                         plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
661                         port_cfg->n_txq = 1;
662                 }
663         }
664
665         if (port_cfg->n_rxq > 1)  {
666                 // Enable RSS if multiple receive queues
667                 if (strcmp(port_cfg->short_name, "virtio")) {
668                         port_cfg->port_conf.rxmode.mq_mode                      |= ETH_MQ_RX_RSS;
669                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key        = toeplitz_init_key;
670                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len    = TOEPLITZ_KEY_LEN;
671 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
672                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IP|ETH_RSS_UDP;
673 #else
674                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
675 #endif
676                 }
677         }
678
679         // Make sure that the requested RSS offload is supported by the PMD
680 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
681         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
682 #endif
683         if (strcmp(port_cfg->short_name, "virtio")) {
684                 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, ETH_RSS_IP|ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
685         } else {
686                 plog_info("\t\t Not enabling RSS on virtio port");
687         }
688
689         // rxmode such as hw src strip
690 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
691 #if defined (DEV_RX_OFFLOAD_CRC_STRIP)
692         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_CRC_STRIP);
693 #endif
694 #if defined (DEV_RX_OFFLOAD_KEEP_CRC)
695         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_KEEP_CRC);
696 #endif
697         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_JUMBO_FRAME);
698         CONFIGURE_RX_OFFLOAD(DEV_RX_OFFLOAD_VLAN_STRIP);
699 #else
700         if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_CRC_STRIP) {
701                 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
702         }
703         if (port_cfg->requested_rx_offload & DEV_RX_OFFLOAD_JUMBO_FRAME) {
704                 port_cfg->port_conf.rxmode.jumbo_frame = 1;
705         }
706 #endif
707
708         // IPV4, UDP, SCTP Checksums
709 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
710         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_IPV4_CKSUM);
711         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_UDP_CKSUM);
712         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_VLAN_INSERT);
713 #else
714         if ((port_cfg->dev_info.tx_offload_capa & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)) == 0) {
715                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
716                 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
717         }
718         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
719                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
720                 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
721         }
722 #endif
723         // Multi Segments
724 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
725         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MULTI_SEGS);
726 #else
727         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
728                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
729                 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
730         } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
731                 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
732         else
733                 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
734
735         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
736                 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
737         else
738                 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
739 #endif
740
741         // Refcount
742 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
743         CONFIGURE_TX_OFFLOAD(DEV_TX_OFFLOAD_MBUF_FAST_FREE);
744 #else
745         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
746                 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
747         else
748                 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
749 #endif
750
751         plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
752                   port_id, port_cfg->n_rxq, port_cfg->n_txq);
753
754         PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
755         PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
756
757         if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
758             !strcmp(port_cfg->short_name, "virtio") ||
759 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
760             !strcmp(port_cfg->short_name, "i40e") ||
761 #endif
762             !strcmp(port_cfg->short_name, "i40e_vf") ||
763             !strcmp(port_cfg->short_name, "avp") || /* Wind River */
764             !strcmp(port_cfg->driver_name, "") || /* NULL device */
765             !strcmp(port_cfg->short_name, "vmxnet3")) {
766                 port_cfg->port_conf.intr_conf.lsc = 0;
767                 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
768         }
769
770         if (port_cfg->lsc_set_explicitely) {
771                 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
772                 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
773         }
774         if (port_cfg->n_txd < port_cfg->min_tx_desc) {
775                 plog_info("\t\tNumber of TX descriptors is set to %d (minimum required for %s\n", port_cfg->min_tx_desc, port_cfg->short_name);
776                 port_cfg->n_txd = port_cfg->min_tx_desc;
777         }
778
779         if (port_cfg->n_rxd < port_cfg->min_rx_desc) {
780                 plog_info("\t\tNumber of RX descriptors is set to %d (minimum required for %s\n", port_cfg->min_rx_desc, port_cfg->short_name);
781                 port_cfg->n_rxd = port_cfg->min_rx_desc;
782         }
783
784         if (port_cfg->n_txd > port_cfg->max_tx_desc) {
785                 plog_info("\t\tNumber of TX descriptors is set to %d (maximum required for %s\n", port_cfg->max_tx_desc, port_cfg->short_name);
786                 port_cfg->n_txd = port_cfg->max_tx_desc;
787         }
788
789         if (port_cfg->n_rxd > port_cfg->max_rx_desc) {
790                 plog_info("\t\tNumber of RX descriptors is set to %d (maximum required for %s\n", port_cfg->max_rx_desc, port_cfg->short_name);
791                 port_cfg->n_rxd = port_cfg->max_rx_desc;
792         }
793
794         ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
795                                     port_cfg->n_txq, &port_cfg->port_conf);
796         PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
797
798         if (port_cfg->port_conf.intr_conf.lsc) {
799                 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
800         }
801
802         plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
803
804         /* initialize TX queues first */
805         for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
806                 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
807                           queue_id, port_cfg->socket, port_cfg->n_txd);
808                 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
809                                              port_cfg->socket, &port_cfg->tx_conf);
810                 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
811         }
812
813         /* initialize RX queues */
814         for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
815                 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
816                           queue_id, port_id, port_cfg->socket,
817                           port_cfg->n_rxd, port_cfg->pool[queue_id]);
818                 ret = rte_eth_rx_queue_setup(port_id, queue_id,
819                                              port_cfg->n_rxd,
820                                              port_cfg->socket, &port_cfg->rx_conf,
821                                              port_cfg->pool[queue_id]);
822                 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
823         }
824
825         plog_info("\t\tStarting up port %u ...", port_id);
826         ret = rte_eth_dev_start(port_id);
827
828         PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
829         plog_info(" done: ");
830
831         if (prox_port_cfg[port_id].is_vdev) {
832                 for (int vlan_id = 0; vlan_id < prox_port_cfg[port_id].n_vlans; vlan_id++) {
833                         set_ip_address(prox_port_cfg[port_id].names[vlan_id], prox_port_cfg[port_id].ip_addr[vlan_id].ip, prox_port_cfg[port_id].ip_addr[vlan_id].prefix);
834                 }
835         }
836         /* Getting link status can be done without waiting if Link
837            State Interrupt is enabled since in that case, if the link
838            is recognized as being down, an interrupt will notify that
839            it has gone up. */
840         if (port_cfg->port_conf.intr_conf.lsc)
841                 rte_eth_link_get_nowait(port_id, &link);
842         else
843                 rte_eth_link_get(port_id, &link);
844
845         port_cfg->link_up = link.link_status;
846         port_cfg->link_speed = link.link_speed;
847
848         if (link.link_status) {
849                 plog_info("Link Up - speed %'u Mbps - %s\n",
850                           link.link_speed,
851                           (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
852                           "full-duplex" : "half-duplex");
853         }
854         else {
855                 plog_info("Link Down\n");
856         }
857
858         if (port_cfg->promiscuous) {
859                 rte_eth_promiscuous_enable(port_id);
860                 plog_info("\t\tport %u in promiscuous mode\n", port_id);
861         }
862
863         if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
864             strcmp(port_cfg->short_name, "i40e") &&
865             strcmp(port_cfg->short_name, "i40e_vf") &&
866             strcmp(port_cfg->short_name, "vmxnet3")) {
867                 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
868                         ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
869                         if (ret) {
870                                 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
871                         }
872                 }
873                 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
874                         ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
875                         if (ret) {
876                                 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
877                         }
878                 }
879         }
880         if (port_cfg->nb_mc_addr) {
881                 rte_eth_allmulticast_enable(port_id);
882                 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
883                         plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
884                         port_cfg->nb_mc_addr = 0;
885                         rte_eth_allmulticast_disable(port_id);
886                         plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
887                 } else {
888                         plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
889                         plog_info("\t\tport %u in multicast mode\n", port_id);
890                 }
891         }
892 }
893
894 void init_port_all(void)
895 {
896         enum rte_proc_type_t proc_type;
897         proc_type = rte_eal_process_type();
898         if (proc_type == RTE_PROC_SECONDARY) {
899                 plog_info("\tSkipping port initialization as secondary process\n");
900                 return;
901         }
902         uint8_t max_port_idx = prox_last_port_active() + 1;
903
904         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
905                 if (!prox_port_cfg[portid].active) {
906                         continue;
907                 }
908                 init_port(&prox_port_cfg[portid]);
909         }
910 }
911
912 void close_ports_atexit(void)
913 {
914         uint8_t max_port_idx = prox_last_port_active() + 1;
915
916         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
917                 if (!prox_port_cfg[portid].active) {
918                         continue;
919                 }
920                 plog_info("Closing port %u\n", portid);
921                 rte_eth_dev_close(portid);
922         }
923
924         if (lcore_cfg == NULL)
925                 return;
926
927         struct lcore_cfg *lconf = NULL;
928         struct task_args *targ;
929         while (core_targ_next(&lconf, &targ, 0) == 0) {
930                 if (targ->pool) {
931                         rte_mempool_free(targ->pool);
932                         plog_info("freeing pool %p\n", targ->pool);
933                         targ->pool = NULL;
934                 }
935         }
936 }
937
938 void init_port_addr(void)
939 {
940         struct prox_port_cfg *port_cfg;
941         enum rte_proc_type_t proc_type;
942         int rc;
943
944         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
945                 if (!prox_port_cfg[port_id].active) {
946                         continue;
947                 }
948                 port_cfg = &prox_port_cfg[port_id];
949
950                 switch (port_cfg->type) {
951                 case PROX_PORT_MAC_HW:
952                         rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
953                         break;
954                 case PROX_PORT_MAC_RAND:
955                         prox_rte_eth_random_addr(port_cfg->eth_addr.addr_bytes);
956                         break;
957                 case PROX_PORT_MAC_SET:
958                         proc_type = rte_eal_process_type();
959                         if (proc_type == RTE_PROC_SECONDARY) {
960                                 plog_warn("\tport %u: unable to change port mac address as secondary process\n", port_id);
961                         } else if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
962                                 plog_warn("\tport %u: failed to set mac address. Error = %d\n", port_id, rc);
963                         else
964                                 plog_info("Setting MAC to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
965                         break;
966                 }
967         }
968 }
969
970 int port_is_active(uint8_t port_id)
971 {
972         if (port_id > PROX_MAX_PORTS) {
973                 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
974                 return 0;
975         }
976
977         struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
978         if (!port_cfg->active) {
979                 plog_info("Port %u is not active\n", port_id);
980                 return 0;
981         }
982         return 1;
983 }