Fix socket for vdev port
[samplevnf.git] / VNFs / DPPD-PROX / prox_port_cfg.c
1 /*
2 // Copyright (c) 2010-2020 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <string.h>
18 #include <stdio.h>
19 #include <rte_version.h>
20 #include <rte_eth_ring.h>
21 #include <rte_mbuf.h>
22 #if (RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0))
23 #include <rte_bus_vdev.h>
24 #else
25 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,2))
26 #include <rte_dev.h>
27 #else
28 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
29 #include <rte_eth_null.h>
30 #endif
31 #endif
32 #endif
33
34 #include <sys/ioctl.h>
35 #include <net/if.h>
36
37 #include "prox_port_cfg.h"
38 #include "prox_globals.h"
39 #include "log.h"
40 #include "quit.h"
41 #include "defaults.h"
42 #include "toeplitz.h"
43 #include "defines.h"
44 #include "prox_cksum.h"
45 #include "stats_irq.h"
46 #include "prox_compat.h"
47 #include "rte_ethdev.h"
48 #include "lconf.h"
49
/* Per-port configuration table; the code in this file indexes it by port id. */
struct prox_port_cfg prox_port_cfg[PROX_MAX_PORTS];

/* Counter incremented by lsc_cb() each time a link-state-change event is reported. */
rte_atomic32_t lsc;
53
54 int prox_nb_active_ports(void)
55 {
56         int ret = 0;
57         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
58                 ret += prox_port_cfg[i].active;
59         }
60         return ret;
61 }
62
63 int prox_last_port_active(void)
64 {
65         int ret = -1;
66         for (uint32_t i = 0; i < PROX_MAX_PORTS; ++i) {
67                 if (prox_port_cfg[i].active) {
68                         ret = i;
69                 }
70         }
71         return ret;
72 }
73
74 #if RTE_VERSION >= RTE_VERSION_NUM(17,11,0,0)
75 static int lsc_cb(__attribute__((unused)) uint16_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
76         __attribute__((unused)) void *ret_param)
77 #else
78 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
79 static int lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param,
80         __attribute__((unused)) void *ret_param)
81 #else
82 static void lsc_cb(__attribute__((unused)) uint8_t port_id, enum rte_eth_event_type type, __attribute__((unused)) void *param)
83 #endif
84 #endif
85 {
86         if (RTE_ETH_EVENT_INTR_LSC != type) {
87 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
88                 return -1;
89 #else
90                 return;
91 #endif
92         }
93
94         rte_atomic32_inc(&lsc);
95
96 #if RTE_VERSION >= RTE_VERSION_NUM(17,8,0,1)
97         return 0;
98 #endif
99 }
100
/* Argument bundle handed to prox_pktmbuf_reinit() through its opaque 'arg' pointer. */
struct prox_pktmbuf_reinit_args {
	struct rte_mempool *mp;    /* pool whose header_size locates the mbuf inside each object */
	struct lcore_cfg   *lconf; /* forwarded to prox_pktmbuf_init() as its opaque argument */
};
105
106 /* standard mbuf initialization procedure */
107 void prox_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, void *_m, unsigned i)
108 {
109         struct rte_mbuf *mbuf = _m;
110
111 #if RTE_VERSION >= RTE_VERSION_NUM(1,8,0,0)
112         mbuf->tx_offload = CALC_TX_OL(sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr));
113 #else
114         mbuf->pkt.vlan_macip.f.l2_len = sizeof(prox_rte_ether_hdr);
115         mbuf->pkt.vlan_macip.f.l3_len = sizeof(prox_rte_ipv4_hdr);
116 #endif
117
118         rte_pktmbuf_init(mp, opaque_arg, mbuf, i);
119 }
120
121 void prox_pktmbuf_reinit(void *arg, void *start, __attribute__((unused)) void *end, uint32_t idx)
122 {
123         struct prox_pktmbuf_reinit_args *init_args = arg;
124         struct rte_mbuf *m;
125         char* obj = start;
126
127         obj += init_args->mp->header_size;
128         m = (struct rte_mbuf*)obj;
129
130         prox_pktmbuf_init(init_args->mp, init_args->lconf, obj, idx);
131 }
132
/*
 * Resolve one requested TX offload for the port pointed to by a variable
 * named 'port_cfg' that must be in scope at the point of use.
 *
 * If 'flag' is requested:
 *   - disabled by configuration      -> cleared from requested_tx_offload
 *   - supported at port level        -> set in port_conf.txmode.offloads
 *   - supported at queue level only  -> set in tx_conf.offloads
 *   - not supported at all           -> cleared from requested_tx_offload
 * The outcome is logged in every case.
 *
 * 'flag' is parenthesized in every expression so that a compound argument
 * (e.g. A | B) is tested and cleared as a whole. The expansion is a bare
 * if/else statement, as in the original, so it works with or without a
 * trailing semicolon at the call site; do not use it as the unbraced body
 * of another if/else.
 */
#define CONFIGURE_TX_OFFLOAD(flag)                                           \
	if (port_cfg->requested_tx_offload & (flag))                            {\
		if (port_cfg->disabled_tx_offload & (flag))                     {\
			plog_info("\t\t%s disabled by configuration\n", #flag);\
			port_cfg->requested_tx_offload &= ~(flag);\
		} else if (port_cfg->dev_info.tx_offload_capa & (flag)) {\
			port_cfg->port_conf.txmode.offloads |= (flag);\
			plog_info("\t\t%s enabled on port\n", #flag);\
		} else if (port_cfg->dev_info.tx_queue_offload_capa & (flag)) {\
			port_cfg->tx_conf.offloads |= (flag);\
			plog_info("\t\t%s enabled on queue\n", #flag);\
		} else {\
			port_cfg->requested_tx_offload &= ~(flag);\
			plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
		}\
	} else {\
		plog_info("\t\t%s disabled\n", #flag);\
	}
151
/*
 * Resolve one requested RX offload for the port pointed to by a variable
 * named 'port_cfg' that must be in scope at the point of use.
 *
 * If 'flag' is requested:
 *   - supported at port level        -> set in port_conf.rxmode.offloads
 *   - supported at queue level only  -> set in rx_conf.offloads
 *   - not supported at all           -> cleared from requested_rx_offload
 * The outcome is logged in every case.
 *
 * 'flag' is parenthesized in every expression so that a compound argument
 * (e.g. A | B) is tested and cleared as a whole. The expansion is a bare
 * if/else statement, as in the original, so it works with or without a
 * trailing semicolon at the call site; do not use it as the unbraced body
 * of another if/else.
 */
#define CONFIGURE_RX_OFFLOAD(flag)                                           \
	if (port_cfg->requested_rx_offload & (flag))                            {\
		if (port_cfg->dev_info.rx_offload_capa & (flag)) {\
			port_cfg->port_conf.rxmode.offloads |= (flag);\
			plog_info("\t\t%s enabled on port\n", #flag);\
		} else if (port_cfg->dev_info.rx_queue_offload_capa & (flag)) {\
			port_cfg->rx_conf.offloads |= (flag);\
			plog_info("\t\t%s enabled on queue\n", #flag);\
		} else {\
			port_cfg->requested_rx_offload &= ~(flag);\
			plog_info("\t\t%s disabled as neither port or queue supports it\n", #flag);\
		}\
	} else {\
		plog_info("\t\t%s disabled\n", #flag);\
	}
167
/*
 * Convert an IPv4 prefix length into a netmask, converted with
 * rte_cpu_to_be_32() (i.e. big-endian byte order).
 *
 * prefix - number of leading one bits; 0 yields an all-zero mask, and any
 *          value of 32 or more yields an all-ones mask.
 *
 * The shift is done on an unsigned 32-bit value: the original signed
 * '1 << (32 - prefix)' overflowed for prefix 1 and used a negative shift
 * count for prefix > 32, both undefined behaviour.
 */
static inline uint32_t get_netmask(uint8_t prefix)
{
	if (prefix == 0)
		return 0;
	if (prefix >= 32)
		return rte_cpu_to_be_32(UINT32_MAX);
	return rte_cpu_to_be_32(~((((uint32_t)1) << (32 - prefix)) - 1));
}
175
176 static void set_ip_address(char *devname, uint32_t ip, uint8_t prefix)
177 {
178         struct ifreq ifreq;
179         struct sockaddr_in in_addr;
180         int fd, rc;
181         uint32_t netmask = get_netmask(prefix);
182         plog_info("Setting netmask to %x\n", netmask);
183         uint32_t ip_cpu = rte_be_to_cpu_32(ip);
184
185         fd = socket(AF_INET, SOCK_DGRAM, 0);
186
187         memset(&ifreq, 0, sizeof(struct ifreq));
188         memset(&in_addr, 0, sizeof(struct sockaddr_in));
189
190         in_addr.sin_family = AF_INET;
191         in_addr.sin_addr = *(struct in_addr *)&ip_cpu;
192
193         prox_strncpy(ifreq.ifr_name, devname, IFNAMSIZ);
194         ifreq.ifr_addr = *(struct sockaddr *)&in_addr;
195         rc = ioctl(fd, SIOCSIFADDR, &ifreq);
196         PROX_PANIC(rc < 0, "Failed to set IP address %x on device %s: error = %d (%s)\n", ip_cpu, devname, errno, strerror(errno));
197
198         in_addr.sin_addr = *(struct in_addr *)&netmask;
199         ifreq.ifr_netmask = *(struct sockaddr *)&in_addr;
200         rc = ioctl(fd, SIOCSIFNETMASK, &ifreq);
201         PROX_PANIC(rc < 0, "Failed to set netmask %x (prefix %d) on device %s: error = %d (%s)\n", netmask, prefix, devname, errno, strerror(errno));
202         close(fd);
203 }
204
205 /* initialize rte devices and check the number of available ports */
206 void init_rte_dev(int use_dummy_devices)
207 {
208         uint8_t nb_ports, port_id_max;
209         int port_id_last, rc = 0;
210         struct rte_eth_dev_info dev_info;
211         const struct rte_pci_device *pci_dev;
212
213         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
214                 if (prox_port_cfg[port_id].active && (prox_port_cfg[port_id].virtual == 0) && (port_id >= prox_rte_eth_dev_count_avail())) {
215                         PROX_PANIC(1, "port %u used but only %u available\n", port_id, prox_rte_eth_dev_count_avail());
216                 }
217         }
218         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
219                 if (!prox_port_cfg[port_id].active) {
220                         continue;
221                 }
222                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
223
224                 prox_port_cfg[port_id].n_vlans = 0;
225                 while ((prox_port_cfg[port_id].n_vlans < PROX_MAX_VLAN_TAGS) && (prox_port_cfg[port_id].vlan_tags[prox_port_cfg[port_id].n_vlans])) {
226                         prox_port_cfg[port_id].n_vlans++;
227                 }
228
229                 if (port_cfg->vdev[0]) {
230                         char name[MAX_NAME_BUFFER_SIZE], tap[MAX_NAME_SIZE];
231                         snprintf(tap, MAX_NAME_SIZE, "net_tap%d", port_id);
232 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
233                         snprintf(name, MAX_NAME_BUFFER_SIZE, "iface=%s", port_cfg->vdev);
234                         rc = rte_vdev_init(tap, name);
235 #else
236                         PROX_PANIC(1, "vdev not supported in DPDK < 17.05\n");
237 #endif
238                         PROX_PANIC(rc != 0, "Unable to create device %s %s\n", "net tap", port_cfg->vdev);
239                         int vdev_port_id = prox_rte_eth_dev_count_avail() - 1;
240                         PROX_PANIC(vdev_port_id >= PROX_MAX_PORTS, "Too many port defined %d >= %d\n", vdev_port_id, PROX_MAX_PORTS);
241                         plog_info("\tCreating device %s, port %d\n", port_cfg->vdev, vdev_port_id);
242                         prox_port_cfg[vdev_port_id].is_vdev = 1;
243                         prox_port_cfg[vdev_port_id].active = 1;
244                         prox_port_cfg[vdev_port_id].dpdk_mapping = port_id;
245                         prox_port_cfg[vdev_port_id].n_txq = 1;
246                         prox_port_cfg[vdev_port_id].n_vlans = prox_port_cfg[port_id].n_vlans;
247
248                         for (uint32_t tag_id = 0; tag_id < prox_port_cfg[port_id].n_vlans; tag_id++) {
249                                 prox_port_cfg[vdev_port_id].vlan_tags[tag_id] = prox_port_cfg[port_id].vlan_tags[tag_id];
250                                 char command[1024];
251                                 snprintf(prox_port_cfg[vdev_port_id].names[tag_id], MAX_NAME_BUFFER_SIZE, "%s_%d", port_cfg->vdev, prox_port_cfg[port_id].vlan_tags[tag_id]);
252                                 sprintf(command, "ip link add link %s name %s type vlan id %d", port_cfg->vdev, prox_port_cfg[vdev_port_id].names[tag_id], prox_port_cfg[port_id].vlan_tags[tag_id]);
253                                 system(command);
254                                 plog_info("\tRunning %s\n", command);
255                                 plog_info("\tUsing vlan tag %d - added device %s\n", prox_port_cfg[port_id].vlan_tags[tag_id], prox_port_cfg[vdev_port_id].names[tag_id]);
256                         }
257                         if (prox_port_cfg[port_id].n_vlans == 0) {
258                                 strncpy(prox_port_cfg[vdev_port_id].names[0], port_cfg->vdev, MAX_NAME_SIZE);
259                                 prox_port_cfg[vdev_port_id].n_vlans = 1;
260                                 prox_port_cfg[vdev_port_id].vlan_tags[0] = 0;
261                         }
262
263                         prox_port_cfg[port_id].dpdk_mapping = vdev_port_id;
264                         uint32_t i = 0;
265                         while ((i < PROX_MAX_VLAN_TAGS) && (prox_port_cfg[port_id].ip_addr[i].ip)) {
266                                 prox_port_cfg[vdev_port_id].ip_addr[i].ip = prox_port_cfg[port_id].ip_addr[i].ip;
267                                 prox_port_cfg[vdev_port_id].ip_addr[i].prefix = prox_port_cfg[port_id].ip_addr[i].prefix;
268                                 i++;
269                         }
270                         prox_port_cfg[vdev_port_id].type = prox_port_cfg[port_id].type;
271                         if (prox_port_cfg[vdev_port_id].type == PROX_PORT_MAC_HW) {
272                                 // If DPDK port MAC set to HW, then make sure the vdev has the same MAC as DPDK port
273                                 prox_port_cfg[vdev_port_id].type = PROX_PORT_MAC_SET;
274                                 rte_eth_macaddr_get(port_id, &prox_port_cfg[vdev_port_id].eth_addr);
275                                 plog_info("\tDPDK port %d MAC address pre-configured to MAC from port %d: "MAC_BYTES_FMT"\n",
276                                         vdev_port_id, port_id, MAC_BYTES(prox_port_cfg[vdev_port_id].eth_addr.addr_bytes));
277                         } else
278                                 memcpy(&prox_port_cfg[vdev_port_id].eth_addr, &prox_port_cfg[port_id].eth_addr, sizeof(prox_port_cfg[port_id].eth_addr));
279                 }
280                 if (prox_port_cfg[port_id].n_vlans == 0) {
281                         prox_port_cfg[port_id].n_vlans = 1;
282                         prox_port_cfg[port_id].vlan_tags[0] = 0;
283                 }
284         }
285         nb_ports = prox_rte_eth_dev_count_avail();
286         /* get available ports configuration */
287         PROX_PANIC(use_dummy_devices && nb_ports, "Can't use dummy devices while there are also real ports\n");
288
289         if (use_dummy_devices) {
290 #if (RTE_VERSION >= RTE_VERSION_NUM(2,1,0,0))
291                 nb_ports = prox_last_port_active() + 1;
292                 plog_info("Creating %u dummy devices\n", nb_ports);
293
294                 char port_name[32] = "0dummy_dev";
295                 for (uint32_t i = 0; i < nb_ports; ++i) {
296 #if (RTE_VERSION > RTE_VERSION_NUM(17,5,0,1))
297                         rte_vdev_init(port_name, "size=64,copy=0");
298 #else
299                         eth_dev_null_create(port_name, 0, PROX_RTE_ETHER_MIN_LEN, 0);
300 #endif
301                         port_name[0]++;
302                 }
303 #else
304         PROX_PANIC(use_dummy_devices, "Can't use dummy devices\n");
305 #endif
306         }
307         else if (prox_last_port_active() != -1) {
308                 PROX_PANIC(nb_ports == 0, "\tError: DPDK could not find any port\n");
309                 plog_info("\tDPDK has found %u ports\n", nb_ports);
310         }
311
312         if (nb_ports > PROX_MAX_PORTS) {
313                 plog_warn("\tWarning: I can deal with at most %u ports."
314                         " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
315
316                 nb_ports = PROX_MAX_PORTS;
317         }
318
319 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
320         port_id_max = -1;
321         uint16_t id;
322         RTE_ETH_FOREACH_DEV(id) {
323                 char name[256];
324                 rte_eth_dev_get_name_by_port(id, name);
325                 plog_info("\tFound DPDK port id %u %s\n", id, name);
326                 if (id >= PROX_MAX_PORTS) {
327                         plog_warn("\tWarning: I can deal with at most %u ports."
328                                  " Please update PROX_MAX_PORTS and recompile.\n", PROX_MAX_PORTS);
329                 } else {
330                         prox_port_cfg[id].available = 1;
331                         if (id > port_id_max)
332                                 port_id_max = id;
333                 }
334         }
335 #else
336         port_id_max = nb_ports - 1;
337 #endif
338
339         port_id_last = prox_last_port_active();
340         PROX_PANIC(port_id_last > port_id_max,
341                    "\tError: invalid port(s) specified, last port index active: %d (max index is %d)\n",
342                    port_id_last, port_id_max);
343
344         /* Assign ports to PROX interfaces & Read max RX/TX queues per port */
345 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
346         for (uint8_t port_id = 0; port_id <= port_id_last; ++port_id) {
347 #else
348         for (uint8_t port_id = 0; port_id <= nb_ports; ++port_id) {
349 #endif
350                 /* skip ports that are not enabled */
351                 if (!prox_port_cfg[port_id].active) {
352                         continue;
353 #if (RTE_VERSION >= RTE_VERSION_NUM(17,5,0,0))
354                 } else if (prox_port_cfg[port_id].available == 0) {
355                         PROX_PANIC(1, "port %u enabled but not available\n", port_id);
356 #endif
357                 }
358                 plog_info("\tGetting info for rte dev %u\n", port_id);
359                 rte_eth_dev_info_get(port_id, &dev_info);
360                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
361                 port_cfg->socket = -1;
362
363                 memcpy(&port_cfg->dev_info, &dev_info, sizeof(struct rte_eth_dev_info));
364                 port_cfg->max_txq = dev_info.max_tx_queues;
365                 port_cfg->max_rxq = dev_info.max_rx_queues;
366                 port_cfg->max_rx_pkt_len = dev_info.max_rx_pktlen;
367                 port_cfg->min_rx_bufsize = dev_info.min_rx_bufsize;
368                 port_cfg->min_tx_desc = dev_info.tx_desc_lim.nb_min;
369                 port_cfg->max_tx_desc = dev_info.tx_desc_lim.nb_max;
370                 port_cfg->min_rx_desc = dev_info.rx_desc_lim.nb_min;
371                 port_cfg->max_rx_desc = dev_info.rx_desc_lim.nb_max;
372
373                 prox_strncpy(port_cfg->driver_name, dev_info.driver_name, sizeof(port_cfg->driver_name));
374                 plog_info("\tPort %u : driver='%s' tx_queues=%d rx_queues=%d\n", port_id, !strcmp(port_cfg->driver_name, "")? "null" : port_cfg->driver_name, port_cfg->max_txq, port_cfg->max_rxq);
375                 plog_info("\tPort %u : %d<=nb_tx_desc<=%d %d<=nb_rx_desc<=%d\n", port_id, port_cfg->min_tx_desc, port_cfg->max_tx_desc, port_cfg->min_rx_desc, port_cfg->max_rx_desc);
376
377                 if (strncmp(port_cfg->driver_name, "rte_", 4) == 0) {
378                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
379                 } else if (strncmp(port_cfg->driver_name, "net_", 4) == 0) {
380                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name + 4, sizeof(port_cfg->short_name));
381                 } else {
382                         prox_strncpy(port_cfg->short_name, prox_port_cfg[port_id].driver_name, sizeof(port_cfg->short_name));
383                 }
384                 char *ptr;
385                 if ((ptr = strstr(port_cfg->short_name, "_pmd")) != NULL) {
386                         *ptr = '\x0';
387                 }
388                 // Set socket for vdev device identical to socket of corresponding port
389                 if (prox_port_cfg[port_id].is_vdev) {
390                         prox_port_cfg[port_id].socket = prox_port_cfg[prox_port_cfg[port_id].dpdk_mapping].socket;
391                         continue;
392                 }
393
394 #if RTE_VERSION < RTE_VERSION_NUM(18,5,0,0)
395                 pci_dev = dev_info.pci_dev;
396 #else
397                 if (!dev_info.device)
398                         continue;
399                 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
400 #endif
401                 if (!pci_dev)
402                         continue;
403
404                 snprintf(port_cfg->pci_addr, sizeof(port_cfg->pci_addr),
405                          "%04x:%02x:%02x.%1x", pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
406                 /* Try to find the device's numa node */
407                 char buf[1024];
408                 snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/numa_node", port_cfg->pci_addr);
409                 FILE* numa_node_fd = fopen(buf, "r");
410                 if (numa_node_fd) {
411                         if (fgets(buf, sizeof(buf), numa_node_fd) == NULL) {
412                                 plog_warn("Failed to read numa_node for device %s\n", port_cfg->pci_addr);
413                         }
414                         port_cfg->socket = strtol(buf, 0, 0);
415                         if (port_cfg->socket == -1) {
416                                 plog_warn("System did not report numa_node for device %s\n", port_cfg->pci_addr);
417                         }
418                         fclose(numa_node_fd);
419                 }
420
421                 // In DPDK 18.08 vmxnet3 reports it supports IPV4 checksum, but packets does not go through when IPv4 cksum is enabled
422                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)) {
423                         plog_info("\t\tDisabling IPV4 cksum on vmxnet3\n");
424                         port_cfg->disabled_tx_offload |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
425                 }
426                 if ((!strcmp(port_cfg->short_name, "vmxnet3")) && (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
427                         plog_info("\t\tDisabling UDP cksum on vmxnet3\n");
428                         port_cfg->disabled_tx_offload |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
429                 }
430                 // Some OVS versions reports that they support UDP offload and no IPv4 offload, but fails when UDP offload is enabled
431                 if ((!strcmp(port_cfg->short_name, "virtio")) &&
432                         ((port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) == 0) &&
433                         (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) {
434                         plog_info("\t\tDisabling UDP cksum on virtio\n");
435                         port_cfg->disabled_tx_offload |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
436                 }
437         }
438 }
439
440 /* Create rte ring-backed devices */
441 uint8_t init_rte_ring_dev(void)
442 {
443         uint8_t nb_ring_dev = 0;
444
445         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
446                 /* skip ports that are not enabled */
447                 if (!prox_port_cfg[port_id].active) {
448                         continue;
449                 }
450                 struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
451                 if (port_cfg->rx_ring[0] != '\0') {
452                         plog_info("\tRing-backed port %u: rx='%s' tx='%s'\n", port_id, port_cfg->rx_ring, port_cfg->tx_ring);
453
454                         struct rte_ring* rx_ring = rte_ring_lookup(port_cfg->rx_ring);
455                         PROX_PANIC(rx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->rx_ring, port_id);
456                         struct rte_ring* tx_ring = rte_ring_lookup(port_cfg->tx_ring);
457                         PROX_PANIC(tx_ring == NULL, "Ring %s not found for port %d!\n", port_cfg->tx_ring, port_id);
458
459                         int ret = rte_eth_from_rings(port_cfg->names[0], &rx_ring, 1, &tx_ring, 1, rte_socket_id());
460                         PROX_PANIC(ret != 0, "Failed to create eth_dev from rings for port %d\n", port_id);
461
462                         port_cfg->port_conf.intr_conf.lsc = 0; /* Link state interrupt not supported for ring-backed ports */
463
464                         nb_ring_dev++;
465                 }
466         }
467
468         return nb_ring_dev;
469 }
470
471 static void print_port_capa(struct prox_port_cfg *port_cfg)
472 {
473         uint8_t port_id;
474
475         port_id = port_cfg - prox_port_cfg;
476         plog_info("\t*** Initializing port %u ***\n", port_id);
477         plog_info("\t\tPort name is set to %s\n", port_cfg->names[0]);
478         plog_info("\t\tPort max RX/TX queue is %u/%u\n", port_cfg->max_rxq, port_cfg->max_txq);
479         plog_info("\t\tPort driver is %s\n", port_cfg->driver_name);
480 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
481         plog_info("\t\tSupported speed mask = 0x%x\n", port_cfg->dev_info.speed_capa);
482 #endif
483         if (port_cfg->max_link_speed != UINT32_MAX) {
484                 plog_info("\t\tHighest link speed capa = %d Mbps\n", port_cfg->max_link_speed);
485         }
486
487 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
488         plog_info("\t\tRX offload capa = 0x%lx = ", port_cfg->dev_info.rx_offload_capa);
489         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_STRIP)
490                 plog_info("VLAN STRIP | ");
491         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM)
492                 plog_info("IPV4 CKSUM | ");
493         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_UDP_CKSUM)
494                 plog_info("UDP CKSUM | ");
495         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_CKSUM)
496                 plog_info("TCP CKSUM | ");
497         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TCP_LRO)
498                 plog_info("TCP LRO | ");
499         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_QINQ_STRIP)
500                 plog_info("QINQ STRIP | ");
501         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM)
502                 plog_info("OUTER_IPV4_CKSUM | ");
503         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_MACSEC_STRIP)
504                 plog_info("MACSEC STRIP | ");
505 #if defined(RTE_ETH_RX_OFFLOAD_HEADER_SPLIT)
506         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_HEADER_SPLIT)
507                 plog_info("HEADER SPLIT | ");
508 #endif
509         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_FILTER)
510                 plog_info("VLAN FILTER | ");
511         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND)
512                 plog_info("VLAN EXTEND | ");
513         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_JUMBO_FRAME)
514                 plog_info("JUMBO FRAME | ");
515 #if defined(RTE_ETH_RX_OFFLOAD_CRC_STRIP)
516         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_CRC_STRIP)
517                 plog_info("CRC STRIP | ");
518 #endif
519 #if defined(RTE_ETH_RX_OFFLOAD_KEEP_CRC)
520         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
521                 plog_info("KEEP CRC | ");
522 #endif
523         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SCATTER)
524                 plog_info("SCATTER | ");
525         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
526                 plog_info("TIMESTAMP | ");
527         if (port_cfg->dev_info.rx_offload_capa & RTE_ETH_RX_OFFLOAD_SECURITY)
528                 plog_info("SECURITY ");
529         plog_info("\n");
530
531         plog_info("\t\tTX offload capa = 0x%lx = ", port_cfg->dev_info.tx_offload_capa);
532         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VLAN_INSERT)
533                 plog_info("VLAN INSERT | ");
534         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)
535                 plog_info("IPV4 CKSUM | ");
536         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM)
537                 plog_info("UDP CKSUM | ");
538         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM)
539                 plog_info("TCP CKSUM | ");
540         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM)
541                 plog_info("SCTP CKSUM | ");
542         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO)
543                 plog_info("TCP TS0 | ");
544         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_TSO)
545                 plog_info("UDP TSO | ");
546         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM)
547                 plog_info("OUTER IPV4 CKSUM | ");
548         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_QINQ_INSERT)
549                 plog_info("QINQ INSERT | ");
550         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO)
551                 plog_info("VLAN TNL TSO | ");
552         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO)
553                 plog_info("GRE TNL TSO | ");
554         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPIP_TNL_TSO)
555                 plog_info("IPIP TNL TSO | ");
556         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO)
557                 plog_info("GENEVE TNL TSO | ");
558         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MACSEC_INSERT)
559                 plog_info("MACSEC INSERT | ");
560         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MT_LOCKFREE)
561                 plog_info("MT LOCKFREE | ");
562         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
563                 plog_info("MULTI SEG | ");
564         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SECURITY)
565                 plog_info("SECURITY | ");
566         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_TNL_TSO)
567                 plog_info("UDP TNL TSO | ");
568         if (port_cfg->dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IP_TNL_TSO)
569                 plog_info("IP TNL TSO | ");
570         plog_info("\n");
571
572         plog_info("\t\trx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.rx_queue_offload_capa);
573         plog_info("\t\ttx_queue_offload_capa = 0x%lx\n", port_cfg->dev_info.tx_queue_offload_capa);
574         plog_info("\t\tflow_type_rss_offloads = 0x%lx\n", port_cfg->dev_info.flow_type_rss_offloads);
575         plog_info("\t\tdefault RX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_rxportconf.burst_size, port_cfg->dev_info.default_rxportconf.ring_size, port_cfg->dev_info.default_rxportconf.nb_queues);
576         plog_info("\t\tdefault TX port conf: burst_size = %d, ring_size = %d, nb_queues = %d\n", port_cfg->dev_info.default_txportconf.burst_size, port_cfg->dev_info.default_txportconf.ring_size, port_cfg->dev_info.default_txportconf.nb_queues);
577 #endif
578 }
579
580 static void get_max_link_speed(struct prox_port_cfg *port_cfg)
581 {
582         port_cfg->max_link_speed = UINT32_MAX;
583
584 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
585         // virtio and vmxnet3 reports fake max_link_speed
586         if (strcmp(port_cfg->short_name, "vmxnet3") && strcmp(port_cfg->short_name, "virtio")) {
587                 // Get link_speed from highest capability from the port
588                 // This will be used by gen and lat for extrapolation purposes
589                 // The negotiated link_speed (as reported by rte_eth_link_get
590                 // or rte_eth_link_get_nowait) might be reported too late
591                 // and might result in wrong exrapolation, and hence should not be used
592                 // for extrapolation purposes
593                 if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_100G)
594                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_100G;
595                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_56G)
596                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_56G;
597                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_50G)
598                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_50G;
599                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_40G)
600                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_40G;
601                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_25G)
602                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_25G;
603                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_20G)
604                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_20G;
605                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_10G)
606                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_10G;
607                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_5G)
608                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_5G;
609                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_2_5G)
610                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_2_5G;
611                 else if (port_cfg->dev_info.speed_capa & RTE_ETH_LINK_SPEED_1G)
612                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_1G;
613                 else if (port_cfg->dev_info.speed_capa & (RTE_ETH_LINK_SPEED_100M_HD | RTE_ETH_LINK_SPEED_100M))
614                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_100M;
615                 else if (port_cfg->dev_info.speed_capa & (RTE_ETH_LINK_SPEED_10M_HD | RTE_ETH_LINK_SPEED_10M))
616                         port_cfg->max_link_speed = RTE_ETH_SPEED_NUM_10M;
617
618         }
619 #endif
620 }
621
622 static void init_port(struct prox_port_cfg *port_cfg)
623 {
624         static char dummy_pool_name[] = "0_dummy";
625         struct rte_eth_link link;
626         uint8_t port_id;
627         int ret;
628
629         get_max_link_speed(port_cfg);
630         print_port_capa(port_cfg);
631         port_id = port_cfg - prox_port_cfg;
632         PROX_PANIC(port_cfg->n_rxq == 0 && port_cfg->n_txq == 0,
633                    "\t\t port %u is enabled but no RX or TX queues have been configured", port_id);
634
635         if (port_cfg->n_rxq == 0) {
636                 /* not receiving on this port */
637                 plog_info("\t\tPort %u had no RX queues, setting to 1\n", port_id);
638                 port_cfg->n_rxq = 1;
639                 uint32_t mbuf_size = TX_MBUF_SIZE;
640                 if (mbuf_size < port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf))
641                         mbuf_size = port_cfg->min_rx_bufsize + RTE_PKTMBUF_HEADROOM + sizeof(struct rte_mbuf);
642
643                 plog_info("\t\tAllocating dummy memory pool on socket %u with %u elements of size %u\n",
644                           port_cfg->socket, port_cfg->n_rxd, mbuf_size);
645                 port_cfg->pool[0] = rte_mempool_create(dummy_pool_name, port_cfg->n_rxd, mbuf_size,
646                                                        0,
647                                                        sizeof(struct rte_pktmbuf_pool_private),
648                                                        rte_pktmbuf_pool_init, NULL,
649                                                        prox_pktmbuf_init, 0,
650                                                        port_cfg->socket, 0);
651                 PROX_PANIC(port_cfg->pool[0] == NULL, "Failed to allocate dummy memory pool on socket %u with %u elements\n",
652                            port_cfg->socket, port_cfg->n_rxd);
653                 dummy_pool_name[0]++;
654         } else {
655                 // Most pmd should now support setting mtu
656                 if (port_cfg->mtu + PROX_RTE_ETHER_HDR_LEN + PROX_RTE_ETHER_CRC_LEN > port_cfg->max_rx_pkt_len) {
657                         plog_info("\t\tMTU is too big for the port, reducing MTU from %d to %d\n", port_cfg->mtu, port_cfg->max_rx_pkt_len);
658                         port_cfg->mtu = port_cfg->max_rx_pkt_len;
659                 }
660                 plog_info("\t\tSetting MTU size to %u for port %u ...\n", port_cfg->mtu, port_id);
661                 ret = rte_eth_dev_set_mtu(port_id, port_cfg->mtu);
662                 if (ret)
663                         plog_err("\t\t\trte_eth_dev_set_mtu() failed on port %u: error %d\n", port_id, ret);
664
665                 if (port_cfg->n_txq == 0) {
666                         /* not sending on this port */
667                         plog_info("\t\tPort %u had no TX queues, setting to 1\n", port_id);
668                         port_cfg->n_txq = 1;
669                 }
670         }
671
672         if (port_cfg->n_rxq > 1)  {
673                 // Enable RSS if multiple receive queues
674                 if (strcmp(port_cfg->short_name, "virtio")) {
675                         port_cfg->port_conf.rxmode.mq_mode                      |= RTE_ETH_MQ_RX_RSS;
676                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key        = toeplitz_init_key;
677                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_key_len    = TOEPLITZ_KEY_LEN;
678 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
679                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = RTE_ETH_RSS_IP|RTE_ETH_RSS_UDP;
680 #else
681                         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf         = RTE_ETH_RSS_IPV4|ETH_RSS_NONF_IPV4_UDP;
682 #endif
683                 }
684         }
685
686         // Make sure that the requested RSS offload is supported by the PMD
687 #if RTE_VERSION >= RTE_VERSION_NUM(2,0,0,0)
688         port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf &= port_cfg->dev_info.flow_type_rss_offloads;
689 #endif
690         if (strcmp(port_cfg->short_name, "virtio")) {
691                 plog_info("\t\t Enabling RSS rss_hf = 0x%lx (requested 0x%llx, supported 0x%lx)\n", port_cfg->port_conf.rx_adv_conf.rss_conf.rss_hf, RTE_ETH_RSS_IP|RTE_ETH_RSS_UDP, port_cfg->dev_info.flow_type_rss_offloads);
692         } else {
693                 plog_info("\t\t Not enabling RSS on virtio port");
694         }
695
696         // rxmode such as hw src strip
697 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
698 #if defined (RTE_ETH_RX_OFFLOAD_CRC_STRIP)
699         CONFIGURE_RX_OFFLOAD(RTE_ETH_RX_OFFLOAD_CRC_STRIP);
700 #endif
701 #if defined (RTE_ETH_RX_OFFLOAD_KEEP_CRC)
702         CONFIGURE_RX_OFFLOAD(RTE_ETH_RX_OFFLOAD_KEEP_CRC);
703 #endif
704         CONFIGURE_RX_OFFLOAD(RTE_ETH_RX_OFFLOAD_JUMBO_FRAME);
705         CONFIGURE_RX_OFFLOAD(RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
706 #else
707         if (port_cfg->requested_rx_offload & RTE_ETH_RX_OFFLOAD_CRC_STRIP) {
708                 port_cfg->port_conf.rxmode.hw_strip_crc = 1;
709         }
710         if (port_cfg->requested_rx_offload & RTE_ETH_RX_OFFLOAD_JUMBO_FRAME) {
711                 port_cfg->port_conf.rxmode.jumbo_frame = 1;
712         }
713 #endif
714
715         // IPV4, UDP, SCTP Checksums
716 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
717         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_IPV4_CKSUM);
718         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_UDP_CKSUM);
719         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_VLAN_INSERT);
720 #else
721         if ((port_cfg->dev_info.tx_offload_capa & (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) == 0) {
722                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOOFFLOADS;
723                 plog_info("\t\tDisabling TX offloads as pmd reports that it does not support them)\n");
724         }
725         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
726                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
727                 plog_info("\t\tDisabling SCTP offload on port %d as vmxnet3 does not support them\n", port_id);
728         }
729 #endif
730         // Multi Segments
731 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
732         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_MULTI_SEGS);
733 #else
734         if (!strcmp(port_cfg->short_name, "vmxnet3")) {
735                 port_cfg->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
736                 plog_info("\t\tDisabling TX multsegs on port %d as vmxnet3 does not support them\n", port_id);
737         } else if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)
738                 plog_info("\t\tDisabling TX multsegs on port %d\n", port_id);
739         else
740                 plog_info("\t\tEnabling TX multsegs on port %d\n", port_id);
741
742         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOOFFLOADS)
743                 plog_info("\t\tEnabling No TX offloads on port %d\n", port_id);
744         else
745                 plog_info("\t\tTX offloads enabled on port %d\n", port_id);
746 #endif
747
748         // Refcount
749 #if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,1)
750         CONFIGURE_TX_OFFLOAD(RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE);
751 #else
752         if (port_cfg->tx_conf.txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT)
753                 plog_info("\t\tEnabling No refcnt on port %d\n", port_id);
754         else
755                 plog_info("\t\tRefcnt enabled on port %d\n", port_id);
756 #endif
757
758         plog_info("\t\tConfiguring port %u... with %u RX queues and %u TX queues\n",
759                   port_id, port_cfg->n_rxq, port_cfg->n_txq);
760
761         PROX_PANIC(port_cfg->n_rxq > port_cfg->max_rxq, "\t\t\tToo many RX queues (configuring %u, max is %u)\n", port_cfg->n_rxq, port_cfg->max_rxq);
762         PROX_PANIC(port_cfg->n_txq > port_cfg->max_txq, "\t\t\tToo many TX queues (configuring %u, max is %u)\n", port_cfg->n_txq, port_cfg->max_txq);
763
764         if (!strcmp(port_cfg->short_name, "ixgbe_vf") ||
765             !strcmp(port_cfg->short_name, "virtio") ||
766 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
767             !strcmp(port_cfg->short_name, "i40e") ||
768 #endif
769             !strcmp(port_cfg->short_name, "i40e_vf") ||
770             !strcmp(port_cfg->short_name, "avp") || /* Wind River */
771             !strcmp(port_cfg->driver_name, "") || /* NULL device */
772             !strcmp(port_cfg->short_name, "vmxnet3")) {
773                 port_cfg->port_conf.intr_conf.lsc = 0;
774                 plog_info("\t\tDisabling link state interrupt for vmxnet3/VF/virtio (unsupported)\n");
775         }
776
777         if (port_cfg->lsc_set_explicitely) {
778                 port_cfg->port_conf.intr_conf.lsc = port_cfg->lsc_val;
779                 plog_info("\t\tOverriding link state interrupt configuration to '%s'\n", port_cfg->lsc_val? "enabled" : "disabled");
780         }
781         if (port_cfg->n_txd < port_cfg->min_tx_desc) {
782                 plog_info("\t\tNumber of TX descriptors is set to %d (minimum required for %s\n", port_cfg->min_tx_desc, port_cfg->short_name);
783                 port_cfg->n_txd = port_cfg->min_tx_desc;
784         }
785
786         if (port_cfg->n_rxd < port_cfg->min_rx_desc) {
787                 plog_info("\t\tNumber of RX descriptors is set to %d (minimum required for %s\n", port_cfg->min_rx_desc, port_cfg->short_name);
788                 port_cfg->n_rxd = port_cfg->min_rx_desc;
789         }
790
791         if (port_cfg->n_txd > port_cfg->max_tx_desc) {
792                 plog_info("\t\tNumber of TX descriptors is set to %d (maximum required for %s\n", port_cfg->max_tx_desc, port_cfg->short_name);
793                 port_cfg->n_txd = port_cfg->max_tx_desc;
794         }
795
796         if (port_cfg->n_rxd > port_cfg->max_rx_desc) {
797                 plog_info("\t\tNumber of RX descriptors is set to %d (maximum required for %s\n", port_cfg->max_rx_desc, port_cfg->short_name);
798                 port_cfg->n_rxd = port_cfg->max_rx_desc;
799         }
800
801         ret = rte_eth_dev_configure(port_id, port_cfg->n_rxq,
802                                     port_cfg->n_txq, &port_cfg->port_conf);
803         PROX_PANIC(ret < 0, "\t\t\trte_eth_dev_configure() failed on port %u: %s (%d)\n", port_id, strerror(-ret), ret);
804
805         if (port_cfg->port_conf.intr_conf.lsc) {
806                 rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC, lsc_cb, NULL);
807         }
808
809         plog_info("\t\tMAC address set to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
810
811         /* initialize TX queues first */
812         for (uint16_t queue_id = 0; queue_id < port_cfg->n_txq; ++queue_id) {
813                 plog_info("\t\tSetting up TX queue %u on socket %u with %u desc\n",
814                           queue_id, port_cfg->socket, port_cfg->n_txd);
815                 ret = rte_eth_tx_queue_setup(port_id, queue_id, port_cfg->n_txd,
816                                              port_cfg->socket, &port_cfg->tx_conf);
817                 PROX_PANIC(ret < 0, "\t\t\trte_eth_tx_queue_setup() failed on port %u: error %d\n", port_id, ret);
818         }
819
820         /* initialize RX queues */
821         for (uint16_t queue_id = 0; queue_id < port_cfg->n_rxq; ++queue_id) {
822                 plog_info("\t\tSetting up RX queue %u on port %u on socket %u with %u desc (pool 0x%p)\n",
823                           queue_id, port_id, port_cfg->socket,
824                           port_cfg->n_rxd, port_cfg->pool[queue_id]);
825                 ret = rte_eth_rx_queue_setup(port_id, queue_id,
826                                              port_cfg->n_rxd,
827                                              port_cfg->socket, &port_cfg->rx_conf,
828                                              port_cfg->pool[queue_id]);
829                 PROX_PANIC(ret < 0, "\t\t\trte_eth_rx_queue_setup() failed on port %u: error %s (%d)\n", port_id, strerror(-ret), ret);
830         }
831
832         plog_info("\t\tStarting up port %u ...", port_id);
833         ret = rte_eth_dev_start(port_id);
834
835         PROX_PANIC(ret < 0, "\n\t\t\trte_eth_dev_start() failed on port %u: error %d\n", port_id, ret);
836         plog_info(" done: ");
837
838         if (prox_port_cfg[port_id].is_vdev) {
839                 for (int vlan_id = 0; vlan_id < prox_port_cfg[port_id].n_vlans; vlan_id++) {
840                         set_ip_address(prox_port_cfg[port_id].names[vlan_id], prox_port_cfg[port_id].ip_addr[vlan_id].ip, prox_port_cfg[port_id].ip_addr[vlan_id].prefix);
841                 }
842         }
843         /* Getting link status can be done without waiting if Link
844            State Interrupt is enabled since in that case, if the link
845            is recognized as being down, an interrupt will notify that
846            it has gone up. */
847         if (port_cfg->port_conf.intr_conf.lsc)
848                 rte_eth_link_get_nowait(port_id, &link);
849         else
850                 rte_eth_link_get(port_id, &link);
851
852         port_cfg->link_up = link.link_status;
853         port_cfg->link_speed = link.link_speed;
854
855         if (link.link_status) {
856                 plog_info("Link Up - speed %'u Mbps - %s\n",
857                           link.link_speed,
858                           (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
859                           "full-duplex" : "half-duplex");
860         }
861         else {
862                 plog_info("Link Down\n");
863         }
864
865         if (port_cfg->promiscuous) {
866                 rte_eth_promiscuous_enable(port_id);
867                 plog_info("\t\tport %u in promiscuous mode\n", port_id);
868         }
869
870         if (strcmp(port_cfg->short_name, "ixgbe_vf") &&
871             strcmp(port_cfg->short_name, "i40e") &&
872             strcmp(port_cfg->short_name, "i40e_vf") &&
873             strcmp(port_cfg->short_name, "vmxnet3")) {
874                 for (uint8_t i = 0; i < port_cfg->n_rxq; ++i) {
875                         ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, i, i);
876                         if (ret) {
877                                 plog_info("\t\trte_eth_dev_set_rx_queue_stats_mapping() failed: error %d\n", ret);
878                         }
879                 }
880                 for (uint8_t i = 0; i < port_cfg->n_txq; ++i) {
881                         ret = rte_eth_dev_set_tx_queue_stats_mapping(port_id, i, i);
882                         if (ret) {
883                                 plog_info("\t\trte_eth_dev_set_tx_queue_stats_mapping() failed: error %d\n", ret);
884                         }
885                 }
886         }
887         if (port_cfg->nb_mc_addr) {
888                 rte_eth_allmulticast_enable(port_id);
889                 if ((ret = rte_eth_dev_set_mc_addr_list(port_id, port_cfg->mc_addr, port_cfg->nb_mc_addr)) != 0) {
890                         plog_err("\t\trte_eth_dev_set_mc_addr_list returns %d on port %u\n", ret, port_id);
891                         port_cfg->nb_mc_addr = 0;
892                         rte_eth_allmulticast_disable(port_id);
893                         plog_info("\t\tport %u NOT in multicast mode as failed to add mcast address\n", port_id);
894                 } else {
895                         plog_info("\t\trte_eth_dev_set_mc_addr_list(%d addr) on port %u\n", port_cfg->nb_mc_addr, port_id);
896                         plog_info("\t\tport %u in multicast mode\n", port_id);
897                 }
898         }
899 }
900
901 void init_port_all(void)
902 {
903         enum rte_proc_type_t proc_type;
904         proc_type = rte_eal_process_type();
905         if (proc_type == RTE_PROC_SECONDARY) {
906                 plog_info("\tSkipping port initialization as secondary process\n");
907                 return;
908         }
909         uint8_t max_port_idx = prox_last_port_active() + 1;
910
911         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
912                 if (!prox_port_cfg[portid].active) {
913                         continue;
914                 }
915                 init_port(&prox_port_cfg[portid]);
916         }
917 }
918
919 void close_ports_atexit(void)
920 {
921         uint8_t max_port_idx = prox_last_port_active() + 1;
922
923         for (uint8_t portid = 0; portid < max_port_idx; ++portid) {
924                 if (!prox_port_cfg[portid].active) {
925                         continue;
926                 }
927                 plog_info("Closing port %u\n", portid);
928                 rte_eth_dev_close(portid);
929         }
930
931         if (lcore_cfg == NULL)
932                 return;
933
934         struct lcore_cfg *lconf = NULL;
935         struct task_args *targ;
936         while (core_targ_next(&lconf, &targ, 0) == 0) {
937                 if (targ->pool) {
938                         rte_mempool_free(targ->pool);
939                         plog_info("freeing pool %p\n", targ->pool);
940                         targ->pool = NULL;
941                 }
942         }
943 }
944
945 void init_port_addr(void)
946 {
947         struct prox_port_cfg *port_cfg;
948         enum rte_proc_type_t proc_type;
949         int rc;
950
951         for (uint8_t port_id = 0; port_id < PROX_MAX_PORTS; ++port_id) {
952                 if (!prox_port_cfg[port_id].active) {
953                         continue;
954                 }
955                 port_cfg = &prox_port_cfg[port_id];
956
957                 switch (port_cfg->type) {
958                 case PROX_PORT_MAC_HW:
959                         rte_eth_macaddr_get(port_id, &port_cfg->eth_addr);
960                         break;
961                 case PROX_PORT_MAC_RAND:
962                         prox_rte_eth_random_addr(port_cfg->eth_addr.addr_bytes);
963                         break;
964                 case PROX_PORT_MAC_SET:
965                         proc_type = rte_eal_process_type();
966                         if (proc_type == RTE_PROC_SECONDARY) {
967                                 plog_warn("\tport %u: unable to change port mac address as secondary process\n", port_id);
968                         } else if ((rc = rte_eth_dev_default_mac_addr_set(port_id, &port_cfg->eth_addr)) != 0)
969                                 plog_warn("\tport %u: failed to set mac address. Error = %d\n", port_id, rc);
970                         else
971                                 plog_info("Setting MAC to "MAC_BYTES_FMT"\n", MAC_BYTES(port_cfg->eth_addr.addr_bytes));
972                         break;
973                 }
974         }
975 }
976
977 int port_is_active(uint8_t port_id)
978 {
979         if (port_id > PROX_MAX_PORTS) {
980                 plog_info("requested port is higher than highest supported port ID (%u)\n", PROX_MAX_PORTS);
981                 return 0;
982         }
983
984         struct prox_port_cfg* port_cfg = &prox_port_cfg[port_id];
985         if (!port_cfg->active) {
986                 plog_info("Port %u is not active\n", port_id);
987                 return 0;
988         }
989         return 1;
990 }