+ if (prox_cfg.flags & DSF_L3_ENABLED) {
+ hash_params.key_len = sizeof(uint32_t);
+ task->external_ip_hash = rte_hash_create(&hash_params);
+ PROX_PANIC(task->external_ip_hash == NULL, "Failed to set up external ip hash\n");
+ plog_info("\texternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
+ hash_name[0]++;
+
+ task->external_ip_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id);
+ PROX_PANIC(task->external_ip_table == NULL, "Failed to allocate memory for %u entries in external ip table\n", n_entries);
+ plog_info("\texternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table));
+
+ hash_params.key_len = sizeof(struct ip_port);
+ task->internal_ip_hash = rte_hash_create(&hash_params);
+ PROX_PANIC(task->internal_ip_hash == NULL, "Failed to set up internal ip hash\n");
+ plog_info("\tinternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
+ hash_name[0]++;
+
+ task->internal_ip_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id);
+ PROX_PANIC(task->internal_ip_table == NULL, "Failed to allocate memory for %u entries in internal ip table\n", n_entries);
+ plog_info("\tinternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table));
+ }
+
+ if (prox_cfg.flags & DSF_NDP_ENABLED) {
+ hash_params.key_len = sizeof(struct ipv6_addr);
+ task->external_ip6_hash = rte_hash_create(&hash_params);
+ PROX_PANIC(task->external_ip6_hash == NULL, "Failed to set up external ip6 hash\n");
+ plog_info("\texternal ip6 hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
+ hash_name[0]++;
+
+ task->external_ip6_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id);
+ PROX_PANIC(task->external_ip6_table == NULL, "Failed to allocate memory for %u entries in external ip6 table\n", n_entries);
+ plog_info("\texternal ip6_table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table));
+
+ hash_params.key_len = sizeof(struct ip6_port);
+ task->internal_ip6_hash = rte_hash_create(&hash_params);
+ PROX_PANIC(task->internal_ip6_hash == NULL, "Failed to set up internal ip6 hash\n");
+ plog_info("\tinternal ip6 hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
+ hash_name[0]++;
+
+ task->internal_ip6_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id);
+ PROX_PANIC(task->internal_ip6_table == NULL, "Failed to allocate memory for %u entries in internal ip6 table\n", n_entries);
+ plog_info("\tinternal ip6 table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table));
+ }
+
+ int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno);
+ fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
+
+ struct sockaddr_nl sockaddr;
+ memset(&sockaddr, 0, sizeof(struct sockaddr_nl));
+ sockaddr.nl_family = AF_NETLINK;
+ sockaddr.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
+ int rc = bind(fd, (struct sockaddr *)&sockaddr, sizeof(struct sockaddr_nl));
+ PROX_PANIC(rc < 0, "Failed to bind to RTMGRP_NEIGH netlink group\n");
+ task->arp_fds.fd = fd;
+ task->arp_fds.events = POLL_IN;
+ plog_info("\tRTMGRP_NEIGH netlink group bound; fd = %d\n", fd);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno);
+ fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
+ struct sockaddr_nl sockaddr2;
+ memset(&sockaddr2, 0, sizeof(struct sockaddr_nl));
+ sockaddr2.nl_family = AF_NETLINK;
+ sockaddr2.nl_groups = RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
+ rc = bind(fd, (struct sockaddr *)&sockaddr2, sizeof(struct sockaddr_nl));
+ PROX_PANIC(rc < 0, "Failed to bind to RTMGRP_NEIGH netlink group\n");
+ task->route_fds.fd = fd;
+ task->route_fds.events = POLL_IN;
+ plog_info("\tRTMGRP_IPV4_ROUTE netlink group bound; fd = %d\n", fd);
+
+ static char name[] = "master_arp_nd_pool";
+ const int NB_ARP_MBUF = 1024;
+ const int ARP_MBUF_SIZE = 2048;
+ const int NB_CACHE_ARP_MBUF = 256;
+ struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF,
+ sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
+ rte_socket_id(), 0);
+ PROX_PANIC(ret == NULL, "Failed to allocate ARP memory pool on socket %u with %u elements\n",
+ rte_socket_id(), NB_ARP_MBUF);
+ plog_info("\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_ARP_MBUF,
+ ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, rte_socket_id());
+ tbase->l3.arp_nd_pool = ret;
+}
+
+/*
+ * Handle one routing notification read from the route netlink socket.
+ *
+ * Reads a single datagram from task->route_fds.fd into netlink_buf and only
+ * acts on a non-multipart RTM_NEWROUTE / RTM_DELROUTE message for AF_INET
+ * whose table is RT_TABLE_MAIN or RT_TABLE_LOCAL. The destination, gateway
+ * and output interface are extracted from the route attributes. When the
+ * output interface name matches one of the names in prox_port_cfg[], an mbuf
+ * is taken from tbase->l3.arp_nd_pool and the route is forwarded with
+ * tx_ring_route() to the ring of the mapped DPDK port; otherwise the event
+ * is only logged.
+ *
+ * The received length and every attribute payload are validated before being
+ * read, so a short or truncated datagram is rejected instead of parsing
+ * whatever was left in netlink_buf by a previous call.
+ *
+ * NOTE(review): the mbuf is assumed to be consumed by tx_ring_route() -
+ * confirm against its implementation.
+ *
+ * @param tbase  master task (cast to struct task_master internally).
+ */
+static void handle_route_event(struct task_base *tbase)
+{
+	struct task_master *task = (struct task_master *)tbase;
+	struct rte_mbuf *mbufs[MAX_RING_BURST];
+	int fd = task->route_fds.fd, interface_index;
+	char interface_name[IF_NAMESIZE] = {0};
+	uint32_t ip = 0, gw_ip = 0;
+
+	int len = recv(fd, netlink_buf, sizeof(netlink_buf), 0);
+	if (len < 0) {
+		plog_err("Failed to recv from netlink: %d\n", errno);
+		return;
+	}
+
+	struct nlmsghdr *nl_hdr = (struct nlmsghdr *)netlink_buf;
+	/* Header must be complete and nlmsg_len must fit in what was received */
+	if (!NLMSG_OK(nl_hdr, len)) {
+		plog_err("Truncated or malformed netlink message (%d bytes received)\n", len);
+		return;
+	}
+	if (nl_hdr->nlmsg_flags & NLM_F_MULTI) {
+		plog_err("Unexpected multipart netlink message\n");
+		return;
+	}
+	if ((nl_hdr->nlmsg_type != RTM_NEWROUTE) && (nl_hdr->nlmsg_type != RTM_DELROUTE))
+		return;
+	/* A route message must at least carry a full struct rtmsg */
+	if (nl_hdr->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtmsg))) {
+		plog_err("Netlink route message too short (%u bytes)\n", (unsigned)nl_hdr->nlmsg_len);
+		return;
+	}
+
+	struct rtmsg *rtmsg = (struct rtmsg *)NLMSG_DATA(nl_hdr);
+	if (rtmsg->rtm_family != AF_INET) {
+		plog_warn("Unhandled non IPV4 routing message\n");
+		return;
+	}
+	if ((rtmsg->rtm_table != RT_TABLE_MAIN) && (rtmsg->rtm_table != RT_TABLE_LOCAL))
+		return;
+	int dst_len = rtmsg->rtm_dst_len;
+
+	struct rtattr *rta = (struct rtattr *)RTM_RTA(rtmsg);
+	int rtl = RTM_PAYLOAD(nl_hdr);
+	for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) {
+		/* All attributes read below are 32-bit values; ignore shorter payloads */
+		if ((size_t)RTA_PAYLOAD(rta) < sizeof(uint32_t))
+			continue;
+		switch (rta->rta_type) {
+		case RTA_DST:
+			memcpy(&ip, RTA_DATA(rta), sizeof(ip));
+			break;
+		case RTA_OIF:
+			memcpy(&interface_index, RTA_DATA(rta), sizeof(interface_index));
+			if (if_indextoname(interface_index, interface_name) == NULL) {
+				plog_info("Unknown Interface Index %d\n", interface_index);
+			}
+			break;
+		case RTA_GATEWAY:
+			memcpy(&gw_ip, RTA_DATA(rta), sizeof(gw_ip));
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Find the port owning an interface with the name reported by the kernel */
+	int dpdk_vdev_port = -1;
+	const int n_ports = prox_rte_eth_dev_count_avail();
+	for (int i = 0; i < n_ports && dpdk_vdev_port == -1; i++) {
+		for (int vlan_id = 0; vlan_id < prox_port_cfg[i].n_vlans; vlan_id++) {
+			if (strcmp(prox_port_cfg[i].names[vlan_id], interface_name) == 0) {
+				dpdk_vdev_port = i;
+				break;
+			}
+		}
+	}
+
+	if (dpdk_vdev_port == -1) {
+		plog_info("Received netlink message on unknown interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name, IP4(ip), dst_len, IP4(gw_ip));
+		return;
+	}
+
+	plogx_info("Received netlink message on tap interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name, IP4(ip), dst_len, IP4(gw_ip));
+	int ret1 = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)mbufs);
+	if (unlikely(ret1 != 0)) {
+		plog_err("Unable to allocate a mbuf for master to core communication\n");
+		return;
+	}
+	int dpdk_port = prox_port_cfg[dpdk_vdev_port].dpdk_mapping;
+	tx_ring_route(tbase, task->internal_port_table[dpdk_port].ring, (nl_hdr->nlmsg_type == RTM_NEWROUTE), mbufs[0], ip, gw_ip, dst_len);
+}
+
+static void handle_arp_event(struct task_base *tbase)
+{
+ struct task_master *task = (struct task_master *)tbase;
+ struct rte_mbuf *mbufs[MAX_RING_BURST];
+ struct nlmsghdr * nl_hdr;
+ int fd = task->arp_fds.fd;
+ int len, ret;
+ uint32_t ip = 0;
+ prox_rte_ether_addr mac;
+ memset(&mac, 0, sizeof(mac));
+ len = recv(fd, netlink_buf, sizeof(netlink_buf), 0);
+ if (len < 0) {
+ plog_err("Failed to recv from netlink: %d\n", errno);
+ return;
+ }
+ nl_hdr = (struct nlmsghdr *)netlink_buf;
+ if (nl_hdr->nlmsg_flags & NLM_F_MULTI) {
+ plog_err("Unexpected multipart netlink message\n");
+ return;
+ }
+ if ((nl_hdr->nlmsg_type != RTM_NEWNEIGH) && (nl_hdr->nlmsg_type != RTM_DELNEIGH))
+ return;
+
+ struct ndmsg *ndmsg = (struct ndmsg *)NLMSG_DATA(nl_hdr);
+ int ndm_family = ndmsg->ndm_family;
+ struct rtattr *rta = (struct rtattr *)RTM_RTA(ndmsg);
+ int rtl = RTM_PAYLOAD(nl_hdr);
+ for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) {
+ switch (rta->rta_type) {
+ case NDA_DST:
+ ip = *((uint32_t *)RTA_DATA(rta));
+ break;
+ case NDA_LLADDR:
+ mac = *((prox_rte_ether_addr *)(uint64_t *)RTA_DATA(rta));
+ break;
+ default:
+ break;
+ }
+ }
+ plogx_info("Received netlink ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
+ ret = rte_hash_lookup(task->external_ip_hash, (const void *)&ip);
+ if (unlikely(ret < 0)) {
+ // entry not found for this IP: we did not ask a request.
+ // This can happen if the kernel updated the ARP table when receiving an ARP_REQUEST
+ // We must record this, as the ARP entry is now in the kernel table
+ if (prox_rte_is_zero_ether_addr(&mac)) {
+ // Timeout or MAC deleted from kernel MAC table
+ int ret = rte_hash_del_key(task->external_ip_hash, (const void *)&ip);
+ plogx_dbg("ip "IPv4_BYTES_FMT" removed from external_ip_hash\n", IP4(ip));
+ return;
+ }
+ int ret = rte_hash_add_key(task->external_ip_hash, (const void *)&ip);
+ if (unlikely(ret < 0)) {
+ plogx_dbg("IP "IPv4_BYTES_FMT" not found in external_ip_hash and unable to add it\n", IP4(ip));
+ return;
+ }
+ memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr));
+ plogx_dbg("ip "IPv4_BYTES_FMT" added in external_ip_hash with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
+ return;
+ }
+
+ // entry found for this IP
+ uint16_t nb_requests = task->external_ip_table[ret].nb_requests;
+ if (nb_requests == 0) {
+ return;
+ }
+
+ memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr));
+
+ // If we receive a request from multiple task for the same IP, then we update all tasks
+ int ret1 = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)mbufs);
+ if (unlikely(ret1 != 0)) {
+ plog_err("Unable to allocate a mbuf for master to core communication\n");
+ return;
+ }
+ rte_mbuf_refcnt_set(mbufs[0], nb_requests);
+ for (int i = 0; i < nb_requests; i++) {
+ struct rte_ring *ring = task->external_ip_table[ret].rings[i];
+ struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *);
+ memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr));
+ tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbufs[0], ip);
+ plog_dbg("MAC_INFO_FROM_MASTER ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
+ }
+ task->external_ip_table[ret].nb_requests = 0;
+ return;