2 // Copyright (c) 2010-2020 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
18 #include <sys/types.h>
19 #include <sys/socket.h>
20 #include <linux/netlink.h>
21 #include <linux/rtnetlink.h>
25 #include <rte_hash_crc.h>
26 #include <rte_ether.h>
30 #include "prox_globals.h"
33 #include "handle_master.h"
35 #include "mbuf_utils.h"
38 #include "prox_malloc.h"
40 #include "task_init.h"
41 #include "prox_port_cfg.h"
47 #include "prox_ipv6.h"
48 #include "packet_utils.h"
// Cap on how many distinct rings may be recorded as waiting for the same
// external address: nb_requests is compared against it before a ring is added.
50 #define PROX_MAX_ARP_REQUESTS 32 // Maximum number of tasks requesting the same MAC address
// Size, in bytes, of netlink_buf below.
51 #define NETLINK_BUF_SIZE 16384
// File-local scratch buffer of NETLINK_BUF_SIZE bytes.
// NOTE(review): presumably used to receive rtnetlink messages - its users are not in this part of the file, confirm.
53 static char netlink_buf[NETLINK_BUF_SIZE];
/*
 * Printable names of the control-plane commands, indexed by command value
 * (the master prints actions_string[command] when it receives a message).
 *
 * Every entry must be followed by a comma placed OUTSIDE the string literal:
 * adjacent string literals are concatenated by the compiler, so a missing or
 * misplaced comma silently merges two names and shifts all following entries.
 * NOTE(review): the order presumably mirrors the command enum declared in
 * handle_master.h - keep both in sync when adding a command.
 */
const char *actions_string[] = {
	"MAC_INFO_FROM_MASTER",          // Controlplane sending a MAC update to dataplane
	"MAC_INFO_FROM_MASTER_FOR_IPV6", // Controlplane sending a MAC update to dataplane
	"IPV6_INFO_FROM_MASTER",         // Controlplane sending IPv6 Global IP info to dataplane
	"ROUTE_ADD_FROM_MASTER",         // Controlplane sending a new route to dataplane
	"ROUTE_DEL_FROM_MASTER",         // Controlplane deleting a route from dataplane
	"SEND_ARP_REQUEST_FROM_MASTER",  // Controlplane requesting dataplane to send ARP request
	"SEND_ARP_REPLY_FROM_MASTER",    // Controlplane requesting dataplane to send ARP reply
	"SEND_NDP_FROM_MASTER",          // Controlplane requesting dataplane to send NDP
	"SEND_ICMP_FROM_MASTER",         // Controlplane requesting dataplane to send ICMP message
	"SEND_BGP_FROM_MASTER",          // Controlplane requesting dataplane to send BGP message
	"ARP_PKT_FROM_NET_TO_MASTER",    // ARP sent by dataplane to Controlplane for handling
	"NDP_PKT_FROM_NET_TO_MASTER",    // NDP sent by dataplane to Controlplane for handling
	"ICMP_TO_MASTER",                // ICMP sent by dataplane to Controlplane for handling
	"BGP_TO_MASTER",                 // BGP sent by dataplane to Controlplane for handling
	"IP4_REQ_MAC_TO_MASTER",         // Dataplane requesting MAC resolution to Controlplane
	"IP6_REQ_MAC_TO_MASTER",         // Dataplane requesting MAC resolution to Controlplane
	"PKT_FROM_TAP"                   // Packet received by Controlplane from kernel and forwarded to dataplane for sending
};
// Global function pointer taking a task, an array of mbufs and a packet count; initialised to NULL.
// NOTE(review): nothing in this part of the file assigns or calls it - confirm where the hook is installed and that callers test for NULL.
76 int (*handle_ctrl_plane)(struct task_base *tbase, struct rte_mbuf **mbuf, uint16_t n_pkts) = NULL;
78 static struct my_arp_t arp_reply = {
85 static struct my_arp_t arp_request = {
96 } __attribute__((packed));
101 } __attribute__((packed));
103 void register_router_to_ctrl_plane(struct task_base *tbase, uint8_t port_id, uint8_t core_id, uint8_t task_id, struct ipv6_addr *local_ipv6_addr, struct ipv6_addr *global_ipv6_addr, struct ipv6_addr *router_prefix)
105 struct task_master *task = (struct task_master *)tbase;
106 task->internal_port_table[port_id].flags |= IPV6_ROUTER;
107 memcpy(&task->internal_port_table[port_id].router_prefix, router_prefix, sizeof(struct ipv6_addr));
108 register_node_to_ctrl_plane(tbase, local_ipv6_addr, global_ipv6_addr, port_id, core_id, task_id);
// Register an IPv6 node (or router, when IPV6_ROUTER is already set on the port) with the master task.
// Stores in internal_port_table[port_id]: the ctrl tx ring of (core_id, task_id), the port MAC taken
// from prox_port_cfg[port_id].eth_addr, and the local and global IPv6 addresses. An address equal to
// prox_cfg.random_ip sets the matching HANDLE_RANDOM_*_IP_FLAG. The local and the global address are
// then each added to internal_ip6_hash.
// NOTE(review): the flags test just below indexes internal_port_table[port_id] BEFORE the
// port_id >= PROX_MAX_PORTS check - the bounds check should be the first statement.
111 void register_node_to_ctrl_plane(struct task_base *tbase, struct ipv6_addr *local_ipv6_addr, struct ipv6_addr *global_ipv6_addr, uint8_t port_id, uint8_t core_id, uint8_t task_id)
113 struct task_master *task = (struct task_master *)tbase;
// Only the wording of the debug trace depends on the router flag.
114 if (task->internal_port_table[port_id].flags & IPV6_ROUTER)
115 plogx_dbg("\tregistering router with port %d core %d and task %d\n", port_id, core_id, task_id);
117 plogx_dbg("\tregistering node with port %d core %d and task %d\n", port_id, core_id, task_id);
119 if (port_id >= PROX_MAX_PORTS) {
120 plog_err("Unable to register router, port %d\n", port_id);
// The ring is selected by the flat index core_id * MAX_TASKS_PER_CORE + task_id.
123 task->internal_port_table[port_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id];
124 memcpy(&task->internal_port_table[port_id].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr));
125 memcpy(&task->internal_port_table[port_id].local_ipv6_addr, local_ipv6_addr, sizeof(struct ipv6_addr));
126 if (memcmp(local_ipv6_addr, &prox_cfg.random_ip, sizeof(struct ipv6_addr)) == 0) {
127 task->internal_port_table[port_id].flags |= HANDLE_RANDOM_LOCAL_IP_FLAG;
130 memcpy(&task->internal_port_table[port_id].global_ipv6_addr, global_ipv6_addr, sizeof(struct ipv6_addr));
131 if (memcmp(global_ipv6_addr, &prox_cfg.random_ip, sizeof(struct ipv6_addr)) == 0) {
132 task->internal_port_table[port_id].flags |= HANDLE_RANDOM_GLOBAL_IP_FLAG;
// First hash insertion: the local address.
136 memcpy(&key.ip6, local_ipv6_addr, sizeof(struct ipv6_addr));
138 int ret = rte_hash_add_key(task->internal_ip6_hash, (const void *)&key);
139 if (unlikely(ret < 0)) {
140 plog_err("Unable to register ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(local_ipv6_addr->bytes));
// Second hash insertion: the global address; ret is overwritten here.
// NOTE(review): only the slot returned for the global address receives mac/ring below; the slot
// obtained for the local address is never filled in the visible code - confirm this is intended.
143 memcpy(&key.ip6, global_ipv6_addr, sizeof(struct ipv6_addr));
144 ret = rte_hash_add_key(task->internal_ip6_hash, (const void *)&key);
145 if (unlikely(ret < 0)) {
146 plog_err("Unable to register ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(global_ipv6_addr->bytes));
149 memcpy(&task->internal_ip6_table[ret].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr));
150 task->internal_ip6_table[ret].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id];
// Set up the virtual device attached to port_id, if any.
// When prox_port_cfg[port_id].dpdk_mapping is not NO_VDEV_PORT, the vdev port is looked up in
// task->all_vdev[]; if absent, the vdev port and the ctrl tx ring of (core_id, task_id) are stored at
// index max_vdev_id. For every VLAN configured on the vdev port a UDP socket is opened, bound to that
// VLAN's IP address and PROX_PSEUDO_PKT_PORT, stored in prox_port_cfg[vdev_port].fds[], and switched
// to non-blocking mode. socket() or bind() failures go through PROX_PANIC.
153 void master_init_vdev(struct task_base *tbase, uint8_t port_id, uint8_t core_id, uint8_t task_id)
155 struct task_master *task = (struct task_master *)tbase;
156 uint8_t vdev_port = prox_port_cfg[port_id].dpdk_mapping;
158 if (vdev_port != NO_VDEV_PORT) {
// Linear search for an existing entry for this vdev port.
159 for (i = 0; i < task->max_vdev_id; i++) {
160 if (task->all_vdev[i].port_id == vdev_port)
163 if (i < task->max_vdev_id) {
164 // Already initialized (e.g. by another core handling the same port).
167 task->all_vdev[task->max_vdev_id].port_id = vdev_port;
168 task->all_vdev[task->max_vdev_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id];
// NOTE(review): dst is declared but not used in the visible code, and src is not zero-initialised
// (only sin_family, sin_port and sin_addr are assigned before bind()).
170 struct sockaddr_in dst, src;
171 src.sin_family = AF_INET;
172 src.sin_port = rte_cpu_to_be_16(PROX_PSEUDO_PKT_PORT);
173 for (int vlan_id = 0; vlan_id < prox_port_cfg[vdev_port].n_vlans; vlan_id++) {
// NOTE(review): s_addr is expected in network byte order; confirm that ip_addr[].ip is stored
// such that rte_be_to_cpu_32() yields network order here.
174 src.sin_addr.s_addr = rte_be_to_cpu_32(prox_port_cfg[vdev_port].ip_addr[vlan_id].ip);
175 int fd = socket(AF_INET, SOCK_DGRAM, 0);
176 PROX_PANIC(fd < 0, "Failed to open socket(AF_INET, SOCK_DGRAM, 0)\n");
177 prox_port_cfg[vdev_port].fds[vlan_id] = fd;
178 rc = bind(fd,(struct sockaddr *)&src, sizeof(struct sockaddr_in));
// NOTE(review): src.sin_port was stored via rte_cpu_to_be_16(), so the %d in the two messages below
// prints the big-endian representation of the port, not PROX_PSEUDO_PKT_PORT itself.
179 PROX_PANIC(rc, "Failed to bind("IPv4_BYTES_FMT":%d): errno = %d (%s)\n", IPv4_BYTES(((uint8_t*)&src.sin_addr.s_addr)), src.sin_port, errno, strerror(errno));
180 plog_info("DPDK port %d bound("IPv4_BYTES_FMT":%d) to fd %d\n", port_id, IPv4_BYTES(((uint8_t*)&src.sin_addr.s_addr)), src.sin_port, fd);
// Make the socket non-blocking. NOTE(review): neither fcntl() return value is checked.
181 fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
// Register an IPv4 address with the master task.
// After validating port_id against PROX_MAX_PORTS, stores in internal_port_table[port_id] the ctrl tx
// ring of (core_id, task_id), the port MAC from prox_port_cfg[port_id].eth_addr and ip. An ip equal to
// RANDOM_IP sets HANDLE_RANDOM_IP_FLAG on the port. The address is then added to internal_ip_hash and
// the MAC and ring are stored in internal_ip_table at the index returned by rte_hash_add_key().
// NOTE(review): key is presumably the (ip, port_id) pair, as in the lookups done by the ARP/ICMP
// handlers - its declaration is not visible here, confirm.
187 void register_ip_to_ctrl_plane(struct task_base *tbase, uint32_t ip, uint8_t port_id, uint8_t core_id, uint8_t task_id)
189 struct task_master *task = (struct task_master *)tbase;
191 plogx_info("\tregistering IP "IPv4_BYTES_FMT" with port %d core %d and task %d\n", IP4(ip), port_id, core_id, task_id);
193 if (port_id >= PROX_MAX_PORTS) {
194 plog_err("Unable to register ip "IPv4_BYTES_FMT", port %d\n", IP4(ip), port_id);
198 /* TODO - store multiple rings if multiple cores able to handle IP
199 Remove them when such cores are stopped and de-register IP
201 task->internal_port_table[port_id].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id];
202 memcpy(&task->internal_port_table[port_id].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr));
203 task->internal_port_table[port_id].ip = ip;
205 if (ip == RANDOM_IP) {
206 task->internal_port_table[port_id].flags |= HANDLE_RANDOM_IP_FLAG;
212 int ret = rte_hash_add_key(task->internal_ip_hash, (const void *)&key);
213 if (unlikely(ret < 0)) {
214 plog_err("Unable to register ip "IPv4_BYTES_FMT"\n", IP4(ip));
217 memcpy(&task->internal_ip_table[ret].mac, &prox_port_cfg[port_id].eth_addr, sizeof(prox_rte_ether_addr));
218 task->internal_ip_table[ret].ring = task->ctrl_tx_rings[core_id * MAX_TASKS_PER_CORE + task_id];
// Handle an ARP reply received from the network.
// The sender protocol address (arp->data.spa) is looked up in external_ip_hash. When the entry
// exists and has pending requests, the same mbuf is passed through tx_ring_ip() with
// MAC_INFO_FROM_MASTER to every ring recorded for that IP, then nb_requests is reset to 0.
221 static inline void handle_arp_reply(struct task_base *tbase, struct rte_mbuf *mbuf, struct my_arp_t *arp)
223 struct task_master *task = (struct task_master *)tbase;
225 uint32_t key = arp->data.spa;
226 plogx_dbg("\tMaster handling ARP reply for ip "IPv4_BYTES_FMT"\n", IP4(key));
228 ret = rte_hash_lookup(task->external_ip_hash, (const void *)&key);
229 if (unlikely(ret < 0)) {
230 // entry not found for this IP: we did not ask a request, delete the reply
233 // entry found for this IP
234 uint16_t nb_requests = task->external_ip_table[ret].nb_requests;
235 // If we receive a request from multiple task for the same IP, then we update all tasks
236 if (task->external_ip_table[ret].nb_requests) {
// The one mbuf is handed out nb_requests times, so its reference count is set to that number
// (presumably so that each receiving task can free it independently - confirm).
237 rte_mbuf_refcnt_set(mbuf, nb_requests);
238 for (int i = 0; i < nb_requests; i++) {
239 struct rte_ring *ring = task->external_ip_table[ret].rings[i];
240 tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbuf, key);
// All waiters have been served: clear the pending list.
242 task->external_ip_table[ret].nb_requests = 0;
// Handle an ARP request received from the network.
// If the receiving port has HANDLE_RANDOM_IP_FLAG, a MAC is derived with create_mac() and an ARP reply
// is built in place and sent to the port's ring. Otherwise the target protocol address (arp->data.tpa)
// is looked up in internal_ip_hash; when found, the reply is built with the MAC stored in
// internal_ip_table and sent with SEND_ARP_REPLY_FROM_MASTER to the ring stored there.
// NOTE(review): port comes from get_port(mbuf) and indexes internal_port_table without a visible
// bounds check against PROX_MAX_PORTS.
249 static inline void handle_arp_request(struct task_base *tbase, struct rte_mbuf *mbuf, struct my_arp_t *arp)
251 struct task_master *task = (struct task_master *)tbase;
252 prox_rte_ether_hdr *ether_hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
254 uint8_t port = get_port(mbuf);
257 key.ip = arp->data.tpa;
259 if (task->internal_port_table[port].flags & HANDLE_RANDOM_IP_FLAG) {
260 prox_rte_ether_addr mac;
261 plogx_dbg("\tMaster handling ARP request for ip "IPv4_BYTES_FMT" on port %d which supports random ip\n", IP4(key.ip), key.port);
262 struct rte_ring *ring = task->internal_port_table[port].ring;
263 create_mac(arp, &mac);
// Clear the IP/UDP checksum offload request bits before the mbuf is reused for the ARP reply.
264 mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM);
265 build_arp_reply(ether_hdr, &mac, arp);
266 tx_ring(tbase, ring, SEND_ARP_REPLY_FROM_MASTER, mbuf);
270 plogx_dbg("\tMaster handling ARP request for ip "IPv4_BYTES_FMT"\n", IP4(key.ip));
272 ret = rte_hash_lookup(task->internal_ip_hash, (const void *)&key);
273 if (unlikely(ret < 0)) {
274 // entry not found for this IP.
275 plogx_dbg("Master ignoring ARP REQUEST received on un-registered IP "IPv4_BYTES_FMT" on port %d\n", IP4(arp->data.tpa), port);
// Registered IP: answer with the MAC and ring stored at registration time.
278 struct rte_ring *ring = task->internal_ip_table[ret].ring;
279 mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM);
280 build_arp_reply(ether_hdr, &task->internal_ip_table[ret].mac, arp);
281 tx_ring(tbase, ring, SEND_ARP_REPLY_FROM_MASTER, mbuf);
// Remember that the task owning `ring` is waiting for the MAC address of ip_dst.
// ip_dst is added to external_ip_hash and the returned index selects the external_ip_table entry.
// The ring is appended to that entry's rings[] unless it is already listed or the entry already
// holds PROX_MAX_ARP_REQUESTS rings.
// Returns an int; the caller in handle_unknown_ip treats a negative value as failure.
// NOTE(review): the `port` parameter is not used in the visible code.
285 static inline int record_request(struct task_base *tbase, uint32_t ip_dst, uint8_t port, struct rte_ring *ring)
287 struct task_master *task = (struct task_master *)tbase;
// rte_hash_add_key() gives the slot index for the key (see DPDK rte_hash documentation for the
// behaviour when the key is already present).
288 int ret = rte_hash_add_key(task->external_ip_hash, (const void *)&ip_dst);
291 if (unlikely(ret < 0)) {
292 plogx_dbg("Unable to add IP "IPv4_BYTES_FMT" in external_ip_hash\n", IP4(ip_dst));
296 // If multiple tasks requesting the same info, we will need to send a reply to all of them
297 // However if one task sends multiple requests to the same IP (e.g. because it is not answering)
298 // then we should not send multiple replies to the same task
299 if (task->external_ip_table[ret].nb_requests >= PROX_MAX_ARP_REQUESTS) {
300 // This can only happen if really many tasks requests the same IP
301 plogx_dbg("Unable to add request for IP "IPv4_BYTES_FMT" in external_ip_table\n", IP4(ip_dst));
// Look for this ring among the ones already waiting.
304 for (i = 0; i < task->external_ip_table[ret].nb_requests; i++) {
305 if (task->external_ip_table[ret].rings[i] == ring)
// i reaching nb_requests means the ring was not found.
308 if (i >= task->external_ip_table[ret].nb_requests) {
309 // If this is a new request i.e. a new task requesting a new IP
310 task->external_ip_table[ret].rings[task->external_ip_table[ret].nb_requests] = ring;
311 task->external_ip_table[ret].nb_requests++;
// Handle a dataplane request to resolve the MAC address of an IPv4 destination.
// Port, destination IP and VLAN are read from the control message carried by mbuf. The requesting
// task's ring (selected from get_core()/get_task()) is recorded via record_request(), then the mbuf
// is rewritten into an ARP request (source IP and MAC taken from internal_port_table[port]) and sent
// back on that ring with SEND_ARP_REQUEST_FROM_MASTER.
// NOTE(review): hdr_arp is not used in the visible code.
316 static inline void handle_unknown_ip(struct task_base *tbase, struct rte_mbuf *mbuf)
318 struct task_master *task = (struct task_master *)tbase;
319 struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *);
320 uint8_t port = get_port(mbuf);
321 uint32_t ip_dst = get_ip(mbuf);
322 uint16_t vlan = ctrl_ring_get_vlan(mbuf);
324 plogx_dbg("\tMaster handling unknown ip "IPv4_BYTES_FMT" for port %d\n", IP4(ip_dst), port);
// Reject a port index outside internal_port_table before using it.
325 if (unlikely(port >= PROX_MAX_PORTS)) {
326 plogx_dbg("Port %d not found", port);
330 uint32_t ip_src = task->internal_port_table[port].ip;
331 struct rte_ring *ring = task->ctrl_tx_rings[get_core(mbuf) * MAX_TASKS_PER_CORE + get_task(mbuf)];
// NOTE(review): presumably reached when ring is NULL - the condition is not visible here, confirm.
334 plogx_dbg("Port %d not registered", port);
339 if (record_request(tbase, ip_dst, port, ring) < 0) {
343 // We send an ARP request even if one was just sent (and not yet answered) by another task
344 mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM);
345 build_arp_request(mbuf, &task->internal_port_table[port].mac, ip_dst, ip_src, vlan);
346 tx_ring(tbase, ring, SEND_ARP_REQUEST_FROM_MASTER, mbuf);
// Turn the ICMP echo request held in mbuf into an echo reply, in place, and hand it to the dataplane.
// Ethernet source/destination addresses and IPv4 source/destination addresses are swapped and the
// ICMP type is set to PROX_RTE_IP_ICMP_ECHO_REPLY. The original destination IP, together with
// mbuf->port, is looked up in internal_ip_hash; when found, the mbuf is sent with
// SEND_ICMP_FROM_MASTER to the ring stored for that IP.
// NOTE(review): no ICMP checksum update is visible after icmp_type is changed - confirm that it is
// recomputed by the task that transmits the packet.
// NOTE(review): the key uses mbuf->port whereas other handlers use get_port(mbuf) - confirm both agree.
349 static inline void build_icmp_reply_message(struct task_base *tbase, struct rte_mbuf *mbuf)
351 struct task_master *task = (struct task_master *)tbase;
353 key.port = mbuf->port;
354 prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
// Swap Ethernet addresses, using dst_mac as the temporary.
355 prox_rte_ether_addr dst_mac;
356 prox_rte_ether_addr_copy(&hdr->s_addr, &dst_mac);
357 prox_rte_ether_addr_copy(&hdr->d_addr, &hdr->s_addr);
358 prox_rte_ether_addr_copy(&dst_mac, &hdr->d_addr);
// Swap IPv4 addresses; key.ip keeps the original destination (our own address) for the lookup.
359 prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1);
360 key.ip = ip_hdr->dst_addr;
361 ip_hdr->dst_addr = ip_hdr->src_addr;
362 ip_hdr->src_addr = key.ip;
// The ICMP header is taken to start right after a fixed-size IPv4 header (ip_hdr + 1).
363 prox_rte_icmp_hdr *picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1);
364 picmp->icmp_type = PROX_RTE_IP_ICMP_ECHO_REPLY;
366 int ret = rte_hash_lookup(task->internal_ip_hash, (const void *)&key);
367 if (unlikely(ret < 0)) {
368 // entry not found for this IP.
369 plogx_dbg("Master ignoring ICMP received on un-registered IP "IPv4_BYTES_FMT" on port %d\n", IP4(key.ip), mbuf->port);
372 struct rte_ring *ring = task->internal_ip_table[ret].ring;
373 mbuf->ol_flags &= ~(PKT_TX_IP_CKSUM|PKT_TX_UDP_CKSUM);
374 tx_ring(tbase, ring, SEND_ICMP_FROM_MASTER, mbuf);
// Handle an ICMP packet forwarded to the master.
// The packet is checked for ether type IPv4, protocol ICMP and a destination address equal to the
// receiving port's registered IP. An echo request is answered through build_icmp_reply_message();
// for both echo requests and echo replies a summary line is logged and the per-port counter reset
// whenever more than rte_get_tsc_hz() TSC cycles have passed since the previous summary.
// NOTE(review): port_id from get_port(mbuf) indexes internal_port_table without a visible bounds check.
378 static inline void handle_icmp(struct task_base *tbase, struct rte_mbuf *mbuf)
380 struct task_master *task = (struct task_master *)tbase;
381 uint8_t port_id = get_port(mbuf);
382 struct port_table *port = &task->internal_port_table[port_id];
383 prox_rte_ether_hdr *hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
384 if (hdr->ether_type != ETYPE_IPv4) {
388 prox_rte_ipv4_hdr *ip_hdr = (prox_rte_ipv4_hdr *)(hdr + 1);
389 if (ip_hdr->next_proto_id != IPPROTO_ICMP) {
393 if (ip_hdr->dst_addr != port->ip) {
398 prox_rte_icmp_hdr *picmp = (prox_rte_icmp_hdr *)(ip_hdr + 1);
399 uint8_t type = picmp->icmp_type;
400 if (type == PROX_RTE_IP_ICMP_ECHO_REQUEST) {
// Rate-limit the log: at most one summary per rte_get_tsc_hz() cycles.
402 if (rte_rdtsc() - port->last_echo_req_rcvd_tsc > rte_get_tsc_hz()) {
403 plog_dbg("Received %u Echo Request on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", port->n_echo_req, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr)));
404 port->n_echo_req = 0;
405 port->last_echo_req_rcvd_tsc = rte_rdtsc();
// NOTE(review): returning a void expression from a void function is not strictly conforming C;
// a plain call followed by `return;` would be cleaner.
407 return build_icmp_reply_message(tbase, mbuf);
408 } else if (type == PROX_RTE_IP_ICMP_ECHO_REPLY) {
410 if (rte_rdtsc() - port->last_echo_rep_rcvd_tsc > rte_get_tsc_hz()) {
411 plog_info("Received %u Echo Reply on IP "IPv4_BYTES_FMT" (last received from IP "IPv4_BYTES_FMT")\n", port->n_echo_rep, IPv4_BYTES(((uint8_t*)&ip_hdr->dst_addr)), IPv4_BYTES(((uint8_t*)&ip_hdr->src_addr)));
412 port->n_echo_rep = 0;
413 port->last_echo_rep_rcvd_tsc = rte_rdtsc();
// Handle a dataplane request to resolve the MAC address of an IPv6 destination.
// Port, destination address and VLAN are read from the control message in mbuf. A source address is
// chosen: the port's local address when it matches ip_dst over the compared length, otherwise the
// port's global address when that is not the null address. The requesting task's ring is recorded
// in external_ip6_table (same de-duplication and PROX_MAX_ARP_REQUESTS cap as the IPv4 path), then
// the mbuf is rewritten into a neighbour solicitation and sent back with SEND_NDP_FROM_MASTER.
// NOTE(review): hdr_arp is not used in the visible code.
420 static inline void handle_unknown_ip6(struct task_base *tbase, struct rte_mbuf *mbuf)
422 struct task_master *task = (struct task_master *)tbase;
423 struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *);
424 uint8_t port_id = get_port(mbuf);
425 struct ipv6_addr *ip_dst = ctrl_ring_get_ipv6_addr(mbuf);
426 uint16_t vlan = ctrl_ring_get_vlan(mbuf);
429 plogx_dbg("\tMaster trying to find MAC of external IP "IPv6_BYTES_FMT" for port %d\n", IPv6_BYTES(ip_dst->bytes), port_id);
430 if (unlikely(port_id >= PROX_MAX_PORTS)) {
431 plogx_dbg("Port %d not found", port_id);
435 struct ipv6_addr *local_ip_src = &task->internal_port_table[port_id].local_ipv6_addr;
436 struct ipv6_addr *global_ip_src = &task->internal_port_table[port_id].global_ipv6_addr;
437 struct ipv6_addr *ip_src;
// NOTE(review): memcmp() takes a byte count; confirm v6_mask_length is expressed in bytes, not bits,
// and can never exceed sizeof(struct ipv6_addr).
438 if (memcmp(local_ip_src, ip_dst, prox_port_cfg[port_id].v6_mask_length) == 0)
439 ip_src = local_ip_src;
// A non-zero memcmp() result means the global address differs from null_addr, i.e. it is set.
440 else if (memcmp(global_ip_src, &null_addr, 16))
441 ip_src = global_ip_src;
443 plogx_dbg("Unable to find a src ip for dst ip "IPv6_BYTES_FMT"\n", IPv6_BYTES(ip_dst->bytes));
447 struct rte_ring *ring = task->ctrl_tx_rings[get_core(mbuf) * MAX_TASKS_PER_CORE + get_task(mbuf)];
// NOTE(review): presumably reached when ring is NULL - the condition is not visible here, confirm.
450 plogx_dbg("Port %d not registered", port_id);
455 ret2 = rte_hash_add_key(task->external_ip6_hash, (const void *)ip_dst);
456 if (unlikely(ret2 < 0)) {
457 plogx_dbg("Unable to add IP "IPv6_BYTES_FMT" in external_ip6_hash\n", IPv6_BYTES(ip_dst->bytes));
462 // If multiple tasks requesting the same info, we will need to send a reply to all of them
463 // However if one task sends multiple requests to the same IP (e.g. because it is not answering)
464 // then we should not send multiple replies to the same task
465 if (task->external_ip6_table[ret2].nb_requests >= PROX_MAX_ARP_REQUESTS) {
466 // This can only happen if really many tasks requests the same IP
467 plogx_dbg("Unable to add request for IP "IPv6_BYTES_FMT" in external_ip6_table\n", IPv6_BYTES(ip_dst->bytes));
// Look for this ring among the ones already waiting for ip_dst.
471 for (i = 0; i < task->external_ip6_table[ret2].nb_requests; i++) {
472 if (task->external_ip6_table[ret2].rings[i] == ring)
475 if (i >= task->external_ip6_table[ret2].nb_requests) {
476 // If this is a new request i.e. a new task requesting a new IP
477 task->external_ip6_table[ret2].rings[task->external_ip6_table[ret2].nb_requests] = ring;
478 task->external_ip6_table[ret2].nb_requests++;
479 // Only needed for first request - but avoid test and copy the same 6 bytes
480 // In most cases we will only have one request per IP.
481 //memcpy(&task->external_ip6_table[ret2].mac, &task->internal_port_table[port_id].mac, sizeof(prox_rte_ether_addr));
484 // As timers are not handled by master, we might send an NS request even if one was just sent
485 // (and not yet answered) by another task
486 build_neighbour_sollicitation(mbuf, &task->internal_port_table[port_id].mac, ip_dst, ip_src, vlan);
487 tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf);
// Handle an IPv6 Router Solicitation.
// When the receiving port is flagged IPV6_ROUTER, the mbuf is rewritten into a Router Advertisement
// (port MAC, port local IPv6 address, stored router prefix, vlan) and sent to the port's ring with
// SEND_NDP_FROM_MASTER.
// NOTE(review): what happens to the mbuf for a non-router port is not visible here - confirm it is freed.
// NOTE(review): port from get_port(mbuf) indexes the port tables without a visible bounds check.
490 static inline void handle_rs(struct task_base *tbase, struct rte_mbuf *mbuf, prox_rte_ipv6_hdr *ipv6_hdr, uint16_t vlan)
492 struct task_master *task = (struct task_master *)tbase;
494 uint8_t port = get_port(mbuf);
496 if (task->internal_port_table[port].flags & IPV6_ROUTER) {
497 plogx_dbg("\tMaster handling Router Solicitation from ip "IPv6_BYTES_FMT" on port %d\n", IPv6_BYTES(ipv6_hdr->src_addr), port);
498 struct rte_ring *ring = task->internal_port_table[port].ring;
499 build_router_advertisement(mbuf, &prox_port_cfg[port].eth_addr, &task->internal_port_table[port].local_ipv6_addr, &task->internal_port_table[port].router_prefix, vlan);
500 tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf);
// Handle an IPv6 Router Advertisement.
// After checking that the IPv6 payload length fits in the packet, the ICMPv6 options are walked.
// The prefix found in a Prefix Information option is combined with the port MAC through set_EUI()
// to form a global IPv6 address, which is sent to the port's ring with IPV6_INFO_FROM_MASTER.
// NOTE(review): the `vlan` parameter is not used in the visible code.
505 static inline void handle_ra(struct task_base *tbase, struct rte_mbuf *mbuf, prox_rte_ipv6_hdr *ipv6_hdr, uint16_t vlan)
507 struct task_master *task = (struct task_master *)tbase;
508 int i, ret, send = 0;
509 uint8_t port = get_port(mbuf);
510 struct rte_ring *ring = task->internal_port_table[port].ring;
512 plog_dbg("Master handling Router Advertisement from ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(ipv6_hdr->src_addr), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len));
// Sanity check: Ethernet header + IPv6 header + declared payload must not exceed the packet length.
513 if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) {
// NOTE(review): sizeof() yields size_t; %zu would be the matching conversion instead of %ld.
514 plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len));
// NOTE(review): presumably reached when ring is NULL - the condition is not visible here, confirm.
519 plog_info("TX side not initialized yet => dropping\n");
// Number of option bytes left to parse. NOTE(review): the result is narrowed to int16_t - confirm
// that a payload_len smaller than the RA header cannot produce a misleading positive value.
523 int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_RA) + sizeof(struct icmpv6_option);
524 struct icmpv6_RA *router_advertisement = (struct icmpv6_RA *)(ipv6_hdr + 1);
525 struct icmpv6_option *option = (struct icmpv6_option *)&router_advertisement->options;
// NOTE(review): prefix_option is only assigned in the prefix-information case below; the copy from
// prefix_option->prefix after the loop must stay guarded (presumably by `send`) - confirm.
526 struct icmpv6_prefix_option *prefix_option;
527 while(option_len > 0) {
528 uint8_t type = option->type;
530 case ICMPv6_source_link_layer_address:
531 plog_dbg("\tOption %d = Source Link Layer Address\n", type);
533 case ICMPv6_prefix_information:
534 prefix_option = (struct icmpv6_prefix_option *)option;
535 plog_dbg("\tOption %d = Prefix Information = %s\n", type, IP6_Canonical(&prefix_option->prefix));
539 plog_dbg("\tOption %d = MTU\n", type);
542 plog_dbg("\tOption %d = Unknown Option\n", type);
// option->length counts 8-byte units; zero would never advance and an oversized value would
// run past the remaining option bytes.
545 if ((option->length == 0) || (option->length *8 > option_len)) {
546 plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length);
// Advance to the next option.
550 option_len -=option->length * 8;
551 option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8);
// Build the global address from the advertised prefix and the port MAC, then notify the dataplane.
554 struct ipv6_addr global_ipv6;
555 memcpy(&global_ipv6, &prefix_option->prefix, sizeof(struct ipv6_addr));
556 set_EUI(&global_ipv6, &task->internal_port_table[port].mac);
557 tx_ring_ip6(tbase, ring, IPV6_INFO_FROM_MASTER, mbuf, &global_ipv6);
// Handle an IPv6 Neighbour Solicitation.
// After the payload length check and a walk over the ICMPv6 options (logging only in the visible
// code), the target address is used as lookup key. Ports flagged HANDLE_RANDOM_LOCAL_IP_FLAG or
// HANDLE_RANDOM_GLOBAL_IP_FLAG answer with a MAC derived from the target address through
// create_mac_from_EUI(). Otherwise the target is looked up in internal_ip6_hash and, when found with
// a non-NULL ring, a Neighbour Advertisement built with the stored MAC is sent with SEND_NDP_FROM_MASTER.
562 static inline void handle_ns(struct task_base *tbase, struct rte_mbuf *mbuf, prox_rte_ipv6_hdr *ipv6_hdr, uint16_t vlan)
564 struct task_master *task = (struct task_master *)tbase;
565 struct icmpv6_NS *neighbour_sollicitation = (struct icmpv6_NS *)(ipv6_hdr + 1);
567 uint8_t port = get_port(mbuf);
// NOTE(review): this ring is shadowed by the ring variables declared in the inner blocks below.
568 struct rte_ring *ring = task->internal_port_table[port].ring;
570 plog_dbg("Master handling Neighbour Sollicitation for ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(neighbour_sollicitation->target_address.bytes), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len));
// Sanity check: Ethernet header + IPv6 header + declared payload must not exceed the packet length.
571 if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) {
572 plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len));
// Number of option bytes left to parse.
576 int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_NS) + sizeof(struct icmpv6_option);
577 struct icmpv6_option *option = (struct icmpv6_option *)&neighbour_sollicitation->options;
578 while(option_len > 0) {
579 uint8_t type = option->type;
581 case ICMPv6_source_link_layer_address:
582 plog_dbg("Option %d = Source Link Layer Address\n", type);
585 plog_dbg("Option %d = Unknown Option\n", type);
// option->length counts 8-byte units; reject zero (no progress) and lengths beyond the remaining bytes.
588 if ((option->length == 0) || (option->length *8 > option_len)) {
589 plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length);
593 option_len -=option->length * 8;
594 option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8);
597 memcpy(&key.ip6, &neighbour_sollicitation->target_address, sizeof(struct ipv6_addr));
// Compare only the first 8 bytes of the target with the port's local address.
600 if (memcmp(&neighbour_sollicitation->target_address, &task->internal_port_table[port].local_ipv6_addr, 8) == 0) {
602 if (task->internal_port_table[port].flags & HANDLE_RANDOM_LOCAL_IP_FLAG) {
603 prox_rte_ether_addr mac;
604 plogx_dbg("\tMaster handling NS request for ip "IPv6_BYTES_FMT" on port %d which supports random ip\n", IPv6_BYTES(key.ip6.bytes), key.port);
605 struct rte_ring *ring = task->internal_port_table[port].ring;
606 create_mac_from_EUI(&key.ip6, &mac);
607 build_neighbour_advertisement(tbase, mbuf, &mac, &task->internal_port_table[port].local_ipv6_addr, PROX_SOLLICITED, vlan);
608 tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf);
612 if (task->internal_port_table[port].flags & HANDLE_RANDOM_GLOBAL_IP_FLAG) {
613 prox_rte_ether_addr mac;
614 plogx_dbg("\tMaster handling NS request for ip "IPv6_BYTES_FMT" on port %d which supports random ip\n", IPv6_BYTES(key.ip6.bytes), key.port);
615 struct rte_ring *ring = task->internal_port_table[port].ring;
616 create_mac_from_EUI(&key.ip6, &mac);
617 build_neighbour_advertisement(tbase, mbuf, &mac, &task->internal_port_table[port].global_ipv6_addr, PROX_SOLLICITED, vlan);
618 tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf);
// Not a random-IP port: answer only for addresses registered in internal_ip6_hash.
623 ret = rte_hash_lookup(task->internal_ip6_hash, (const void *)&key);
624 if (unlikely(ret < 0)) {
625 // entry not found for this IP.
626 plogx_dbg("Master ignoring Neighbour Sollicitation received on un-registered IP "IPv6_BYTES_FMT" on port %d\n", IPv6_BYTES(key.ip6.bytes), port);
629 struct rte_ring *ring = task->internal_ip6_table[ret].ring;
// NOTE(review): this early return does not release mbuf in the visible code - confirm no leak.
630 if (ring == NULL) return;
631 build_neighbour_advertisement(tbase, mbuf, &task->internal_ip6_table[ret].mac, &key.ip6, PROX_SOLLICITED, vlan);
632 tx_ring(tbase, ring, SEND_NDP_FROM_MASTER, mbuf);
// Handle an IPv6 Neighbour Advertisement.
// After the payload length check the ICMPv6 options are walked to find a Target Link Layer Address.
// The advertised address (destination_address field of the NA) is looked up in external_ip6_hash;
// when tasks are waiting for it, the same mbuf is passed to every recorded ring through
// tx_ring_ip6_data() with MAC_INFO_FROM_MASTER_FOR_IPV6 and nb_requests is reset to 0.
// NOTE(review): the `vlan` parameter and hdr_arp are not used in the visible code.
636 static inline void handle_na(struct task_base *tbase, struct rte_mbuf *mbuf, prox_rte_ipv6_hdr *ipv6_hdr, uint16_t vlan)
638 struct task_master *task = (struct task_master *)tbase;
639 struct icmpv6_NA *neighbour_advertisement = (struct icmpv6_NA *)(ipv6_hdr + 1);
641 uint8_t port = get_port(mbuf);
642 struct rte_ring *ring = task->internal_port_table[port].ring;
644 plog_dbg("Master handling Neighbour Advertisement for ip "IPv6_BYTES_FMT" on port %d - len = %d; payload_len = %d\n", IPv6_BYTES(neighbour_advertisement->destination_address.bytes), port, rte_pktmbuf_pkt_len(mbuf), rte_be_to_cpu_16(ipv6_hdr->payload_len));
// Sanity check: Ethernet header + IPv6 header + declared payload must not exceed the packet length.
645 if (rte_be_to_cpu_16(ipv6_hdr->payload_len) + sizeof(prox_rte_ipv6_hdr) + sizeof(prox_rte_ether_hdr) > rte_pktmbuf_pkt_len(mbuf)) {
646 plog_err("Unexpected length received: pkt_len = %d, ipv6 hdr length = %ld, ipv6 payload len = %d\n", rte_pktmbuf_pkt_len(mbuf), sizeof(prox_rte_ipv6_hdr), rte_be_to_cpu_16(ipv6_hdr->payload_len));
// Number of option bytes left to parse.
650 int16_t option_len = rte_be_to_cpu_16(ipv6_hdr->payload_len) - sizeof(struct icmpv6_NA) + sizeof(struct icmpv6_option);
651 struct icmpv6_option *option = (struct icmpv6_option *)&neighbour_advertisement->options;
// Stays NULL unless a Target Link Layer Address option is present.
652 uint8_t *target_address = NULL;
653 while(option_len > 0) {
654 uint8_t type = option->type;
656 case ICMPv6_source_link_layer_address:
657 plog_dbg("Option %d = Source Link Layer Address\n", type);
659 case ICMPv6_target_link_layer_address:
// A length of 1 means one 8-byte unit for the whole option.
660 if (option->length != 1) {
661 plog_err("Unexpected option length = %u for Target Link Layer Address\n", option->length);
664 target_address = option->data;
665 plog_dbg("Option %d = Target Link Layer Address = "MAC_BYTES_FMT"\n", type, MAC_BYTES(target_address));
668 plog_dbg("Option %d = Unknown Option\n", type);
// option->length counts 8-byte units; reject zero (no progress) and lengths beyond the remaining bytes.
671 if ((option->length == 0) || (option->length *8 > option_len)) {
672 plog_err("Unexpected option length (%d) received in option %d: %d\n", option->length, option->type, option->length);
676 option_len -=option->length * 8;
677 option = (struct icmpv6_option *)(((uint8_t *)option) + option->length * 8);
// NOTE(review): without a link-layer option the IPv6 address bytes themselves are used as the
// "MAC" source below - confirm this fallback is intended.
680 if (target_address == NULL) {
681 target_address = (uint8_t *)&neighbour_advertisement->destination_address;
683 struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *);
684 struct ipv6_addr *key = &neighbour_advertisement->destination_address;
686 ret = rte_hash_lookup(task->external_ip6_hash, (const void *)key);
687 if (unlikely(ret < 0)) {
688 // entry not found for this IP: we did not ask a request, delete the reply
689 plog_err("Unkown IP "IPv6_BYTES_FMT"", IPv6_BYTES(neighbour_advertisement->destination_address.bytes));
692 // entry found for this IP
693 uint16_t nb_requests = task->external_ip6_table[ret].nb_requests;
694 //memcpy(&hdr->d_addr.addr_bytes, &task->external_ip6_table[ret].mac, sizeof(prox_rte_ether_addr));
695 // If we receive a request from multiple task for the same IP, then we update all tasks
696 if (task->external_ip6_table[ret].nb_requests) {
// The one mbuf is handed out nb_requests times, so its reference count is set to that number.
697 rte_mbuf_refcnt_set(mbuf, nb_requests);
698 for (int i = 0; i < nb_requests; i++) {
699 struct rte_ring *ring = task->external_ip6_table[ret].rings[i];
// NOTE(review): *(uint64_t *)target_address loads 8 bytes through a uint8_t pointer. For a length-1
// link-layer option only 6 bytes follow the 2-byte type/length header, so this reads 2 bytes past
// the option, and the access may be unaligned. A memcpy() of the 6 MAC bytes into a zeroed
// uint64_t would avoid both - confirm what tx_ring_ip6_data() expects in the upper bytes.
700 tx_ring_ip6_data(tbase, ring, MAC_INFO_FROM_MASTER_FOR_IPV6, mbuf, &neighbour_advertisement->destination_address, *(uint64_t *)target_address);
702 task->external_ip6_table[ret].nb_requests = 0;
704 plog_err("UNEXPECTED nb_requests == 0");
710 static inline void handle_message(struct task_base *tbase, struct rte_mbuf *mbuf, int ring_id)
712 struct task_master *task = (struct task_master *)tbase;
713 prox_rte_ether_hdr *ether_hdr;
714 struct icmpv6 *icmpv6;
715 int command = get_command(mbuf);
716 uint8_t port = get_port(mbuf);
718 uint16_t vlan = 0, ether_type;
719 uint8_t vdev_port = prox_port_cfg[port].dpdk_mapping;
720 plogx_dbg("\tMaster received %s (%x) from mbuf %p\n", actions_string[command], command, mbuf);
721 struct my_arp_t *arp;
725 if (vdev_port != NO_VDEV_PORT) {
726 // If a virtual (net_tap) device is attached, send the (BGP) packet to this device
727 // The kernel will receive and handle it.
728 plogx_dbg("\tMaster forwarding BGP packet to TAP\n");
729 int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1);
735 if (vdev_port != NO_VDEV_PORT) {
736 // If a virtual (net_tap) device is attached, send the (PING) packet to this device
737 // The kernel will receive and handle it.
738 plogx_dbg("\tMaster forwarding packet to TAP\n");
739 int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1);
742 handle_icmp(tbase, mbuf);
744 case ARP_PKT_FROM_NET_TO_MASTER:
745 if (vdev_port != NO_VDEV_PORT) {
746 // If a virtual (net_tap) device is attached, send the (ARP) packet to this device
747 // The kernel will receive and handle it.
748 plogx_dbg("\tMaster forwarding packet to TAP\n");
749 int n = rte_eth_tx_burst(prox_port_cfg[port].dpdk_mapping, 0, &mbuf, 1);
752 ether_hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
753 ether_type = ether_hdr->ether_type;
754 if (ether_type == ETYPE_VLAN) {
755 prox_rte_vlan_hdr *vlan_hdr = (prox_rte_vlan_hdr *)(ether_hdr + 1);
756 arp = (struct my_arp_t *)(vlan_hdr + 1);
757 ether_type = vlan_hdr->eth_proto;
759 arp = (struct my_arp_t *)(ether_hdr + 1);
762 if (ether_type != ETYPE_ARP) {
763 plog_err("\tUnexpected message received: ARP_PKT_FROM_NET_TO_MASTER with ether_type %x\n", ether_type);
767 if (arp_is_gratuitous(arp)) {
768 plog_info("\tReceived gratuitous packet \n");
771 } else if (memcmp(arp, &arp_reply, 8) == 0) {
772 // uint32_t ip = arp->data.spa;
773 handle_arp_reply(tbase, mbuf, arp);
774 } else if (memcmp(arp, &arp_request, 8) == 0) {
775 handle_arp_request(tbase, mbuf, arp);
777 plog_info("\tReceived unexpected ARP operation %d\n", arp->oper);
782 case IP4_REQ_MAC_TO_MASTER:
783 if (vdev_port != NO_VDEV_PORT) {
784 // We send a packet to the kernel with the proper destnation IP address and our src IP address
785 // This means that if a generator sends packets from many sources all ARP will still
786 // be sent from the same IP src. This might be a limitation.
787 // This prevent to have to open as many sockets as there are sources MAC addresses
788 // We also always use the same UDP ports - as the packet will finally not leave the system anyhow
790 struct ether_hdr_arp *hdr_arp = rte_pktmbuf_mtod(mbuf, struct ether_hdr_arp *);
791 uint32_t ip = get_ip(mbuf);
792 vlan = ctrl_ring_get_vlan(mbuf);
793 struct rte_ring *ring = task->ctrl_tx_rings[get_core(mbuf) * MAX_TASKS_PER_CORE + get_task(mbuf)];
795 // First check whether MAC address is not already in kernel MAC table.
796 // If present in our hash with a non-null MAC, then present in kernel. A null MAC
797 // might just mean that we sent a request.
798 // If MAC present in kernel, do not send a packet towards the kernel to try to generate
799 // an ARP request, as the kernel would not generate it.
800 int ret = rte_hash_lookup(task->external_ip_hash, (const void *)&ip);
801 if ((ret >= 0) && (!prox_rte_is_zero_ether_addr(&task->external_ip_table[ret].mac))) {
802 memcpy(&hdr_arp->arp.data.sha, &task->external_ip_table[ret].mac, sizeof(prox_rte_ether_addr));
803 plogx_dbg("\tMaster ready to send MAC_INFO_FROM_MASTER ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n",
804 IP4(ip), MAC_BYTES(hdr_arp->arp.data.sha.addr_bytes));
805 tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbuf, ip);
809 struct sockaddr_in dst;
810 dst.sin_family = AF_INET;
811 dst.sin_addr.s_addr = ip;
812 dst.sin_port = rte_cpu_to_be_16(PROX_PSEUDO_PKT_PORT);
815 for (vlan_id = 0; vlan_id < prox_port_cfg[vdev_port].n_vlans; vlan_id++) {
816 if (prox_port_cfg[vdev_port].vlan_tags[vlan_id] == vlan)
819 if (vlan_id >= prox_port_cfg[vdev_port].n_vlans) {
821 plogx_info("\tDid not send to TAP IP "IPv4_BYTES_FMT" as wrong VLAN %d\n", IPv4_BYTES(((uint8_t*)&ip)), vlan);
825 int n = sendto(prox_port_cfg[vdev_port].fds[vlan_id], (char*)(&ip), 0, MSG_DONTROUTE, (struct sockaddr *)&dst, sizeof(struct sockaddr_in));
827 plogx_info("\tFailed to send to TAP IP "IPv4_BYTES_FMT" using fd %d, error = %d (%s)\n", IPv4_BYTES(((uint8_t*)&ip)), prox_port_cfg[vdev_port].fds[vlan_id], errno, strerror(errno));
829 plogx_dbg("\tSent %d bytes to TAP IP "IPv4_BYTES_FMT" using fd %d\n", n, IPv4_BYTES(((uint8_t*)&ip)), prox_port_cfg[vdev_port].fds[vlan_id]);
831 record_request(tbase, ip, port, ring);
835 handle_unknown_ip(tbase, mbuf);
837 case IP6_REQ_MAC_TO_MASTER:
838 handle_unknown_ip6(tbase, mbuf);
840 case NDP_PKT_FROM_NET_TO_MASTER:
841 ether_hdr = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
842 prox_rte_ipv6_hdr *ipv6_hdr = prox_get_ipv6_hdr(ether_hdr, rte_pktmbuf_pkt_len(mbuf), &vlan);
843 if (unlikely((!ipv6_hdr) || (ipv6_hdr->proto != ICMPv6))) {
846 plog_err("\tUnexpected message received: NDP_PKT_FROM_NET_TO_MASTER with ether_type %x\n", ether_hdr->ether_type);
848 plog_err("\tUnexpected message received: NDP_PKT_FROM_NET_TO_MASTER with ether_type %x and proto %x\n", ether_hdr->ether_type, ipv6_hdr->proto);
852 icmpv6 = (struct icmpv6 *)(ipv6_hdr + 1);
853 switch (icmpv6->type) {
855 plog_err("IPV6 ICMPV6 Destination Unreachable\n");
859 plog_err("IPV6 ICMPV6 packet too big\n");
863 plog_err("IPV6 ICMPV6 Time Exceeded\n");
867 plog_err("IPV6 ICMPV6 Parameter Problem\n");
871 handle_rs(tbase, mbuf, ipv6_hdr, vlan);
874 handle_ra(tbase, mbuf, ipv6_hdr, vlan);
877 handle_ns(tbase, mbuf, ipv6_hdr, vlan);
880 handle_na(tbase, mbuf, ipv6_hdr, vlan);
883 plog_err("IPV6 ICMPV6 Redirect not handled\n");
887 plog_err("Unexpected type %d in IPV6 ICMPV6\n", icmpv6->type);
893 plogx_dbg("\tMaster received unexpected message\n");
// Initialise the control-plane state of the master task:
//   - when DSF_L3_ENABLED is set: external/internal IPv4 hashes and tables;
//   - when DSF_NDP_ENABLED is set: external/internal IPv6 hashes and tables;
//   - two non-blocking NETLINK_ROUTE sockets, one subscribed to neighbour
//     notifications (stored in task->arp_fds) and one subscribed to IPv4
//     route notifications (stored in task->route_fds);
//   - the mbuf pool stored in tbase->l3.arp_nd_pool.
// tbase: base of the master task; it is cast to struct task_master.
// Every hash creation, table allocation, socket(), bind() and the mempool
// creation is checked with PROX_PANIC.
899 void init_ctrl_plane(struct task_base *tbase)
901 struct task_master *task = (struct task_master *)tbase;
// socket_id (socket of the master lcore) is passed to prox_zmalloc for the tables below.
902 int socket_id = rte_lcore_to_socket_id(prox_cfg.master);
// All hashes and tables below are sized with n_entries.
903 uint32_t n_entries = MAX_ARP_ENTRIES * 4;
// static storage: the formatted name outlives this call.
904 static char hash_name[30];
906 sprintf(hash_name, "A%03d_hash_arp_table", prox_cfg.master);
// hash_params is reused for every rte_hash_create() call below; the visible
// code only changes key_len between calls.
907 struct rte_hash_parameters hash_params = {
909 .entries = n_entries,
910 .hash_func = rte_hash_crc,
911 .hash_func_init_val = 0,
// IPv4: external hash keyed by a 32-bit IP, internal hash keyed by struct ip_port.
913 if (prox_cfg.flags & DSF_L3_ENABLED) {
914 hash_params.key_len = sizeof(uint32_t);
915 task->external_ip_hash = rte_hash_create(&hash_params);
916 PROX_PANIC(task->external_ip_hash == NULL, "Failed to set up external ip hash\n");
917 plog_info("\texternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
920 task->external_ip_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id);
921 PROX_PANIC(task->external_ip_table == NULL, "Failed to allocate memory for %u entries in external ip table\n", n_entries);
922 plog_info("\texternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table));
924 hash_params.key_len = sizeof(struct ip_port);
925 task->internal_ip_hash = rte_hash_create(&hash_params);
926 PROX_PANIC(task->internal_ip_hash == NULL, "Failed to set up internal ip hash\n");
927 plog_info("\tinternal ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
930 task->internal_ip_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id);
931 PROX_PANIC(task->internal_ip_table == NULL, "Failed to allocate memory for %u entries in internal ip table\n", n_entries);
932 plog_info("\tinternal ip table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table));
// IPv6: external hash keyed by struct ipv6_addr, internal hash keyed by struct ip6_port.
// The IPv6 tables reuse the element types of the IPv4 tables.
935 if (prox_cfg.flags & DSF_NDP_ENABLED) {
936 hash_params.key_len = sizeof(struct ipv6_addr);
937 task->external_ip6_hash = rte_hash_create(&hash_params);
938 PROX_PANIC(task->external_ip6_hash == NULL, "Failed to set up external ip6 hash\n");
939 plog_info("\texternal ip6 hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
942 task->external_ip6_table = (struct external_ip_table *)prox_zmalloc(n_entries * sizeof(struct external_ip_table), socket_id);
943 PROX_PANIC(task->external_ip6_table == NULL, "Failed to allocate memory for %u entries in external ip6 table\n", n_entries);
944 plog_info("\texternal ip6_table, with %d entries of size %ld\n", n_entries, sizeof(struct external_ip_table));
946 hash_params.key_len = sizeof(struct ip6_port);
947 task->internal_ip6_hash = rte_hash_create(&hash_params);
948 PROX_PANIC(task->internal_ip6_hash == NULL, "Failed to set up internal ip6 hash\n");
949 plog_info("\tinternal ip6 hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
952 task->internal_ip6_table = (struct ip_table *)prox_zmalloc(n_entries * sizeof(struct ip_table), socket_id);
953 PROX_PANIC(task->internal_ip6_table == NULL, "Failed to allocate memory for %u entries in internal ip6 table\n", n_entries);
954 plog_info("\tinternal ip6 table, with %d entries of size %ld\n", n_entries, sizeof(struct ip_table));
// First netlink socket: neighbour (RTMGRP_NEIGH) notifications, read by handle_arp_event().
957 int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
958 PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno);
// Switch the socket to non-blocking mode.
// NOTE(review): the results of both fcntl() calls are not checked.
959 fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
961 struct sockaddr_nl sockaddr;
962 memset(&sockaddr, 0, sizeof(struct sockaddr_nl));
963 sockaddr.nl_family = AF_NETLINK;
964 sockaddr.nl_groups = RTMGRP_NEIGH | RTMGRP_NOTIFY;
965 int rc = bind(fd, (struct sockaddr *)&sockaddr, sizeof(struct sockaddr_nl));
966 PROX_PANIC(rc < 0, "Failed to bind to RTMGRP_NEIGH netlink group\n");
967 task->arp_fds.fd = fd;
// POLLIN is the event flag poll(2) defines for pollfd.events.
968 task->arp_fds.events = POLLIN;
969 plog_info("\tRTMGRP_NEIGH netlink group bound; fd = %d\n", fd);
// Second netlink socket: IPv4 route (RTMGRP_IPV4_ROUTE) notifications, read by handle_route_event().
971 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
972 PROX_PANIC(fd < 0, "Failed to open netlink socket: %d\n", errno);
973 fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
974 struct sockaddr_nl sockaddr2;
975 memset(&sockaddr2, 0, sizeof(struct sockaddr_nl));
976 sockaddr2.nl_family = AF_NETLINK;
977 sockaddr2.nl_groups = RTMGRP_IPV4_ROUTE | RTMGRP_NOTIFY;
978 rc = bind(fd, (struct sockaddr *)&sockaddr2, sizeof(struct sockaddr_nl));
// The message names the group this socket actually subscribes to.
979 PROX_PANIC(rc < 0, "Failed to bind to RTMGRP_IPV4_ROUTE netlink group\n");
980 task->route_fds.fd = fd;
981 task->route_fds.events = POLLIN;
982 plog_info("\tRTMGRP_IPV4_ROUTE netlink group bound; fd = %d\n", fd);
// Mbuf pool used by the master to build messages for the dataplane cores:
// NB_ARP_MBUF elements of ARP_MBUF_SIZE bytes with a cache of NB_CACHE_ARP_MBUF.
984 static char name[] = "master_arp_nd_pool";
985 const int NB_ARP_MBUF = 1024;
986 const int ARP_MBUF_SIZE = 2048;
987 const int NB_CACHE_ARP_MBUF = 256;
988 struct rte_mempool *ret = rte_mempool_create(name, NB_ARP_MBUF, ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF,
989 sizeof(struct rte_pktmbuf_pool_private), rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, 0,
991 PROX_PANIC(ret == NULL, "Failed to allocate ARP memory pool on socket %u with %u elements\n",
992 rte_socket_id(), NB_ARP_MBUF);
993 plog_info("\tMempool %p (%s) size = %u * %u cache %u, socket %d\n", ret, name, NB_ARP_MBUF,
994 ARP_MBUF_SIZE, NB_CACHE_ARP_MBUF, rte_socket_id());
// handle_route_event() and handle_arp_event() take their mbufs from this pool.
995 tbase->l3.arp_nd_pool = ret;
// Read one message from the route netlink socket (task->route_fds.fd) and,
// for an IPv4 RTM_NEWROUTE / RTM_DELROUTE concerning the main or local
// routing table, forward the route (ip, gw_ip, dst_len) with tx_ring_route()
// to the ring of the DPDK port mapped to the interface named in the message.
// tbase: base of the master task; it is cast to struct task_master.
// NOTE(review): the case labels of the attribute switch and the statements
// guarded by several tests below are not shown here; the comments describe
// only what the visible statements do.
998 static void handle_route_event(struct task_base *tbase)
1000 struct task_master *task = (struct task_master *)tbase;
1001 struct rte_mbuf *mbufs[MAX_RING_BURST];
// NOTE(review): interface_index has no initialiser on this line.
1002 int fd = task->route_fds.fd, interface_index, mask = -1;
1003 char interface_name[IF_NAMESIZE] = {0};
// netlink_buf is the file-scope buffer that handle_arp_event() also receives into.
1004 int len = recv(fd, netlink_buf, sizeof(netlink_buf), 0);
1005 uint32_t ip = 0, gw_ip = 0;
1007 plog_err("Failed to recv from netlink: %d\n", errno);
// NOTE(review): only the netlink header at the start of the buffer is examined in the visible code.
1010 struct nlmsghdr * nl_hdr = (struct nlmsghdr *)netlink_buf;
// Multipart messages are reported as an error.
1011 if (nl_hdr->nlmsg_flags & NLM_F_MULTI) {
1012 plog_err("Unexpected multipart netlink message\n");
// Only route additions and deletions are of interest.
1015 if ((nl_hdr->nlmsg_type != RTM_NEWROUTE) && (nl_hdr->nlmsg_type != RTM_DELROUTE))
1018 struct rtmsg *rtmsg = (struct rtmsg *)NLMSG_DATA(nl_hdr);
1019 int rtm_family = rtmsg->rtm_family;
1020 if (rtm_family != AF_INET) {
1021 plog_warn("Unhandled non IPV4 routing message\n");
// Only the main and local routing tables are considered.
1024 if ((rtmsg->rtm_table != RT_TABLE_MAIN) && (rtmsg->rtm_table != RT_TABLE_LOCAL))
// dst_len is logged below as the /N suffix of the IP and passed to tx_ring_route().
1026 int dst_len = rtmsg->rtm_dst_len;
// Walk the route attributes that follow the rtmsg header.
1028 struct rtattr *rta = (struct rtattr *)RTM_RTA(rtmsg);
1029 int rtl = RTM_PAYLOAD(nl_hdr);
1030 for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) {
1031 switch (rta->rta_type) {
// ip: 32-bit value read from the attribute payload.
1033 ip = *((uint32_t *)RTA_DATA(rta));
// Interface index, translated into interface_name with if_indextoname().
1036 interface_index = *((int *)RTA_DATA(rta));
1037 if (if_indextoname(interface_index, interface_name) == NULL) {
1038 plog_info("Unknown Interface Index %d\n", interface_index);
// NOTE(review): mask is written here but not read anywhere in the visible code.
1042 mask = *((int *)RTA_DATA(rta));
// gw_ip: 32-bit value read from the attribute payload.
1045 gw_ip = *((uint32_t *)RTA_DATA(rta));
// Search every available port and each of its VLAN entries for a name equal
// to interface_name (the assignment of dpdk_vdev_port is not shown here).
1051 int dpdk_vdev_port = -1;
1052 for (int i = 0; i< prox_rte_eth_dev_count_avail(); i++) {
1053 for (int vlan_id = 0; vlan_id < prox_port_cfg[i].n_vlans; vlan_id++) {
1054 if (strcmp(prox_port_cfg[i].names[vlan_id], interface_name) == 0) {
1059 if (dpdk_vdev_port != -1)
1062 if (dpdk_vdev_port != -1) {
1063 plogx_info("Received netlink message on tap interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name, IP4(ip), dst_len, IP4(gw_ip));
// rte_mempool_get() stores one object, so only mbufs[0] is filled.
1064 int ret1 = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)mbufs);
1065 if (unlikely(ret1 != 0)) {
1066 plog_err("Unable to allocate a mbuf for master to core communication\n");
// The destination ring is looked up through the DPDK port mapped to the matching port.
// The third argument of tx_ring_route() is 1 for RTM_NEWROUTE and 0 for RTM_DELROUTE.
1069 int dpdk_port = prox_port_cfg[dpdk_vdev_port].dpdk_mapping;
1070 tx_ring_route(tbase, task->internal_port_table[dpdk_port].ring, (nl_hdr->nlmsg_type == RTM_NEWROUTE), mbufs[0], ip, gw_ip, dst_len);
// No matching port: the message is only logged.
1072 plog_info("Received netlink message on unknown interface %s for IP "IPv4_BYTES_FMT"/%d, Gateway "IPv4_BYTES_FMT"\n", interface_name[0] ? interface_name:"", IP4(ip), dst_len, IP4(gw_ip));
// Read one message from the neighbour netlink socket (task->arp_fds.fd),
// extract an IPv4 address and a MAC address from its attributes, update
// external_ip_hash / external_ip_table accordingly and send
// MAC_INFO_FROM_MASTER to every ring recorded as waiting for that IP.
// tbase: base of the master task; it is cast to struct task_master.
// NOTE(review): some declarations, the case labels of the attribute switch
// and the statements guarded by several tests are not shown here; the
// comments describe only what the visible statements do.
1076 static void handle_arp_event(struct task_base *tbase)
1078 struct task_master *task = (struct task_master *)tbase;
1079 struct rte_mbuf *mbufs[MAX_RING_BURST];
1080 struct nlmsghdr * nl_hdr;
1081 int fd = task->arp_fds.fd;
// mac starts as all zeroes and stays so unless an attribute below overwrites it.
1084 prox_rte_ether_addr mac;
1085 memset(&mac, 0, sizeof(mac));
// netlink_buf is the file-scope buffer that handle_route_event() also receives into.
1086 len = recv(fd, netlink_buf, sizeof(netlink_buf), 0);
1088 plog_err("Failed to recv from netlink: %d\n", errno);
1091 nl_hdr = (struct nlmsghdr *)netlink_buf;
// Multipart messages are reported as an error.
1092 if (nl_hdr->nlmsg_flags & NLM_F_MULTI) {
1093 plog_err("Unexpected multipart netlink message\n");
// Only neighbour additions and deletions are of interest.
1096 if ((nl_hdr->nlmsg_type != RTM_NEWNEIGH) && (nl_hdr->nlmsg_type != RTM_DELNEIGH))
1099 struct ndmsg *ndmsg = (struct ndmsg *)NLMSG_DATA(nl_hdr);
// NOTE(review): ndm_family is read but not used in the visible code.
1100 int ndm_family = ndmsg->ndm_family;
// Walk the attributes that follow the ndmsg header.
1101 struct rtattr *rta = (struct rtattr *)RTM_RTA(ndmsg);
1102 int rtl = RTM_PAYLOAD(nl_hdr);
1103 for (; RTA_OK(rta, rtl); rta = RTA_NEXT(rta, rtl)) {
1104 switch (rta->rta_type) {
// ip: 32-bit value read from the attribute payload.
1106 ip = *((uint32_t *)RTA_DATA(rta));
// mac: copied by structure assignment from the attribute payload.
1109 mac = *((prox_rte_ether_addr *)(uint64_t *)RTA_DATA(rta));
1115 plogx_info("Received netlink ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
// ret is the index of ip in external_ip_hash, or negative when ip is not present.
1116 ret = rte_hash_lookup(task->external_ip_hash, (const void *)&ip);
1117 if (unlikely(ret < 0)) {
1118 // entry not found for this IP: we did not ask a request.
1119 // This can happen if the kernel updated the ARP table when receiving an ARP_REQUEST
1120 // We must record this, as the ARP entry is now in the kernel table
1121 if (prox_rte_is_zero_ether_addr(&mac)) {
1122 // Timeout or MAC deleted from kernel MAC table
// NOTE(review): this runs only after rte_hash_lookup() reported the key as
// absent, so the deletion looks like a no-op; the new local ret shadows the
// outer one and is not read in the visible code.
1123 int ret = rte_hash_del_key(task->external_ip_hash, (const void *)&ip);
1124 plogx_dbg("ip "IPv4_BYTES_FMT" removed from external_ip_hash\n", IP4(ip));
// Non-zero MAC: insert the IP; the index returned by rte_hash_add_key()
// selects the external_ip_table slot that receives the MAC.
1127 int ret = rte_hash_add_key(task->external_ip_hash, (const void *)&ip);
1128 if (unlikely(ret < 0)) {
1129 plogx_dbg("IP "IPv4_BYTES_FMT" not found in external_ip_hash and unable to add it\n", IP4(ip));
1132 memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr));
1133 plogx_dbg("ip "IPv4_BYTES_FMT" added in external_ip_hash with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
1137 // entry found for this IP
// nb_requests bounds the loop over rings[] below.
1138 uint16_t nb_requests = task->external_ip_table[ret].nb_requests;
1139 if (nb_requests == 0) {
// Record the MAC received from the kernel in the table entry.
1143 memcpy(&task->external_ip_table[ret].mac, &mac, sizeof(prox_rte_ether_addr));
1145 // If we receive a request from multiple task for the same IP, then we update all tasks
// rte_mempool_get() stores one object, so only mbufs[0] is filled.
1146 int ret1 = rte_mempool_get(tbase->l3.arp_nd_pool, (void **)mbufs);
1147 if (unlikely(ret1 != 0)) {
1148 plog_err("Unable to allocate a mbuf for master to core communication\n");
// The same mbuf is sent nb_requests times below, hence one reference per send.
1151 rte_mbuf_refcnt_set(mbufs[0], nb_requests);
1152 for (int i = 0; i < nb_requests; i++) {
1153 struct rte_ring *ring = task->external_ip_table[ret].rings[i];
// The MAC is written into the arp.data.sha field of the message.
1154 struct ether_hdr_arp *hdr = rte_pktmbuf_mtod(mbufs[0], struct ether_hdr_arp *);
1155 memcpy(&hdr->arp.data.sha, &mac, sizeof(prox_rte_ether_addr));
1156 tx_ring_ip(tbase, ring, MAC_INFO_FROM_MASTER, mbufs[0], ip);
1157 plog_dbg("MAC_INFO_FROM_MASTER ip "IPv4_BYTES_FMT" with mac "MAC_BYTES_FMT"\n", IP4(ip), MAC_BYTES(mac.addr_bytes));
// All recorded requests were sent the MAC: reset the counter.
1159 task->external_ip_table[ret].nb_requests = 0;
// Polling function of the master task; init_task_master() installs it in
// handle_ctrl_plane. One call:
//   1. dequeues messages from task->ctrl_rx_ring and passes each one to
//      handle_message() with ring_id 0;
//   2. receives packets from every vdev in task->all_vdev and forwards each
//      one to the ring of that vdev as PKT_FROM_TAP;
//   3. polls the neighbour netlink fd (only when task->max_vdev_id is
//      non-zero) and the route netlink fd, and calls handle_arp_event() /
//      handle_route_event() when poll() reports the fd.
// tbase: base of the master task; it is cast to struct task_master.
// mbuf: marked unused. n_pkts: not referenced in the visible code.
1163 static int handle_ctrl_plane_f(struct task_base *tbase, __attribute__((unused)) struct rte_mbuf **mbuf, uint16_t n_pkts)
1165 int ring_id = 0, j, ret = 0, n = 0;
1166 struct rte_mbuf *mbufs[MAX_RING_BURST];
1167 struct task_master *task = (struct task_master *)tbase;
1169 /* Handle_master works differently than other handle functions
1170 It is not handled by a DPDK dataplane core
1171 It is no thread_generic based, hence do not receive packets the same way
// ret is the number of messages dequeued into mbufs.
1174 ret = ring_deq(task->ctrl_rx_ring, mbufs);
1175 for (j = 0; j < ret; j++) {
1176 handle_message(tbase, mbufs[j], ring_id);
1178 for (int vdev_id = 0; vdev_id < task->max_vdev_id; vdev_id++) {
1179 struct vdev *vdev = &task->all_vdev[vdev_id];
// NOTE(review): up to MAX_PKT_BURST packets are requested while mbufs holds
// MAX_RING_BURST entries - confirm MAX_PKT_BURST <= MAX_RING_BURST.
1180 n = rte_eth_rx_burst(vdev->port_id, 0, mbufs, MAX_PKT_BURST);
1181 for (j = 0; j < n; j++) {
1182 tx_ring(tbase, vdev->ring, PKT_FROM_TAP, mbufs[j]);
// poll() returns the number of descriptors with events; with a single
// descriptor that is 1 when the fd is ready.
// NOTE(review): the return value is compared with the POLL_IN constant
// rather than tested with > 0 - confirm this is intended.
// Each poll() call may wait up to prox_cfg.poll_timeout.
1186 if ((task->max_vdev_id) && (poll(&task->arp_fds, 1, prox_cfg.poll_timeout) == POLL_IN)) {
1187 handle_arp_event(tbase);
1189 if (poll(&task->route_fds, 1, prox_cfg.poll_timeout) == POLL_IN) {
1190 handle_route_event(tbase);
// Init callback of the master task (referenced by task_init_master.init).
// When DSF_CTRL_PLANE_ENABLED is set in prox_cfg.flags it wires the control
// rings of the task, initialises the control plane and installs
// handle_ctrl_plane_f as the control-plane handler.
// tbase: base of the master task; it is cast to struct task_master.
// targs: task arguments; only targs->lconf->ctrl_rings_p[0] is read here.
1195 static void init_task_master(struct task_base *tbase, struct task_args *targs)
1197 if (prox_cfg.flags & DSF_CTRL_PLANE_ENABLED) {
1198 struct task_master *task = (struct task_master *)tbase;
// Ring the master dequeues from in handle_ctrl_plane_f().
1200 task->ctrl_rx_ring = targs->lconf->ctrl_rings_p[0];
// Ring array indexed in handle_message() by core * MAX_TASKS_PER_CORE + task.
1201 task->ctrl_tx_rings = ctrl_rings;
1202 init_ctrl_plane(tbase);
1203 handle_ctrl_plane = handle_ctrl_plane_f;
// Descriptor of the master task mode; reg_task_gen() registers it with reg_task().
1207 static struct task_init task_init_master = {
// Mode name of this task.
1208 .mode_str = "master",
// Init callback.
1209 .init = init_task_master,
1211 .flag_features = TASK_FEATURE_NEVER_DISCARDS,
// Size given for the per-task state structure (struct task_master).
1212 .size = sizeof(struct task_master)
// Registers the master task descriptor with reg_task().
// The constructor attribute makes the compiler arrange for this function to
// be called automatically before main() is entered.
1215 __attribute__((constructor)) static void reg_task_gen(void)
1217 reg_task(&task_init_master);