2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
20 #include <rte_table_hash.h>
21 #include <rte_ether.h>
22 #include <rte_version.h>
23 #include <rte_byteorder.h>
26 #include "prox_lua_types.h"
29 #include "task_init.h"
30 #include "task_base.h"
31 #include "prox_port_cfg.h"
34 #include "hash_utils.h"
36 #include "prox_cksum.h"
41 #include "parse_utils.h"
43 #include "prox_shared.h"
44 #include "prox_compat.h"
46 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
47 #define IPPROTO_IPIP IPPROTO_IPV4
// Per-binding lookup result stored in the hash table: the IPv6 address of
// the remote tunnel endpoint plus the next-hop MAC to put in the outer
// Ethernet header when encapsulating towards it.
// NOTE(review): the closing "};" of this struct is on a line not visible
// in this view of the file.
struct ipv6_tun_dest {
struct ipv6_addr dst_addr;
struct ether_addr dst_mac;
55 typedef enum ipv6_tun_dir_t {
// State common to both tunnel task flavours (decap and encap).
// NOTE(review): additional members referenced elsewhere in this file
// (e.g. the per-packet "keys" array used by init_task_ipv6_tun_base and
// "offload_crc") are declared on lines not visible in this view.
struct task_ipv6_tun_base {
struct task_base base;
struct ether_addr src_mac;
// One fake mbuf per burst slot; each is laid out so that the hash-table
// lookup reads the corresponding 8-byte key at the metadata offset.
struct rte_mbuf* fake_packets[64];
uint16_t lookup_port_mask; // Mask used before looking up the port
void* lookup_table; // Fast lookup table for bindings
uint32_t runtime_flags;
// Decapsulation task: strips the outer IPv6 header and forwards the inner
// IPv4 frame to a single configured destination MAC (dst_mac).
struct task_ipv6_decap {
struct task_ipv6_tun_base base;
struct ether_addr dst_mac;
// Encapsulation task: wraps IPv4 frames in an IPv6 header sourced from
// local_endpoint_addr, with the configured tunnel hop limit.
struct task_ipv6_encap {
struct task_ipv6_tun_base base;
struct ipv6_addr local_endpoint_addr;
uint8_t tunnel_hop_limit;
// IP version number placed in the IPv6 vtc_flow field (shifted << 28 below).
#define IPv6_VERSION 6
// IPv4-in-anything protocol number (IPPROTO_IPIP on modern systems).
// NOTE(review): an "#ifndef IPPROTO_IPV4" guard line appears to be elided
// from this view — confirm against the full file before editing.
#define IPPROTO_IPV4 4
89 #define MAKE_KEY_FROM_FIELDS(ipv4_addr, port, port_mask) ( ((uint64_t)ipv4_addr << 16) | (port & port_mask) )
91 static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts);
92 static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts);
// Build (or re-use) the per-core IPv6 tunnel binding table. Bindings are
// read from the Lua config (targ->tun_bindings); each entry maps a
// (public IPv4, masked port) key to an ipv6_tun_dest (endpoint + MAC).
// The resulting table pointer is cached in prox_sh so that other tasks on
// the same core share one instance.
// NOTE(review): several lines of this function (opening/closing braces,
// the "int key_found" declaration used below, and some hash-parameter
// fields) are elided from this view — code kept byte-identical.
static void init_lookup_table(struct task_ipv6_tun_base* ptask, struct task_args *targ)
const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
/* The lookup table is a per-core data structure to reduce the
memory footprint and improve cache utilization. Since
operations on the hash table are not safe, the data
structure can't be used on a per socket or on a system wide
// Re-use a table already created by another task on this core, if any.
ptask->lookup_table = prox_sh_find_core(targ->lconf->id, "ipv6_binding_table");
if (NULL == ptask->lookup_table) {
struct ipv6_tun_binding_table *table;
PROX_PANIC(!strcmp(targ->tun_bindings, ""), "No tun bindings specified\n");
int ret = lua_to_ip6_tun_binding(prox_lua(), GLOBAL, targ->tun_bindings, socket_id, &table);
PROX_PANIC(ret, "Failed to read tun_bindings config:\n %s\n", get_lua_to_errors());
// Hash name must be unique per core, hence the lcore id suffix.
static char hash_name[30];
sprintf(hash_name, "ipv6_tunnel_hash_table_%03d", targ->lconf->id);
struct prox_rte_table_params table_hash_params = {
// Over-provision keys (x4) relative to the number of bindings.
.n_keys = (table->num_binding_entries * 4),
.n_buckets = (table->num_binding_entries * 2) >> 1,
.f_hash = (rte_table_hash_op_hash)hash_crc32,
.key_offset = HASH_METADATA_OFFSET(0),
plogx_info("IPv6 Tunnel allocating lookup table on socket %d\n", socket_id);
ptask->lookup_table = prox_rte_table_create(&table_hash_params, socket_id, sizeof(struct ipv6_tun_dest));
PROX_PANIC(ptask->lookup_table == NULL, "Error creating IPv6 Tunnel lookup table");
// Populate the table with one entry per configured binding.
for (unsigned idx = 0; idx < table->num_binding_entries; idx++) {
void* entry_in_hash = NULL;
struct ipv6_tun_dest data;
struct ipv6_tun_binding_entry* entry = &table->entry[idx];
// Key uses the big-endian address so it matches on-the-wire packets.
uint64_t key = MAKE_KEY_FROM_FIELDS(rte_cpu_to_be_32(entry->public_ipv4), entry->public_port, ptask->lookup_port_mask);
rte_memcpy(&data.dst_addr, &entry->endpoint_addr, sizeof(struct ipv6_addr));
rte_memcpy(&data.dst_mac, &entry->next_hop_mac, sizeof(struct ether_addr));
int ret = prox_rte_table_key8_add(ptask->lookup_table, &key, &data, &key_found, &entry_in_hash);
PROX_PANIC(ret, "Error adding entry (%d) to binding lookup table", idx);
// Duplicate keys indicate a misconfigured binding list.
PROX_PANIC(key_found, "key_found!!! for idx=%d\n", idx);
#ifdef DBG_IPV6_TUN_BINDING
plog_info("Bind: %x:0x%x (port_mask 0x%x) key=0x%"PRIx64"\n", entry->public_ipv4, entry->public_port, ptask->lookup_port_mask, key);
plog_info("  -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(entry->endpoint_addr.bytes), MAC_BYTES(entry->next_hop_mac.addr_bytes));
plog_info("  -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(data.dst_addr.bytes), MAC_BYTES(data.dst_mac.addr_bytes));
plog_info("  -> entry_in_hash=%p\n", entry_in_hash);
plogx_info("IPv6 Tunnel created %d lookup table entries\n", table->num_binding_entries);
// Publish for other tasks on this core.
prox_sh_add_core(targ->lconf->id, "ipv6_binding_table", ptask->lookup_table);
// Initialize state shared by decap and encap tasks: source MAC, port
// mask, binding table, the fake-mbuf array used for bulk key lookups,
// and checksum-offload capabilities of the TX port.
// NOTE(review): some lines (braces, and presumably an "if (port)" guard
// before the offload_crc assignment) are elided from this view — code
// kept byte-identical. tun_base->keys is assumed to be declared in
// struct task_ipv6_tun_base on lines not visible here — TODO confirm.
static void init_task_ipv6_tun_base(struct task_ipv6_tun_base* tun_base, struct task_args* targ)
memcpy(&tun_base->src_mac, find_reachable_port(targ), sizeof(tun_base->src_mac));
tun_base->lookup_port_mask = targ->lookup_port_mask; // Mask used before looking up the port
init_lookup_table(tun_base, targ);
// Each fake mbuf is positioned so the lookup reads keys[i] where packet
// metadata would normally live.
for (uint32_t i = 0; i < 64; ++i) {
tun_base->fake_packets[i] = (struct rte_mbuf*)((uint8_t*)&tun_base->keys[i] - sizeof (struct rte_mbuf));
plogx_info("IPv6 Tunnel MAC="MAC_BYTES_FMT" port_mask=0x%x\n",
MAC_BYTES(tun_base->src_mac.addr_bytes), tun_base->lookup_port_mask);
struct prox_port_cfg *port = find_reachable_port(targ);
tun_base->offload_crc = port->requested_tx_offload & (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM);
174 static void init_task_ipv6_decap(struct task_base* tbase, struct task_args* targ)
176 struct task_ipv6_decap* tun_task = (struct task_ipv6_decap*)tbase;
177 struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)tun_task;
179 init_task_ipv6_tun_base(tun_base, targ);
180 tun_base->runtime_flags = targ->runtime_flags;
182 memcpy(&tun_task->dst_mac, &targ->edaddr, sizeof(tun_task->dst_mac));
185 static void init_task_ipv6_encap(struct task_base* tbase, struct task_args* targ)
187 struct task_ipv6_encap* tun_task = (struct task_ipv6_encap*)tbase;
188 struct task_ipv6_tun_base *tun_base = (struct task_ipv6_tun_base*)tun_task;
190 init_task_ipv6_tun_base(tun_base, targ);
192 rte_memcpy(&tun_task->local_endpoint_addr, &targ->local_ipv6, sizeof(tun_task->local_endpoint_addr));
193 tun_task->tunnel_hop_limit = targ->tunnel_hop_limit;
194 tun_base->runtime_flags = targ->runtime_flags;
197 static struct task_init task_init_ipv6_decap = {
198 .mode_str = "ipv6_decap",
199 .init = init_task_ipv6_decap,
200 .handle = handle_ipv6_decap_bulk,
201 .size = sizeof(struct task_ipv6_decap)
204 static struct task_init task_init_ipv6_encap = {
205 .mode_str = "ipv6_encap",
206 .init = init_task_ipv6_encap,
207 .handle = handle_ipv6_encap_bulk,
208 .size = sizeof(struct task_ipv6_encap)
211 __attribute__((constructor)) static void reg_task_ipv6_decap(void)
213 reg_task(&task_init_ipv6_decap);
216 __attribute__((constructor)) static void reg_task_ipv6_encap(void)
218 reg_task(&task_init_ipv6_encap);
221 static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest);
222 static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest);
224 static inline int extract_key_fields( __attribute__((unused)) struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint32_t* pAddr, uint16_t* pPort)
226 *pAddr = (dir == TUNNEL_DIR_DECAP) ? pip4->src_addr : pip4->dst_addr;
228 if (pip4->next_proto_id == IPPROTO_UDP) {
229 struct udp_hdr* pudp = (struct udp_hdr *)(pip4 + 1);
230 *pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? pudp->src_port : pudp->dst_port);
232 else if (pip4->next_proto_id == IPPROTO_TCP) {
233 struct tcp_hdr* ptcp = (struct tcp_hdr *)(pip4 + 1);
234 *pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? ptcp->src_port : ptcp->dst_port);
237 plog_warn("IPv6 Tunnel: IPv4 packet of unexpected type proto_id=0x%x\n", pip4->next_proto_id);
245 static inline void extract_key(struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint64_t* pkey)
247 uint32_t lookup_addr;
248 uint16_t lookup_port;
250 if (unlikely( extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port))) {
251 plog_warn("IPv6 Tunnel: Unable to extract fields from packet\n");
256 *pkey = MAKE_KEY_FROM_FIELDS(lookup_addr, lookup_port, ptask->lookup_port_mask);
259 static inline struct ipv4_hdr* get_ipv4_decap(struct rte_mbuf *mbuf)
261 struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
262 struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
263 struct ipv4_hdr* pip4 = (struct ipv4_hdr*) (pip6 + 1); // TODO - Skip Option headers
268 static inline struct ipv4_hdr* get_ipv4_encap(struct rte_mbuf *mbuf)
270 struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
271 struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1);
276 static inline void extract_key_decap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey)
278 extract_key(ptask, get_ipv4_decap(mbuf), TUNNEL_DIR_DECAP, pkey);
281 static inline void extract_key_decap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts)
283 for (uint16_t j = 0; j < n_pkts; ++j) {
284 extract_key_decap(ptask, mbufs[j], &ptask->keys[j]);
288 static inline void extract_key_encap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey)
290 extract_key(ptask, get_ipv4_encap(mbuf), TUNNEL_DIR_ENCAP, pkey);
293 static inline void extract_key_encap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts)
295 for (uint16_t j = 0; j < n_pkts; ++j) {
296 extract_key_encap(ptask, mbufs[j], &ptask->keys[j]);
300 __attribute__((cold)) static void handle_error(struct task_ipv6_tun_base* ptask, struct rte_mbuf* mbuf, ipv6_tun_dir_t dir)
302 uint32_t lookup_addr;
303 uint16_t lookup_port;
306 struct ipv4_hdr* pip4 = (dir == TUNNEL_DIR_DECAP) ? get_ipv4_decap(mbuf) : get_ipv4_encap(mbuf);
307 extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port);
308 extract_key(ptask, pip4, dir, &key);
310 plog_warn("IPv6 Tunnel (%s) lookup failed for "IPv4_BYTES_FMT":%d [key=0x%"PRIx64"]\n",
311 (dir == TUNNEL_DIR_DECAP) ? "decap" : "encap",
312 IPv4_BYTES(((unsigned char*)&lookup_addr)), lookup_port, key);
315 static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts)
317 struct task_ipv6_decap* task = (struct task_ipv6_decap *)tbase;
318 uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
319 struct ipv6_tun_dest* entries[64];
320 uint8_t out[MAX_PKT_BURST];
321 uint64_t lookup_hit_mask;
324 prefetch_pkts(mbufs, n_pkts);
326 // Lookup to verify packets are valid for their respective tunnels (their sending lwB4)
327 extract_key_decap_bulk(&task->base, mbufs, n_pkts);
328 prox_rte_table_key8_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries);
330 if (likely(lookup_hit_mask == pkts_mask)) {
331 for (uint16_t j = 0; j < n_pkts; ++j) {
332 out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]);
336 for (uint16_t j = 0; j < n_pkts; ++j) {
337 if (unlikely(!((lookup_hit_mask >> j) & 0x1))) {
338 handle_error(&task->base, mbufs[j], TUNNEL_DIR_DECAP);
339 out[j] = OUT_DISCARD;
342 out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]);
346 return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out);
349 static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts)
351 struct task_ipv6_encap* task = (struct task_ipv6_encap *)tbase;
352 uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
353 struct ipv6_tun_dest* entries[64];
354 uint64_t lookup_hit_mask;
355 uint8_t out[MAX_PKT_BURST];
358 prefetch_first(mbufs, n_pkts);
360 extract_key_encap_bulk(&task->base, mbufs, n_pkts);
361 prox_rte_table_key8_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries);
363 if (likely(lookup_hit_mask == pkts_mask)) {
364 for (uint16_t j = 0; j < n_pkts; ++j) {
365 out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]);
369 for (uint16_t j = 0; j < n_pkts; ++j) {
370 if (unlikely(!((lookup_hit_mask >> j) & 0x1))) {
371 handle_error(&task->base, mbufs[j], TUNNEL_DIR_ENCAP);
372 out[j] = OUT_DISCARD;
375 out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]);
379 return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out);
382 static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest)
384 struct ether_hdr* peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *);
385 struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)ptask;
386 struct ipv4_hdr* pip4 = NULL;
388 if (unlikely(peth->ether_type != ETYPE_IPv6)) {
389 plog_warn("Received non IPv6 packet on ipv6 tunnel port\n");
394 struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
395 int ipv6_hdr_len = sizeof(struct ipv6_hdr);
397 // TODO - Skip over any IPv6 Extension Header:
398 // If pip6->next_header is in (0, 43, 44, 50, 51, 60, 135), skip ahead pip->hdr_ext_len
399 // bytes and repeat. Increase ipv6_hdr_len with as much, each time.
401 if (unlikely(pip6->proto != IPPROTO_IPIP)) {
402 plog_warn("Received non IPv4 content within IPv6 tunnel packet\n");
407 // Discard IPv6 encapsulation
408 rte_pktmbuf_adj(rx_mbuf, ipv6_hdr_len);
409 peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *);
410 pip4 = (struct ipv4_hdr *)(peth + 1);
412 // Restore Ethernet header
413 ether_addr_copy(&ptask->base.src_mac, &peth->s_addr);
414 ether_addr_copy(&ptask->dst_mac, &peth->d_addr);
415 peth->ether_type = ETYPE_IPv4;
417 #ifdef GEN_DECAP_IPV6_TO_IPV4_CKSUM
418 // generate an IP checksum for ipv4 packet
419 if (tun_base->runtime_flags & TASK_TX_CRC) {
420 prox_ip_cksum(rx_mbuf, pip4, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), ptask->base.offload_crc);
// Encapsulate one IPv4 packet in an IPv6 tunnel header towards the
// binding's endpoint (tun_dest): validate IPv4, decrement TTL, trim L2
// padding, prepend an IPv6 header sourced from the local endpoint, and
// recompute the IPv4 checksum (offloaded or inline) since TTL changed.
// NOTE(review): this function continues beyond the end of the visible
// file (its tail, including the return value, is not shown) — code kept
// byte-identical with comments only.
static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest)
//plog_info("Found tunnel endpoint:"IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(tun_dest->dst_addr), MAC_BYTES(tun_dest->dst_mac.addr_bytes));
struct ether_hdr* peth = (struct ether_hdr *)(rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *));
struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1);
uint16_t ipv4_length = rte_be_to_cpu_16(pip4->total_length);
struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)ptask;
// Only plain IPv4 may be tunnelled; anything else is dropped (the
// drop/return line is elided from this view).
if (unlikely((pip4->version_ihl >> 4) != 4)) {
plog_warn("Received non IPv4 packet at ipv6 tunnel input\n");
// Decrement TTL; a packet arriving with TTL 0/1 is dropped below.
if (pip4->time_to_live) {
pip4->time_to_live--;
plog_info("TTL = 0 => Dropping\n");
// Checksum is recomputed at the end (TTL changed); clear it first.
pip4->hdr_checksum = 0;
// Remove padding if any (we don't want to encapsulate garbage at end of IPv4 packet)
int padding = rte_pktmbuf_pkt_len(rx_mbuf) - (ipv4_length + sizeof(struct ether_hdr));
if (unlikely(padding > 0)) {
rte_pktmbuf_trim(rx_mbuf, padding);
// Grow the packet at the front by one IPv6 header; the Ethernet header
// is rewritten at the new start of the mbuf.
const int extra_space = sizeof(struct ipv6_hdr);
peth = (struct ether_hdr *)rte_pktmbuf_prepend(rx_mbuf, extra_space);
ether_addr_copy(&ptask->base.src_mac, &peth->s_addr);
ether_addr_copy(&tun_dest->dst_mac, &peth->d_addr);
peth->ether_type = ETYPE_IPv6;
// Set up IPv6 Header
struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
pip6->vtc_flow = rte_cpu_to_be_32(IPv6_VERSION << 28);
pip6->proto = IPPROTO_IPIP;
pip6->payload_len = rte_cpu_to_be_16(ipv4_length);
pip6->hop_limits = ptask->tunnel_hop_limit;
rte_memcpy(pip6->dst_addr, &tun_dest->dst_addr, sizeof(pip6->dst_addr));
rte_memcpy(pip6->src_addr, &ptask->local_endpoint_addr, sizeof(pip6->src_addr));
if (tun_base->runtime_flags & TASK_TX_CRC) {
// We modified the TTL in the IPv4 header, hence have to recompute the IPv4 checksum
#define TUNNEL_L2_LEN (sizeof(struct ether_hdr) + sizeof(struct ipv6_hdr))
prox_ip_cksum(rx_mbuf, pip4, TUNNEL_L2_LEN, sizeof(struct ipv4_hdr), ptask->base.offload_crc);