2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
20 #include <rte_table_hash.h>
21 #include <rte_ether.h>
22 #include <rte_version.h>
23 #include <rte_byteorder.h>
26 #include "prox_lua_types.h"
29 #include "task_init.h"
30 #include "task_base.h"
31 #include "prox_port_cfg.h"
34 #include "hash_utils.h"
36 #include "prox_cksum.h"
41 #include "parse_utils.h"
43 #include "prox_shared.h"
// Compatibility shim: on DPDK older than 1.8.0 alias IPPROTO_IPIP to
// IPPROTO_IPV4 (both name IP protocol 4, IPv4-in-IP encapsulation).
// NOTE(review): the matching #endif is not visible in this excerpt — confirm.
45 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
46 #define IPPROTO_IPIP IPPROTO_IPV4
// Payload stored per entry in the binding lookup table: where to send the
// IPv6-encapsulated traffic for one subscriber binding.
49 struct ipv6_tun_dest {
50 struct ipv6_addr dst_addr; // tunnel endpoint IPv6 address (copied from binding entry->endpoint_addr)
51 struct ether_addr dst_mac; // next-hop MAC for that endpoint (copied from binding entry->next_hop_mac)
54 typedef enum ipv6_tun_dir_t {
// State shared by the decap and encap tasks. Must embed struct task_base as
// its first member: task pointers are freely cast to/from struct task_base.
59 struct task_ipv6_tun_base {
60 struct task_base base; // must stay first (tasks are cast to struct task_base*)
61 struct ether_addr src_mac; // our own MAC, taken from the reachable port at init
64 struct rte_mbuf* fake_packets[64]; // fake mbuf pointers aimed into keys[] so f_lookup() reads each key as packet metadata (see init_task_ipv6_tun_base)
65 uint16_t lookup_port_mask; // Mask used before looking up the port
66 void* lookup_table; // Fast lookup table for bindings
67 uint32_t runtime_flags;
// Decapsulation task: strips the IPv6 tunnel header from incoming packets.
71 struct task_ipv6_decap {
72 struct task_ipv6_tun_base base; // must stay first (cast to base type)
73 struct ether_addr dst_mac; // destination MAC written onto the restored IPv4 Ethernet header (from targ->edaddr)
// Encapsulation task: wraps outgoing IPv4 packets in an IPv6 tunnel header.
76 struct task_ipv6_encap {
77 struct task_ipv6_tun_base base; // must stay first (cast to base type)
79 struct ipv6_addr local_endpoint_addr; // IPv6 source address for the tunnel header (from targ->local_ipv6)
80 uint8_t tunnel_hop_limit; // hop limit written into the IPv6 header (from targ->tunnel_hop_limit)
83 #define IPv6_VERSION 6
85 #define IPPROTO_IPV4 4
// 64-bit hash key layout: masked L4 port in the low 16 bits, the (big-endian)
// public IPv4 address in bits 16..47.
// NOTE(review): macro arguments are not parenthesized — ipv4_addr and port
// should be (ipv4_addr)/(port) to be safe with expression arguments; current
// call sites pass simple values, so behavior is unchanged today.
88 #define MAKE_KEY_FROM_FIELDS(ipv4_addr, port, port_mask) ( ((uint64_t)ipv4_addr << 16) | (port & port_mask) )
// Forward declarations: the bulk handlers are referenced by the task_init
// descriptors before they are defined further down.
90 static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts);
91 static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts);
// Build (or re-use) the per-core binding lookup table mapping
// (public IPv4, masked port) -> struct ipv6_tun_dest. The table is shared
// between the decap and encap tasks of the same core via prox_sh_*_core.
93 static void init_lookup_table(struct task_ipv6_tun_base* ptask, struct task_args *targ)
95 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
97 /* The lookup table is a per-core data structure to reduce the
98 memory footprint and improve cache utilization. Since
99 operations on the hash table are not safe, the data
100 structure can't be used on a per socket or on a system wide
// First task on this core to get here creates the table; later tasks find it.
102 ptask->lookup_table = prox_sh_find_core(targ->lconf->id, "ipv6_binding_table");
103 if (NULL == ptask->lookup_table) {
104 struct ipv6_tun_binding_table *table;
105 PROX_PANIC(!strcmp(targ->tun_bindings, ""), "No tun bindings specified\n");
// Parse the binding list from the Lua configuration.
106 int ret = lua_to_ip6_tun_binding(prox_lua(), GLOBAL, targ->tun_bindings, socket_id, &table);
107 PROX_PANIC(ret, "Failed to read tun_bindings config:\n %s\n", get_lua_to_errors());
// Size the hash generously: 4x entries in the main table, plus extension
// buckets ((n*2)>>1 == n — presumably written this way for symmetry; TODO confirm intent).
109 struct rte_table_hash_key8_ext_params table_hash_params = {
110 .n_entries = (table->num_binding_entries * 4),
111 .n_entries_ext = (table->num_binding_entries * 2) >> 1,
112 .f_hash = hash_crc32,
114 .signature_offset = HASH_METADATA_OFFSET(8), // Ignored for dosig tables
115 .key_offset = HASH_METADATA_OFFSET(0),
117 plogx_info("IPv6 Tunnel allocating lookup table on socket %d\n", socket_id);
118 ptask->lookup_table = rte_table_hash_key8_ext_dosig_ops.
119 f_create(&table_hash_params, socket_id, sizeof(struct ipv6_tun_dest));
120 PROX_PANIC(ptask->lookup_table == NULL, "Error creating IPv6 Tunnel lookup table");
// Populate the table with one entry per configured binding.
122 for (unsigned idx = 0; idx < table->num_binding_entries; idx++) {
124 void* entry_in_hash = NULL;
125 struct ipv6_tun_dest data;
126 struct ipv6_tun_binding_entry* entry = &table->entry[idx];
// Key uses the big-endian (wire-order) IPv4 address, matching how packets
// are looked up later without byte-swapping in the fast path.
127 uint64_t key = MAKE_KEY_FROM_FIELDS(rte_cpu_to_be_32(entry->public_ipv4), entry->public_port, ptask->lookup_port_mask);
128 rte_memcpy(&data.dst_addr, &entry->endpoint_addr, sizeof(struct ipv6_addr));
129 rte_memcpy(&data.dst_mac, &entry->next_hop_mac, sizeof(struct ether_addr));
// NOTE(review): key_found is declared on a line not visible in this excerpt.
131 int ret = rte_table_hash_key8_ext_dosig_ops.f_add(ptask->lookup_table, &key, &data, &key_found, &entry_in_hash);
132 PROX_PANIC(ret, "Error adding entry (%d) to binding lookup table", idx);
// Duplicate keys indicate a misconfigured binding list — fail loudly.
133 PROX_PANIC(key_found, "key_found!!! for idx=%d\n", idx);
135 #ifdef DBG_IPV6_TUN_BINDING
136 plog_info("Bind: %x:0x%x (port_mask 0x%x) key=0x%"PRIx64"\n", entry->public_ipv4, entry->public_port, ptask->lookup_port_mask, key);
137 plog_info(" -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(entry->endpoint_addr.bytes), MAC_BYTES(entry->next_hop_mac.addr_bytes));
138 plog_info(" -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(data.dst_addr.bytes), MAC_BYTES(data.dst_mac.addr_bytes));
139 plog_info(" -> entry_in_hash=%p\n", entry_in_hash);
142 plogx_info("IPv6 Tunnel created %d lookup table entries\n", table->num_binding_entries);
// Publish the table so the sibling task on this core re-uses it.
144 prox_sh_add_core(targ->lconf->id, "ipv6_binding_table", ptask->lookup_table);
// Common initialization for both tunnel tasks: source MAC, port mask,
// lookup table, fake-packet pointers and checksum-offload capability.
148 static void init_task_ipv6_tun_base(struct task_ipv6_tun_base* tun_base, struct task_args* targ)
150 memcpy(&tun_base->src_mac, find_reachable_port(targ), sizeof(tun_base->src_mac));
152 tun_base->lookup_port_mask = targ->lookup_port_mask; // Mask used before looking up the port
154 init_lookup_table(tun_base, targ);
// The DPDK hash f_lookup() API expects an array of mbuf pointers and reads
// the key at a metadata offset inside each mbuf. Point each "fake packet"
// just before keys[i] so that keys[i] lands exactly where the table expects
// the key — this lets us do bulk lookups without touching real mbuf metadata.
156 for (uint32_t i = 0; i < 64; ++i) {
157 tun_base->fake_packets[i] = (struct rte_mbuf*)((uint8_t*)&tun_base->keys[i] - sizeof (struct rte_mbuf));
160 plogx_info("IPv6 Tunnel MAC="MAC_BYTES_FMT" port_mask=0x%x\n",
161 MAC_BYTES(tun_base->src_mac.addr_bytes), tun_base->lookup_port_mask);
163 struct prox_port_cfg *port = find_reachable_port(targ);
// Record whether the TX port can offload IP checksum computation.
165 tun_base->offload_crc = port->capabilities.tx_offload_cksum;
// Task-init hook for "ipv6_decap" mode.
169 static void init_task_ipv6_decap(struct task_base* tbase, struct task_args* targ)
171 struct task_ipv6_decap* tun_task = (struct task_ipv6_decap*)tbase;
172 struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)tun_task;
174 init_task_ipv6_tun_base(tun_base, targ);
175 tun_base->runtime_flags = targ->runtime_flags;
// Destination MAC to write onto decapsulated IPv4 packets.
177 memcpy(&tun_task->dst_mac, &targ->edaddr, sizeof(tun_task->dst_mac));
// Task-init hook for "ipv6_encap" mode.
180 static void init_task_ipv6_encap(struct task_base* tbase, struct task_args* targ)
182 struct task_ipv6_encap* tun_task = (struct task_ipv6_encap*)tbase;
183 struct task_ipv6_tun_base *tun_base = (struct task_ipv6_tun_base*)tun_task;
185 init_task_ipv6_tun_base(tun_base, targ);
// Local tunnel endpoint (IPv6 source address) and hop limit for built headers.
187 rte_memcpy(&tun_task->local_endpoint_addr, &targ->local_ipv6, sizeof(tun_task->local_endpoint_addr));
188 tun_task->tunnel_hop_limit = targ->tunnel_hop_limit;
189 tun_base->runtime_flags = targ->runtime_flags;
// Task descriptor for "ipv6_decap" mode, registered at program start below.
192 static struct task_init task_init_ipv6_decap = {
193 .mode_str = "ipv6_decap",
194 .init = init_task_ipv6_decap,
195 .handle = handle_ipv6_decap_bulk,
196 .size = sizeof(struct task_ipv6_decap)
// Task descriptor for "ipv6_encap" mode, registered at program start below.
199 static struct task_init task_init_ipv6_encap = {
200 .mode_str = "ipv6_encap",
201 .init = init_task_ipv6_encap,
202 .handle = handle_ipv6_encap_bulk,
203 .size = sizeof(struct task_ipv6_encap)
// Register the decap task mode before main() runs (GCC constructor).
206 __attribute__((constructor)) static void reg_task_ipv6_decap(void)
208 reg_task(&task_init_ipv6_decap);
// Register the encap task mode before main() runs (GCC constructor).
211 __attribute__((constructor)) static void reg_task_ipv6_encap(void)
213 reg_task(&task_init_ipv6_encap);
// Forward declarations for the per-packet handlers used by the bulk handlers.
216 static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest);
217 static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest);
// Extract the lookup fields from an IPv4 header: the public address (source
// when decapsulating, destination when encapsulating) and the corresponding
// UDP/TCP port in host order. Returns non-zero for unsupported protocols
// (NOTE(review): the return statements are on lines outside this excerpt —
// inferred from the unlikely(extract_key_fields(...)) check in extract_key).
219 static inline int extract_key_fields( __attribute__((unused)) struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint32_t* pAddr, uint16_t* pPort)
221 *pAddr = (dir == TUNNEL_DIR_DECAP) ? pip4->src_addr : pip4->dst_addr;
// L4 headers immediately follow the IPv4 header (pip4 + 1): assumes no IPv4
// options — TODO confirm that assumption holds for this deployment.
223 if (pip4->next_proto_id == IPPROTO_UDP) {
224 struct udp_hdr* pudp = (struct udp_hdr *)(pip4 + 1);
225 *pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? pudp->src_port : pudp->dst_port);
227 else if (pip4->next_proto_id == IPPROTO_TCP) {
228 struct tcp_hdr* ptcp = (struct tcp_hdr *)(pip4 + 1);
229 *pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? ptcp->src_port : ptcp->dst_port);
232 plog_warn("IPv6 Tunnel: IPv4 packet of unexpected type proto_id=0x%x\n", pip4->next_proto_id);
// Build the 64-bit hash key for one packet from its IPv4 header and the
// configured port mask. On extraction failure only a warning is logged;
// the key presumably stays poisoned so the lookup misses — TODO confirm
// against the lines missing from this excerpt.
240 static inline void extract_key(struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint64_t* pkey)
242 uint32_t lookup_addr;
243 uint16_t lookup_port;
245 if (unlikely( extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port))) {
246 plog_warn("IPv6 Tunnel: Unable to extract fields from packet\n");
251 *pkey = MAKE_KEY_FROM_FIELDS(lookup_addr, lookup_port, ptask->lookup_port_mask);
// Locate the inner IPv4 header of a tunneled packet:
// Ethernet -> IPv6 (fixed 40-byte header assumed) -> IPv4.
254 static inline struct ipv4_hdr* get_ipv4_decap(struct rte_mbuf *mbuf)
256 struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
257 struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
258 struct ipv4_hdr* pip4 = (struct ipv4_hdr*) (pip6 + 1); // TODO - Skip Option headers
// Locate the IPv4 header of a plain (not yet encapsulated) packet:
// it directly follows the Ethernet header.
263 static inline struct ipv4_hdr* get_ipv4_encap(struct rte_mbuf *mbuf)
265 struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
266 struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1);
// Key extraction for the decap direction (inner IPv4, source address/port).
271 static inline void extract_key_decap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey)
273 extract_key(ptask, get_ipv4_decap(mbuf), TUNNEL_DIR_DECAP, pkey);
// Fill keys[0..n_pkts) for a burst; the fake_packets[] pointers set up at
// init make these keys directly visible to the hash table's f_lookup().
276 static inline void extract_key_decap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts)
278 for (uint16_t j = 0; j < n_pkts; ++j) {
279 extract_key_decap(ptask, mbufs[j], &ptask->keys[j]);
// Key extraction for the encap direction (outer IPv4, destination address/port).
283 static inline void extract_key_encap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey)
285 extract_key(ptask, get_ipv4_encap(mbuf), TUNNEL_DIR_ENCAP, pkey);
// Fill keys[0..n_pkts) for a burst in the encap direction (see decap variant).
288 static inline void extract_key_encap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts)
290 for (uint16_t j = 0; j < n_pkts; ++j) {
291 extract_key_encap(ptask, mbufs[j], &ptask->keys[j]);
// Slow-path diagnostic for a failed binding lookup: re-extracts the fields
// and key from the offending packet and logs them. Marked cold to keep it
// out of the hot instruction path.
295 __attribute__((cold)) static void handle_error(struct task_ipv6_tun_base* ptask, struct rte_mbuf* mbuf, ipv6_tun_dir_t dir)
297 uint32_t lookup_addr;
298 uint16_t lookup_port;
301 struct ipv4_hdr* pip4 = (dir == TUNNEL_DIR_DECAP) ? get_ipv4_decap(mbuf) : get_ipv4_encap(mbuf);
302 extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port);
303 extract_key(ptask, pip4, dir, &key);
305 plog_warn("IPv6 Tunnel (%s) lookup failed for "IPv4_BYTES_FMT":%d [key=0x%"PRIx64"]\n",
306 (dir == TUNNEL_DIR_DECAP) ? "decap" : "encap",
307 IPv4_BYTES(((unsigned char*)&lookup_addr)), lookup_port, key);
// Bulk handler for the decap task: look up every packet's binding in one
// hash-table call, then decapsulate hits and discard misses.
310 static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts)
312 struct task_ipv6_decap* task = (struct task_ipv6_decap *)tbase;
// Bitmask with one bit set per packet in the burst (n_pkts <= 64).
313 uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
314 struct ipv6_tun_dest* entries[64];
315 uint8_t out[MAX_PKT_BURST];
316 uint64_t lookup_hit_mask;
319 prefetch_pkts(mbufs, n_pkts);
321 // Lookup to verify packets are valid for their respective tunnels (their sending lwB4)
// Keys are written into task->base.keys[]; fake_packets[] makes them visible
// to f_lookup() as if they were mbuf metadata.
322 extract_key_decap_bulk(&task->base, mbufs, n_pkts);
323 rte_table_hash_key8_ext_dosig_ops.f_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries);
// Fast path: every packet matched a binding.
325 if (likely(lookup_hit_mask == pkts_mask)) {
326 for (uint16_t j = 0; j < n_pkts; ++j) {
327 out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]);
// Slow path: log and discard the misses, decapsulate the rest.
331 for (uint16_t j = 0; j < n_pkts; ++j) {
332 if (unlikely(!((lookup_hit_mask >> j) & 0x1))) {
333 handle_error(&task->base, mbufs[j], TUNNEL_DIR_DECAP);
334 out[j] = OUT_DISCARD;
337 out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]);
341 return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out);
// Bulk handler for the encap task: mirror image of handle_ipv6_decap_bulk —
// one bulk lookup, then encapsulate hits and discard misses.
344 static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts)
346 struct task_ipv6_encap* task = (struct task_ipv6_encap *)tbase;
// Bitmask with one bit set per packet in the burst (n_pkts <= 64).
347 uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
348 struct ipv6_tun_dest* entries[64];
349 uint64_t lookup_hit_mask;
350 uint8_t out[MAX_PKT_BURST];
353 prefetch_first(mbufs, n_pkts);
355 extract_key_encap_bulk(&task->base, mbufs, n_pkts);
356 rte_table_hash_key8_ext_dosig_ops.f_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries);
// Fast path: every packet matched a binding.
358 if (likely(lookup_hit_mask == pkts_mask)) {
359 for (uint16_t j = 0; j < n_pkts; ++j) {
360 out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]);
// Slow path: log and discard the misses, encapsulate the rest.
364 for (uint16_t j = 0; j < n_pkts; ++j) {
365 if (unlikely(!((lookup_hit_mask >> j) & 0x1))) {
366 handle_error(&task->base, mbufs[j], TUNNEL_DIR_ENCAP);
367 out[j] = OUT_DISCARD;
370 out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]);
374 return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out);
// Per-packet decapsulation: validate the outer headers, strip the IPv6
// encapsulation and restore an IPv4 Ethernet frame in place.
// (NOTE(review): the failure-path return statements are on lines outside this
// excerpt — presumably OUT_DISCARD, matching the bulk handler's convention.)
377 static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest)
379 struct ether_hdr* peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *);
// Sanity check: outer frame must be IPv6 (ETYPE_IPv6 is pre-swapped, so a
// direct comparison works without rte_be_to_cpu_16).
381 if (unlikely(peth->ether_type != ETYPE_IPv6)) {
382 plog_warn("Received non IPv6 packet on ipv6 tunnel port\n");
387 struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
388 int ipv6_hdr_len = sizeof(struct ipv6_hdr);
390 // TODO - Skip over any IPv6 Extension Header:
391 // If pip6->next_header is in (0, 43, 44, 50, 51, 60, 135), skip ahead pip->hdr_ext_len
392 // bytes and repeat. Increase ipv6_hdr_len with as much, each time.
// Only IPv4-in-IPv6 (protocol 4) is accepted as tunnel payload.
394 if (unlikely(pip6->proto != IPPROTO_IPIP)) {
395 plog_warn("Received non IPv4 content within IPv6 tunnel packet\n");
400 // Discard IPv6 encapsulation
// rte_pktmbuf_adj() removes ipv6_hdr_len bytes from the front; the old
// Ethernet header area is gone, so re-read the new frame start.
401 rte_pktmbuf_adj(rx_mbuf, ipv6_hdr_len);
402 peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *);
404 // Restore Ethernet header
405 ether_addr_copy(&ptask->base.src_mac, &peth->s_addr);
406 ether_addr_copy(&ptask->dst_mac, &peth->d_addr);
407 peth->ether_type = ETYPE_IPv4;
// Per-packet encapsulation: validate the IPv4 packet, decrement TTL, trim
// Ethernet padding, prepend an IPv6 header and rewrite the Ethernet header
// toward the binding's tunnel endpoint. (The function continues past the end
// of this excerpt; comments below cover only the visible portion.)
412 static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest)
414 //plog_info("Found tunnel endpoint:"IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(tun_dest->dst_addr), MAC_BYTES(tun_dest->dst_mac.addr_bytes));
416 struct ether_hdr* peth = (struct ether_hdr *)(rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *));
417 struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1);
418 uint16_t ipv4_length = rte_be_to_cpu_16(pip4->total_length);
419 struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)ptask;
// Upper nibble of version_ihl is the IP version; must be 4.
421 if (unlikely((pip4->version_ihl >> 4) != 4)) {
422 plog_warn("Received non IPv4 packet at ipv6 tunnel input\n");
// Decrement TTL; a packet arriving with TTL 0 is dropped (routing behavior).
427 if (pip4->time_to_live) {
428 pip4->time_to_live--;
431 plog_info("TTL = 0 => Dropping\n");
// Checksum is zeroed here and recomputed below (TTL changed).
434 pip4->hdr_checksum = 0;
436 // Remove padding if any (we don't want to encapsulate garbage at end of IPv4 packet)
437 int padding = rte_pktmbuf_pkt_len(rx_mbuf) - (ipv4_length + sizeof(struct ether_hdr));
438 if (unlikely(padding > 0)) {
439 rte_pktmbuf_trim(rx_mbuf, padding);
// Make room for the outer IPv6 header in front of the IPv4 packet.
// NOTE(review): rte_pktmbuf_prepend can return NULL on insufficient headroom;
// the check, if any, is on lines outside this excerpt — confirm.
443 const int extra_space = sizeof(struct ipv6_hdr);
444 peth = (struct ether_hdr *)rte_pktmbuf_prepend(rx_mbuf, extra_space);
447 ether_addr_copy(&ptask->base.src_mac, &peth->s_addr);
448 ether_addr_copy(&tun_dest->dst_mac, &peth->d_addr);
449 peth->ether_type = ETYPE_IPv6;
451 // Set up IPv6 Header
452 struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
// vtc_flow: version 6 in the top 4 bits; traffic class and flow label zero.
453 pip6->vtc_flow = rte_cpu_to_be_32(IPv6_VERSION << 28);
454 pip6->proto = IPPROTO_IPIP;
// IPv6 payload length is the full IPv4 packet (header included).
455 pip6->payload_len = rte_cpu_to_be_16(ipv4_length);
456 pip6->hop_limits = ptask->tunnel_hop_limit;
457 rte_memcpy(pip6->dst_addr, &tun_dest->dst_addr, sizeof(pip6->dst_addr));
458 rte_memcpy(pip6->src_addr, &ptask->local_endpoint_addr, sizeof(pip6->src_addr));
// Recompute the inner IPv4 checksum (TTL was modified), in software or via
// NIC offload depending on port capability recorded at init.
460 if (tun_base->runtime_flags & TASK_TX_CRC) {
461 // We modified the TTL in the IPv4 header, hence have to recompute the IPv4 checksum
462 #define TUNNEL_L2_LEN (sizeof(struct ether_hdr) + sizeof(struct ipv6_hdr))
463 prox_ip_cksum(rx_mbuf, pip4, TUNNEL_L2_LEN, sizeof(struct ipv4_hdr), ptask->base.offload_crc);