// Copyright (c) 2010-2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
20 #include <rte_table_hash.h>
21 #include <rte_ether.h>
22 #include <rte_version.h>
23 #include <rte_byteorder.h>
26 #include "prox_lua_types.h"
29 #include "task_init.h"
30 #include "task_base.h"
31 #include "prox_port_cfg.h"
34 #include "hash_utils.h"
36 #include "prox_cksum.h"
41 #include "parse_utils.h"
43 #include "prox_shared.h"
45 #if RTE_VERSION < RTE_VERSION_NUM(1,8,0,0)
46 #define IPPROTO_IPIP IPPROTO_IPV4
49 struct ipv6_tun_dest {
50 struct ipv6_addr dst_addr;
51 struct ether_addr dst_mac;
54 typedef enum ipv6_tun_dir_t {
59 struct task_ipv6_tun_base {
60 struct task_base base;
61 struct ether_addr src_mac;
64 struct rte_mbuf* fake_packets[64];
65 uint16_t lookup_port_mask; // Mask used before looking up the port
66 void* lookup_table; // Fast lookup table for bindings
67 uint32_t runtime_flags;
71 struct task_ipv6_decap {
72 struct task_ipv6_tun_base base;
73 struct ether_addr dst_mac;
76 struct task_ipv6_encap {
77 struct task_ipv6_tun_base base;
79 struct ipv6_addr local_endpoint_addr;
80 uint8_t tunnel_hop_limit;
83 #define IPv6_VERSION 6
85 #define IPPROTO_IPV4 4
88 #define MAKE_KEY_FROM_FIELDS(ipv4_addr, port, port_mask) ( ((uint64_t)ipv4_addr << 16) | (port & port_mask) )
90 static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts);
91 static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** rx_mbuf, const uint16_t n_pkts);
93 static void init_lookup_table(struct task_ipv6_tun_base* ptask, struct task_args *targ)
95 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
97 /* The lookup table is a per-core data structure to reduce the
98 memory footprint and improve cache utilization. Since
99 operations on the hash table are not safe, the data
100 structure can't be used on a per socket or on a system wide
102 ptask->lookup_table = prox_sh_find_core(targ->lconf->id, "ipv6_binding_table");
103 if (NULL == ptask->lookup_table) {
104 struct ipv6_tun_binding_table *table;
105 PROX_PANIC(!strcmp(targ->tun_bindings, ""), "No tun bindings specified\n");
106 int ret = lua_to_ip6_tun_binding(prox_lua(), GLOBAL, targ->tun_bindings, socket_id, &table);
107 PROX_PANIC(ret, "Failed to read tun_bindings config:\n %s\n", get_lua_to_errors());
109 struct rte_table_hash_key8_ext_params table_hash_params = {
110 .n_entries = (table->num_binding_entries * 4),
111 .n_entries_ext = (table->num_binding_entries * 2) >> 1,
112 .f_hash = hash_crc32,
114 .signature_offset = HASH_METADATA_OFFSET(8), // Ignored for dosig tables
115 .key_offset = HASH_METADATA_OFFSET(0),
117 plogx_info("IPv6 Tunnel allocating lookup table on socket %d\n", socket_id);
118 ptask->lookup_table = rte_table_hash_key8_ext_dosig_ops.
119 f_create(&table_hash_params, socket_id, sizeof(struct ipv6_tun_dest));
120 PROX_PANIC(ptask->lookup_table == NULL, "Error creating IPv6 Tunnel lookup table");
122 for (unsigned idx = 0; idx < table->num_binding_entries; idx++) {
124 void* entry_in_hash = NULL;
125 struct ipv6_tun_dest data;
126 struct ipv6_tun_binding_entry* entry = &table->entry[idx];
127 uint64_t key = MAKE_KEY_FROM_FIELDS(rte_cpu_to_be_32(entry->public_ipv4), entry->public_port, ptask->lookup_port_mask);
128 rte_memcpy(&data.dst_addr, &entry->endpoint_addr, sizeof(struct ipv6_addr));
129 rte_memcpy(&data.dst_mac, &entry->next_hop_mac, sizeof(struct ether_addr));
131 int ret = rte_table_hash_key8_ext_dosig_ops.f_add(ptask->lookup_table, &key, &data, &key_found, &entry_in_hash);
132 PROX_PANIC(ret, "Error adding entry (%d) to binding lookup table", idx);
133 PROX_PANIC(key_found, "key_found!!! for idx=%d\n", idx);
135 #ifdef DBG_IPV6_TUN_BINDING
136 plog_info("Bind: %x:0x%x (port_mask 0x%x) key=0x%"PRIx64"\n", entry->public_ipv4, entry->public_port, ptask->lookup_port_mask, key);
137 plog_info(" -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(entry->endpoint_addr.bytes), MAC_BYTES(entry->next_hop_mac.addr_bytes));
138 plog_info(" -> "IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(data.dst_addr.bytes), MAC_BYTES(data.dst_mac.addr_bytes));
139 plog_info(" -> entry_in_hash=%p\n", entry_in_hash);
142 plogx_info("IPv6 Tunnel created %d lookup table entries\n", table->num_binding_entries);
144 prox_sh_add_core(targ->lconf->id, "ipv6_binding_table", ptask->lookup_table);
148 static void init_task_ipv6_tun_base(struct task_ipv6_tun_base* tun_base, struct task_args* targ)
150 memcpy(&tun_base->src_mac, find_reachable_port(targ), sizeof(tun_base->src_mac));
152 tun_base->lookup_port_mask = targ->lookup_port_mask; // Mask used before looking up the port
154 init_lookup_table(tun_base, targ);
156 for (uint32_t i = 0; i < 64; ++i) {
157 tun_base->fake_packets[i] = (struct rte_mbuf*)((uint8_t*)&tun_base->keys[i] - sizeof (struct rte_mbuf));
160 plogx_info("IPv6 Tunnel MAC="MAC_BYTES_FMT" port_mask=0x%x\n",
161 MAC_BYTES(tun_base->src_mac.addr_bytes), tun_base->lookup_port_mask);
163 struct prox_port_cfg *port = find_reachable_port(targ);
165 tun_base->offload_crc = port->capabilities.tx_offload_cksum;
169 static void init_task_ipv6_decap(struct task_base* tbase, struct task_args* targ)
171 struct task_ipv6_decap* tun_task = (struct task_ipv6_decap*)tbase;
172 struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)tun_task;
174 init_task_ipv6_tun_base(tun_base, targ);
175 tun_base->runtime_flags = targ->runtime_flags;
177 memcpy(&tun_task->dst_mac, &targ->edaddr, sizeof(tun_task->dst_mac));
180 static void init_task_ipv6_encap(struct task_base* tbase, struct task_args* targ)
182 struct task_ipv6_encap* tun_task = (struct task_ipv6_encap*)tbase;
183 struct task_ipv6_tun_base *tun_base = (struct task_ipv6_tun_base*)tun_task;
185 init_task_ipv6_tun_base(tun_base, targ);
187 rte_memcpy(&tun_task->local_endpoint_addr, &targ->local_ipv6, sizeof(tun_task->local_endpoint_addr));
188 tun_task->tunnel_hop_limit = targ->tunnel_hop_limit;
189 tun_base->runtime_flags = targ->runtime_flags;
192 static struct task_init task_init_ipv6_decap = {
193 .mode_str = "ipv6_decap",
194 .init = init_task_ipv6_decap,
195 .handle = handle_ipv6_decap_bulk,
196 .size = sizeof(struct task_ipv6_decap)
199 static struct task_init task_init_ipv6_encap = {
200 .mode_str = "ipv6_encap",
201 .init = init_task_ipv6_encap,
202 .handle = handle_ipv6_encap_bulk,
203 .size = sizeof(struct task_ipv6_encap)
206 __attribute__((constructor)) static void reg_task_ipv6_decap(void)
208 reg_task(&task_init_ipv6_decap);
211 __attribute__((constructor)) static void reg_task_ipv6_encap(void)
213 reg_task(&task_init_ipv6_encap);
216 static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest);
217 static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, struct ipv6_tun_dest* tun_dest);
219 static inline int extract_key_fields( __attribute__((unused)) struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint32_t* pAddr, uint16_t* pPort)
221 *pAddr = (dir == TUNNEL_DIR_DECAP) ? pip4->src_addr : pip4->dst_addr;
223 if (pip4->next_proto_id == IPPROTO_UDP) {
224 struct udp_hdr* pudp = (struct udp_hdr *)(pip4 + 1);
225 *pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? pudp->src_port : pudp->dst_port);
227 else if (pip4->next_proto_id == IPPROTO_TCP) {
228 struct tcp_hdr* ptcp = (struct tcp_hdr *)(pip4 + 1);
229 *pPort = rte_be_to_cpu_16((dir == TUNNEL_DIR_DECAP) ? ptcp->src_port : ptcp->dst_port);
232 plog_warn("IPv6 Tunnel: IPv4 packet of unexpected type proto_id=0x%x\n", pip4->next_proto_id);
240 static inline void extract_key(struct task_ipv6_tun_base* ptask, struct ipv4_hdr* pip4, ipv6_tun_dir_t dir, uint64_t* pkey)
242 uint32_t lookup_addr;
243 uint16_t lookup_port;
245 if (unlikely( extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port))) {
246 plog_warn("IPv6 Tunnel: Unable to extract fields from packet\n");
251 *pkey = MAKE_KEY_FROM_FIELDS(lookup_addr, lookup_port, ptask->lookup_port_mask);
254 static inline struct ipv4_hdr* get_ipv4_decap(struct rte_mbuf *mbuf)
256 struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
257 struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
258 struct ipv4_hdr* pip4 = (struct ipv4_hdr*) (pip6 + 1); // TODO - Skip Option headers
263 static inline struct ipv4_hdr* get_ipv4_encap(struct rte_mbuf *mbuf)
265 struct ether_hdr* peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
266 struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1);
271 static inline void extract_key_decap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey)
273 extract_key(ptask, get_ipv4_decap(mbuf), TUNNEL_DIR_DECAP, pkey);
276 static inline void extract_key_decap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts)
278 for (uint16_t j = 0; j < n_pkts; ++j) {
279 extract_key_decap(ptask, mbufs[j], &ptask->keys[j]);
283 static inline void extract_key_encap(struct task_ipv6_tun_base* ptask, struct rte_mbuf *mbuf, uint64_t* pkey)
285 extract_key(ptask, get_ipv4_encap(mbuf), TUNNEL_DIR_ENCAP, pkey);
288 static inline void extract_key_encap_bulk(struct task_ipv6_tun_base* ptask, struct rte_mbuf **mbufs, uint16_t n_pkts)
290 for (uint16_t j = 0; j < n_pkts; ++j) {
291 extract_key_encap(ptask, mbufs[j], &ptask->keys[j]);
295 __attribute__((cold)) static void handle_error(struct task_ipv6_tun_base* ptask, struct rte_mbuf* mbuf, ipv6_tun_dir_t dir)
297 uint32_t lookup_addr;
298 uint16_t lookup_port;
301 struct ipv4_hdr* pip4 = (dir == TUNNEL_DIR_DECAP) ? get_ipv4_decap(mbuf) : get_ipv4_encap(mbuf);
302 extract_key_fields(ptask, pip4, dir, &lookup_addr, &lookup_port);
303 extract_key(ptask, pip4, dir, &key);
305 plog_warn("IPv6 Tunnel (%s) lookup failed for "IPv4_BYTES_FMT":%d [key=0x%"PRIx64"]\n",
306 (dir == TUNNEL_DIR_DECAP) ? "decap" : "encap",
307 IPv4_BYTES(((unsigned char*)&lookup_addr)), lookup_port, key);
310 static int handle_ipv6_decap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts)
312 struct task_ipv6_decap* task = (struct task_ipv6_decap *)tbase;
313 uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
314 struct ipv6_tun_dest* entries[64];
315 uint8_t out[MAX_PKT_BURST];
316 uint64_t lookup_hit_mask;
319 prefetch_pkts(mbufs, n_pkts);
321 // Lookup to verify packets are valid for their respective tunnels (their sending lwB4)
322 extract_key_decap_bulk(&task->base, mbufs, n_pkts);
323 rte_table_hash_key8_ext_dosig_ops.f_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries);
325 if (likely(lookup_hit_mask == pkts_mask)) {
326 for (uint16_t j = 0; j < n_pkts; ++j) {
327 out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]);
331 for (uint16_t j = 0; j < n_pkts; ++j) {
332 if (unlikely(!((lookup_hit_mask >> j) & 0x1))) {
333 handle_error(&task->base, mbufs[j], TUNNEL_DIR_DECAP);
334 out[j] = OUT_DISCARD;
337 out[j] = handle_ipv6_decap(task, mbufs[j], entries[j]);
341 return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out);
344 static int handle_ipv6_encap_bulk(struct task_base* tbase, struct rte_mbuf** mbufs, const uint16_t n_pkts)
346 struct task_ipv6_encap* task = (struct task_ipv6_encap *)tbase;
347 uint64_t pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t);
348 struct ipv6_tun_dest* entries[64];
349 uint64_t lookup_hit_mask;
350 uint8_t out[MAX_PKT_BURST];
353 prefetch_first(mbufs, n_pkts);
355 extract_key_encap_bulk(&task->base, mbufs, n_pkts);
356 rte_table_hash_key8_ext_dosig_ops.f_lookup(task->base.lookup_table, task->base.fake_packets, pkts_mask, &lookup_hit_mask, (void**)entries);
358 if (likely(lookup_hit_mask == pkts_mask)) {
359 for (uint16_t j = 0; j < n_pkts; ++j) {
360 out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]);
364 for (uint16_t j = 0; j < n_pkts; ++j) {
365 if (unlikely(!((lookup_hit_mask >> j) & 0x1))) {
366 handle_error(&task->base, mbufs[j], TUNNEL_DIR_ENCAP);
367 out[j] = OUT_DISCARD;
370 out[j] = handle_ipv6_encap(task, mbufs[j], entries[j]);
374 return task->base.base.tx_pkt(tbase, mbufs, n_pkts, out);
377 static inline uint8_t handle_ipv6_decap(struct task_ipv6_decap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest)
379 struct ether_hdr* peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *);
380 struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)ptask;
381 struct ipv4_hdr* pip4 = NULL;
383 if (unlikely(peth->ether_type != ETYPE_IPv6)) {
384 plog_warn("Received non IPv6 packet on ipv6 tunnel port\n");
389 struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
390 int ipv6_hdr_len = sizeof(struct ipv6_hdr);
392 // TODO - Skip over any IPv6 Extension Header:
393 // If pip6->next_header is in (0, 43, 44, 50, 51, 60, 135), skip ahead pip->hdr_ext_len
394 // bytes and repeat. Increase ipv6_hdr_len with as much, each time.
396 if (unlikely(pip6->proto != IPPROTO_IPIP)) {
397 plog_warn("Received non IPv4 content within IPv6 tunnel packet\n");
402 // Discard IPv6 encapsulation
403 rte_pktmbuf_adj(rx_mbuf, ipv6_hdr_len);
404 peth = rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *);
405 pip4 = (struct ipv4_hdr *)(peth + 1);
407 // Restore Ethernet header
408 ether_addr_copy(&ptask->base.src_mac, &peth->s_addr);
409 ether_addr_copy(&ptask->dst_mac, &peth->d_addr);
410 peth->ether_type = ETYPE_IPv4;
412 #ifdef GEN_DECAP_IPV6_TO_IPV4_CKSUM
413 // generate an IP checksum for ipv4 packet
414 if (tun_base->runtime_flags & TASK_TX_CRC) {
415 prox_ip_cksum(rx_mbuf, pip4, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), ptask->base.offload_crc);
422 static inline uint8_t handle_ipv6_encap(struct task_ipv6_encap* ptask, struct rte_mbuf* rx_mbuf, __attribute__((unused)) struct ipv6_tun_dest* tun_dest)
424 //plog_info("Found tunnel endpoint:"IPv6_BYTES_FMT" ("MAC_BYTES_FMT")\n", IPv6_BYTES(tun_dest->dst_addr), MAC_BYTES(tun_dest->dst_mac.addr_bytes));
426 struct ether_hdr* peth = (struct ether_hdr *)(rte_pktmbuf_mtod(rx_mbuf, struct ether_hdr *));
427 struct ipv4_hdr* pip4 = (struct ipv4_hdr *)(peth + 1);
428 uint16_t ipv4_length = rte_be_to_cpu_16(pip4->total_length);
429 struct task_ipv6_tun_base* tun_base = (struct task_ipv6_tun_base*)ptask;
431 if (unlikely((pip4->version_ihl >> 4) != 4)) {
432 plog_warn("Received non IPv4 packet at ipv6 tunnel input\n");
437 if (pip4->time_to_live) {
438 pip4->time_to_live--;
441 plog_info("TTL = 0 => Dropping\n");
444 pip4->hdr_checksum = 0;
446 // Remove padding if any (we don't want to encapsulate garbage at end of IPv4 packet)
447 int padding = rte_pktmbuf_pkt_len(rx_mbuf) - (ipv4_length + sizeof(struct ether_hdr));
448 if (unlikely(padding > 0)) {
449 rte_pktmbuf_trim(rx_mbuf, padding);
453 const int extra_space = sizeof(struct ipv6_hdr);
454 peth = (struct ether_hdr *)rte_pktmbuf_prepend(rx_mbuf, extra_space);
457 ether_addr_copy(&ptask->base.src_mac, &peth->s_addr);
458 ether_addr_copy(&tun_dest->dst_mac, &peth->d_addr);
459 peth->ether_type = ETYPE_IPv6;
461 // Set up IPv6 Header
462 struct ipv6_hdr* pip6 = (struct ipv6_hdr *)(peth + 1);
463 pip6->vtc_flow = rte_cpu_to_be_32(IPv6_VERSION << 28);
464 pip6->proto = IPPROTO_IPIP;
465 pip6->payload_len = rte_cpu_to_be_16(ipv4_length);
466 pip6->hop_limits = ptask->tunnel_hop_limit;
467 rte_memcpy(pip6->dst_addr, &tun_dest->dst_addr, sizeof(pip6->dst_addr));
468 rte_memcpy(pip6->src_addr, &ptask->local_endpoint_addr, sizeof(pip6->src_addr));
470 if (tun_base->runtime_flags & TASK_TX_CRC) {
471 // We modified the TTL in the IPv4 header, hence have to recompute the IPv4 checksum
472 #define TUNNEL_L2_LEN (sizeof(struct ether_hdr) + sizeof(struct ipv6_hdr))
473 prox_ip_cksum(rx_mbuf, pip4, TUNNEL_L2_LEN, sizeof(struct ipv4_hdr), ptask->base.offload_crc);