2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_hash_crc.h>
20 #include <rte_ether.h>
22 #include <rte_version.h>
23 #include <rte_byteorder.h>
26 #include "prox_lua_types.h"
28 #include "prox_malloc.h"
29 #include "prox_cksum.h"
34 #include "task_init.h"
35 #include "task_base.h"
38 #include "prox_port_cfg.h"
39 #include "hash_entry_types.h"
40 #include "prox_shared.h"
41 #include "handle_cgnat.h"
// Byte-lane masks used to build the __m128i header-extraction masks
// (see proto_ipsrc_portsrc_mask / proto_ipdst_portdst_mask below).
#define ALL_32_BITS 0xffffffff
#define BIT_16_TO_31 0xffff0000
#define BIT_8_TO_15 0x0000ff00
#define BIT_0_TO_15 0x0000ffff
48 #define IP4(x) x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, x >> 24
} __attribute__((packed));	// closes a hash-key struct whose fields are elided in this view

// Per-flow state for a private->public mapping.  Entries are indexed by
// the slot returned by rte_hash_add_key() on private_ip_port_hash, so
// the table is parallel to that hash.  (Several fields elided here:
// ip_addr, l4_port, flow_time are referenced elsewhere in this file.)
struct private_flow_entry {
	uint32_t private_ip_idx;	// index into task->private_ip_info
} __attribute__((packed));

// NOTE(review): the following field belongs to the next struct
// (presumably struct public_entry, whose braces are elided in this view).
uint32_t private_ip_idx;
// Per-public-IP configuration: the pool of L4 ports available for
// dynamic NAT mappings on that address.  (Other fields elided in this
// view: public_ip and port_list are referenced elsewhere in this file.)
struct public_ip_config_info {
	uint32_t max_port_count;	// total number of ports configured for this IP
	uint32_t port_free_count;	// ports currently available at the top of port_list
// Per-private-IP state: learned MAC, aging timestamp and the public IP
// this private address is currently mapped to.
struct private_ip_info {
	uint64_t mac_aging_time;	// tsc of last refresh; aged against tsc_hz in the fast path
	uint32_t public_ip_idx;		// index into task->public_ip_config_info
	// NOTE(review): 6 bytes are rte_memcpy'd into &...private_mac elsewhere
	// in this file, which suggests this should be an embedded MAC address
	// (struct ether_addr), not a pointer — TODO confirm against full source.
	struct rte_ether *private_mac;
	// struct task_nat (opening line elided in this view): per-task CGNAT state.
	struct task_base base;			// must be first: PROX casts task_base* to task_nat*
	struct rte_hash *private_ip_hash;	// key: private IPv4 addr -> private_ip_info slot
	struct rte_hash *private_ip_port_hash;	// key: private ip/port -> private_flow_entries slot
	struct rte_hash *public_ip_port_hash;	// key: public ip/port -> public_entries slot
	struct private_flow_entry *private_flow_entries;	// parallel to private_ip_port_hash
	struct public_entry *public_entries;	// parallel to public_ip_port_hash
	struct next_hop *next_hops;		// routing next-hop table (indexed by LPM result)
	struct lcore_cfg *lconf;
	struct rte_lpm *ipv4_lpm;		// LPM table used by route_ipv4()
	uint32_t total_free_port_count;		// ports free across all public IPs
	uint32_t number_free_rules;
	uint32_t public_ip_count;		// number of dynamic public IPs
	struct public_ip_config_info *public_ip_config_info;
	struct private_ip_info *private_ip_info;
	uint8_t runtime_flags;
	uint64_t src_mac[PROX_MAX_PORTS];	// per-output source MAC (8-byte form)
	uint64_t src_mac_from_dpdk_port[PROX_MAX_PORTS];
	volatile int dump_public_hash;		// set asynchronously; polled in handle_nat_bulk
	volatile int dump_private_hash;		// set asynchronously; polled in handle_nat_bulk
// SSE masks selecting proto/IP/port header bytes for flow hashing;
// initialized once in init_task_nat() from the BIT_* constants above.
static __m128i proto_ipsrc_portsrc_mask;
static __m128i proto_ipdst_portdst_mask;
// Flat overlay of an Ethernet/IPv4/L4 header stack at the start of an
// mbuf.  NOTE(review): declared with a UDP header but presumably also
// used for TCP, whose src/dst ports sit at the same offsets — confirm
// against the (elided) protocol switch in route_ipv4().
struct pkt_eth_ipv4 {
	struct ether_hdr ether_hdr;
	struct ipv4_hdr ipv4_hdr;
	struct udp_hdr udp_hdr;
} __attribute__((packed));
121 void task_cgnat_dump_public_hash(struct task_nat *task)
123 task->dump_public_hash = 1;
126 void task_cgnat_dump_private_hash(struct task_nat *task)
128 task->dump_private_hash = 1;
131 static void set_l2(struct task_nat *task, struct rte_mbuf *mbuf, uint8_t nh_idx)
133 struct ether_hdr *peth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
134 *((uint64_t *)(&peth->d_addr)) = task->next_hops[nh_idx].mac_port_8bytes;
135 *((uint64_t *)(&peth->s_addr)) = task->src_mac[task->next_hops[nh_idx].mac_port.out_idx];
// Route an already-translated IPv4 packet: LPM-lookup the destination,
// rewrite L2 via set_l2() and return the output port index.
// NOTE: several lines are elided in this view (case labels of the
// protocol switch, OUT_DISCARD returns, #else/#endif, final return).
static uint8_t route_ipv4(struct task_nat *task, struct rte_mbuf *mbuf)
	struct pkt_eth_ipv4 *pkt = rte_pktmbuf_mtod(mbuf, struct pkt_eth_ipv4 *);
	struct ipv4_hdr *ip = &pkt->ipv4_hdr;
	struct ether_hdr *peth_out;
	// Only TCP/UDP are routed (case labels elided in this view).
	switch(ip->next_proto_id) {
	dst_ip = ip->dst_addr;
	/* Routing for other protocols is not implemented */
	// NOTE(review): typo in the log message — "nit" should read "not".
	plogx_info("Routing nit implemented for this protocol\n");
	// DPDK >= 16.04 widened the rte_lpm_lookup next-hop out-param to 32 bits.
#if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,1)
	uint32_t next_hop_index;
	uint8_t next_hop_index;
	// dst_ip is network order; LPM expects host order, hence the bswap.
	if (unlikely(rte_lpm_lookup(task->ipv4_lpm, rte_bswap32(dst_ip), &next_hop_index) != 0)) {
		uint8_t* dst_ipp = (uint8_t*)&dst_ip;
		plog_warn("lpm_lookup failed for ip %d.%d.%d.%d: rc = %d\n",
			dst_ipp[0], dst_ipp[1], dst_ipp[2], dst_ipp[3], -ENOENT);
	tx_port = task->next_hops[next_hop_index].mac_port.out_idx;
	set_l2(task, mbuf, next_hop_index);
174 static int release_ip(struct task_nat *task, uint32_t *ip_addr, int public_ip_idx)
// Push a port back onto the free-port stack of public IP public_ip_idx.
// port_list is used as a stack: port_free_count is the next free slot.
// (else branch / return statements elided in this view.)
static int release_port(struct task_nat *task, uint32_t public_ip_idx, uint16_t udp_src_port)
	struct public_ip_config_info *public_ip_config_info = &task->public_ip_config_info[public_ip_idx];
	// Only release if at least one port is currently in use (free < max).
	if (public_ip_config_info->max_port_count > public_ip_config_info->port_free_count) {
		public_ip_config_info->port_list[public_ip_config_info->port_free_count] = udp_src_port;
		public_ip_config_info->port_free_count++;
		task->total_free_port_count ++;
		plogx_dbg("Now %d free ports for IP %d.%d.%d.%d\n", public_ip_config_info->port_free_count, IP4(public_ip_config_info->public_ip));
	// Releasing more ports than were ever allocated indicates a bookkeeping bug.
	plogx_err("Unable to release port for ip index %d: max_port_count = %d, port_free_count = %d", public_ip_idx, public_ip_config_info->max_port_count, public_ip_config_info->port_free_count);
// Pick a public IP with at least one free port, scanning round-robin
// starting just after the last IP handed out (task->last_ip), wrapping
// to the beginning if needed.  Writes the chosen address to *ip_addr.
// (Wrap-around reset of last_ip and the return statements are elided
// in this view; presumably returns the IP index, or <0 when exhausted.)
static int get_new_ip(struct task_nat *task, uint32_t *ip_addr)
	struct public_ip_config_info *ip_info;
	if (++task->last_ip >= task->public_ip_count)
	// First pass: from last_ip to the end of the table.
	for (uint32_t ip_idx = task->last_ip; ip_idx < task->public_ip_count; ip_idx++) {
		ip_info = &task->public_ip_config_info[ip_idx];
		plogx_dbg("Checking public IP index %d\n", ip_idx);
		if ((ip_info->port_free_count) > 0) {
			plogx_dbg("Public IP index %d (IP %d.%d.%d.%d) has %d free ports\n", ip_idx, IP4(ip_info->public_ip), ip_info->port_free_count);
			*ip_addr = ip_info->public_ip;
			task->last_ip = ip_idx;
	// Second pass: wrap around from the start of the table up to last_ip.
	for (uint32_t ip_idx = 0; ip_idx < task->last_ip; ip_idx++) {
		ip_info = &task->public_ip_config_info[ip_idx];
		if ((ip_info->port_free_count) > 0) {
			plogx_dbg("Public IP index %d (IP %d.%d.%d.%d) has %d free ports\n", ip_idx, IP4(ip_info->public_ip), ip_info->port_free_count);
			*ip_addr = ip_info->public_ip;
			task->last_ip = ip_idx;
// Pop a free port from public IP ip_idx's port stack into *udp_src_port.
// (Return statements elided in this view; presumably 0 on success,
// <0 when no port is free.)
static int get_new_port(struct task_nat *task, uint32_t ip_idx, uint16_t *udp_src_port)
	struct public_ip_config_info *public_ip_config_info = &task->public_ip_config_info[ip_idx];
	if (public_ip_config_info->port_free_count > 0) {
		public_ip_config_info->port_free_count--;
		*udp_src_port = public_ip_config_info->port_list[public_ip_config_info->port_free_count];
		task->total_free_port_count --;
		// NOTE(review): release_port logs the same event at dbg level;
		// plogx_info here looks inconsistent (fast-path log) — confirm intent.
		plogx_info("Now %d free ports for IP %d.%d.%d.%d\n", public_ip_config_info->port_free_count, IP4(public_ip_config_info->public_ip));
// Remove a private<->public mapping from both hash tables and return the
// public port to its IP's free pool.  proto is currently unused: the
// key's proto field assignment is commented out below.
// (Error-branch braces / return statements elided in this view.)
static int delete_port_entry(struct task_nat *task, uint8_t proto, uint32_t private_ip, uint16_t private_port, uint32_t public_ip, uint16_t public_port, int public_ip_idx)
	struct private_key private_key;
	struct public_key public_key;
	// private_key.proto = proto;
	private_key.ip_addr = private_ip;
	private_key.l4_port = private_port;
	ret = rte_hash_del_key(task->private_ip_port_hash, (const void *)&private_key);
	// NOTE(review): "Unable delete" should read "Unable to delete" (here and below).
	plogx_info("Unable delete key ip %d.%d.%d.%d / port %x in private ip_port hash\n", IP4(private_ip), private_port);
	plogx_dbg("Deleted ip %d.%d.%d.%d / port %x from private ip_port hash\n", IP4(private_ip), private_port);
	public_key.ip_addr = public_ip;
	public_key.l4_port = public_port;
	ret = rte_hash_del_key(task->public_ip_port_hash, (const void *)&public_key);
	plogx_info("Unable delete key ip %d.%d.%d.%d / port %x in public ip_port hash\n", IP4(public_ip), public_port);
	plogx_dbg("Deleted ip %d.%d.%d.%d / port %x (hash index %d) from public ip_port hash\n", IP4(public_ip), public_port, ret);
	// Return the public port to the free pool only after both hashes agree.
	release_port(task, public_ip_idx, public_port);
// Allocate a public port on public_ip_idx and install the mapping in both
// the private and the public ip/port hashes, filling the parallel
// private_flow_entries / public_entries tables at the returned slots.
// On any failure the port is released again.  *port receives the chosen
// public port.  (Error-branch braces / return statements elided in this
// view; proto is unused — the key's proto assignment is commented out.)
static int add_new_port_entry(struct task_nat *task, uint8_t proto, int public_ip_idx, int private_ip_idx, uint32_t private_src_ip, uint16_t private_udp_port, struct rte_mbuf *mbuf, uint64_t tsc, uint16_t *port)
	struct private_key private_key;
	struct public_key public_key;
	uint32_t ip = task->public_ip_config_info[public_ip_idx].public_ip;
	if (get_new_port(task, public_ip_idx, port) < 0) {
		plogx_info("Unable to find new port for IP %x\n", private_src_ip);
	// private_key.proto = proto;
	private_key.ip_addr = private_src_ip;
	private_key.l4_port = private_udp_port;
	ret = rte_hash_add_key(task->private_ip_port_hash, (const void *)&private_key);
	plogx_info("Unable add ip %d.%d.%d.%d / port %x in private ip_port hash\n", IP4(private_src_ip), private_udp_port);
	release_port(task, public_ip_idx, *port);
	// Another packet of the same flow in this burst may have installed the
	// mapping already; a non-zero ip_addr in the slot detects that race.
	} else if (task->private_flow_entries[ret].ip_addr) {
		plogx_dbg("Race condition properly handled: port alrerady added\n");
		release_port(task, public_ip_idx, *port);
	plogx_dbg("Added ip %d.%d.%d.%d / port %x in private ip_port hash => %d.%d.%d.%d / %d - index = %d\n", IP4(private_src_ip), private_udp_port, IP4(ip), *port, ret);
	task->private_flow_entries[ret].ip_addr = ip;
	task->private_flow_entries[ret].l4_port = *port;
	task->private_flow_entries[ret].flow_time = tsc;
	task->private_flow_entries[ret].private_ip_idx = private_ip_idx;
	public_key.ip_addr = ip;
	public_key.l4_port = *port;
	plogx_dbg("Adding key ip %d.%d.%d.%d / port %x in public ip_port hash\n", IP4(ip), *port);
	ret = rte_hash_add_key(task->public_ip_port_hash, (const void *)&public_key);
	plogx_info("Unable add ip %x / port %x in public ip_port hash\n", ip, *port);
	// TODO: remove from private_ip_port_hash
	release_port(task, public_ip_idx, *port);
	plogx_dbg("Added ip %d.%d.%d.%d / port %x in public ip_port hash\n", IP4(ip), *port);
	// Reverse-path entry: remember the private endpoint and ingress port.
	task->public_entries[ret].ip_addr = private_src_ip;
	task->public_entries[ret].l4_port = private_udp_port;
	task->public_entries[ret].dpdk_port = mbuf->port;
	task->public_entries[ret].private_ip_idx = private_ip_idx;
// Fast-path bulk handler.  Direction is selected by task->private (set
// in init_task_nat): the first half performs source NAT for
// private->public traffic, allocating new IP/port mappings on demand;
// the second half performs the reverse public->private lookup.
// NOTE: many lines of this function are elided in this sampled view
// (braces, continue/return statements, some declarations such as
// next_data/iter/i/new_entry), so control flow below is fragmentary.
static int handle_nat_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
	struct task_nat *task = (struct task_nat *)tbase;
	uint8_t out[MAX_PKT_BURST];		// per-packet output port or OUT_DISCARD
	uint32_t *ip_addr, public_ip, private_ip;
	uint16_t *udp_src_port, port, private_port, public_port;
	struct pkt_eth_ipv4 *pkt[MAX_PKT_BURST];
	int ret, private_ip_idx, public_ip_idx = -1, port_idx;
	uint64_t tsc = rte_rdtsc();		// timestamp for flow / MAC aging
	void *keys[MAX_PKT_BURST];
	int32_t positions[MAX_PKT_BURST];
	int map[MAX_PKT_BURST] = {0};		// maps new-mapping slot k -> packet index j

	// Asynchronously requested debug dump of the public hash (see
	// task_cgnat_dump_public_hash); flag cleared once dumped.
	if (unlikely(task->dump_public_hash)) {
		const struct public_key *next_key;
		while ((ret = rte_hash_iterate(task->public_ip_port_hash, (const void **)&next_key, &next_data, &iter)) >= 0) {
			plogx_info("Public entry %d (index %d): ip = %d.%d.%d.%d, port = %d ===> private entry: ip = %d.%d.%d.%d, port = %d\n", i++, ret, IP4(next_key->ip_addr), next_key->l4_port, IP4(task->public_entries[ret].ip_addr),task->public_entries[ret].l4_port);
		task->dump_public_hash = 0;
	// Same, for the private hash.
	if (unlikely(task->dump_private_hash)) {
		const struct private_key *next_key;
		while ((ret = rte_hash_iterate(task->private_ip_port_hash, (const void **)&next_key, &next_data, &iter)) >= 0) {
			plogx_info("Private entry %d (index %d): ip = %d.%d.%d.%d, port = %d ===> public entry: ip = %d.%d.%d.%d, port = %d\n", i++, ret, IP4(next_key->ip_addr), next_key->l4_port, IP4(task->private_flow_entries[ret].ip_addr),task->private_flow_entries[ret].l4_port);
		task->dump_private_hash = 0;
	for (j = 0; j < n_pkts; ++j) {
	for (j = 0; j < n_pkts; ++j) {
		pkt[j] = rte_pktmbuf_mtod(mbufs[j], struct pkt_eth_ipv4 *);
	// ---- private -> public direction ----
	struct private_key key[MAX_PKT_BURST];
	for (j = 0; j < n_pkts; ++j) {
		/* Currently, only support eth/ipv4 packets */
		if (pkt[j]->ether_hdr.ether_type != ETYPE_IPv4) {
			plogx_info("Currently, only support eth/ipv4 packets\n");
			out[j] = OUT_DISCARD;
			keys[j] = (void *)NULL;
		key[j].ip_addr = pkt[j]->ipv4_hdr.src_addr;
		key[j].l4_port = pkt[j]->udp_hdr.src_port;
	// Bulk-lookup all flows at once; misses get positions[j] < 0.
	ret = rte_hash_lookup_bulk(task->private_ip_port_hash, (const void **)&keys, n_pkts, positions);
	if (unlikely(ret < 0)) {
		plogx_info("lookup_bulk failed in private_ip_port_hash\n");
	int n_new_mapping = 0;
	for (j = 0; j < n_pkts; ++j) {
		port_idx = positions[j];
		if (unlikely(port_idx < 0)) {
			// Miss: queue the packet for the new-mapping pass below.
			plogx_dbg("ip %d.%d.%d.%d / port %x not found in private ip/port hash\n", IP4(pkt[j]->ipv4_hdr.src_addr), pkt[j]->udp_hdr.src_port);
			map[n_new_mapping] = j;
			keys[n_new_mapping++] = (void *)&(pkt[j]->ipv4_hdr.src_addr);
		// Hit: rewrite src ip/port in place, refresh aging, route out.
		ip_addr = &(pkt[j]->ipv4_hdr.src_addr);
		udp_src_port = &(pkt[j]->udp_hdr.src_port);
		plogx_dbg("ip/port %d.%d.%d.%d / %x found in private ip/port hash\n", IP4(pkt[j]->ipv4_hdr.src_addr), pkt[j]->udp_hdr.src_port);
		*ip_addr = task->private_flow_entries[port_idx].ip_addr;
		*udp_src_port = task->private_flow_entries[port_idx].l4_port;
		uint64_t flow_time = task->private_flow_entries[port_idx].flow_time;
		// Refresh timestamps at most ~once per second to limit writes.
		if (flow_time + tsc_hz < tsc) {
			task->private_flow_entries[port_idx].flow_time = tsc;
		private_ip_idx = task->private_flow_entries[port_idx].private_ip_idx;
		if (task->private_ip_info[private_ip_idx].mac_aging_time + tsc_hz < tsc)
			task->private_ip_info[private_ip_idx].mac_aging_time = tsc;
		prox_ip_udp_cksum(mbufs[j], &pkt[j]->ipv4_hdr, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), task->offload_crc);
		out[j] = route_ipv4(task, mbufs[j]);
	// ---- new-mapping pass for the packets that missed ----
	// Find whether at least IP is already known...
	ret = rte_hash_lookup_bulk(task->private_ip_hash, (const void **)&keys, n_new_mapping, positions);
	if (unlikely(ret < 0)) {
		plogx_info("lookup_bulk failed for private_ip_hash\n");
		for (int k = 0; k < n_new_mapping; ++k) {
			// NOTE(review): uses j here, not map[k]; looks like the wrong
			// index for discarding the queued packets — confirm upstream.
			out[j] = OUT_DISCARD;
	for (int k = 0; k < n_new_mapping; ++k) {
		private_ip_idx = positions[k];
		ip_addr = &(pkt[j]->ipv4_hdr.src_addr);
		proto = pkt[j]->ipv4_hdr.next_proto_id;
		udp_src_port = &(pkt[j]->udp_hdr.src_port);
		int new_ip_entry = 0;
		if (unlikely(private_ip_idx < 0)) {
			private_ip = *ip_addr;
			private_port = *udp_src_port;
			plogx_dbg("Did not find private ip %d.%d.%d.%d in ip hash table, looking for new public ip\n", IP4(*ip_addr));
			// IP not found, need to get a new IP/port mapping
			public_ip_idx = get_new_ip(task, &public_ip);
			if (public_ip_idx < 0) {
				plogx_info("Unable to find new ip/port\n");
				out[j] = OUT_DISCARD;
			plogx_dbg("found new public ip %d.%d.%d.%d at public IP index %d\n", IP4(public_ip), public_ip_idx);
			private_ip_idx = rte_hash_add_key(task->private_ip_hash, (const void *)ip_addr);
			// The key might be added multiple time - in case the same key was present in the bulk_lookup multiple times
			// As such this is not an issue - the add_key will returns the index as for a new key
			// This scenario should not happen often in real time use case
			// as a for a new flow (flow renewal), probably only one packet will be sent (e.g. TCP SYN)
			if (private_ip_idx < 0) {
				release_ip(task, &public_ip, public_ip_idx);
				plogx_info("Unable add ip %d.%d.%d.%d in private ip hash\n", IP4(*ip_addr));
				out[j] = OUT_DISCARD;
			} else if (task->private_ip_info[private_ip_idx].public_ip) {
				plogx_info("race condition properly handled : ip %d.%d.%d.%d already in private ip hash\n", IP4(*ip_addr));
				release_ip(task, &public_ip, public_ip_idx);
				public_ip = task->private_ip_info[private_ip_idx].public_ip;
				public_ip_idx = task->private_ip_info[private_ip_idx].public_ip_idx;
			plogx_dbg("Added ip %d.%d.%d.%d in private ip hash\n", IP4(*ip_addr));
			// NOTE(review): copies 6 bytes from ((uint8_t *)pkt) + 6, i.e. from
			// the pkt[] pointer array, not from pkt[j]'s Ethernet header, and
			// into the address of a pointer field — looks wrong; confirm upstream.
			rte_memcpy(&task->private_ip_info[private_ip_idx].private_mac, ((uint8_t *)pkt) + 6, 6);
			task->private_ip_info[private_ip_idx].public_ip = public_ip;
			task->private_ip_info[private_ip_idx].static_entry = 0;
			task->private_ip_info[private_ip_idx].public_ip_idx = public_ip_idx;
		// IP already known: reuse its established public mapping.
		public_ip = task->private_ip_info[private_ip_idx].public_ip;
		public_ip_idx = task->private_ip_info[private_ip_idx].public_ip_idx;
		port_idx = add_new_port_entry(task, proto, public_ip_idx, private_ip_idx, *ip_addr, *udp_src_port, mbufs[j], tsc, &public_port);
		// TODO: delete IP in ip_hash
		// On failure, undo the IP allocation if this flow created it.
		if ((new_ip_entry) && (task->last_ip != 0)) {
			release_ip(task, &public_ip, public_ip_idx);
		} else if (new_ip_entry) {
			release_ip(task, &public_ip, public_ip_idx);
			task->last_ip = task->public_ip_count-1;
		plogx_info("Failed to add new port entry\n");
		out[j] = OUT_DISCARD;
		private_ip = *ip_addr;
		private_port = *udp_src_port;
		plogx_info("Added new ip/port: private ip/port = %d.%d.%d.%d/%x public ip/port = %d.%d.%d.%d/%x, index = %d\n", IP4(private_ip), private_port, IP4(public_ip), public_port, port_idx);
		// task->private_flow_entries[port_idx].ip_addr = task->private_ip_info[private_ip_idx].public_ip;
		plogx_info("Added new port: private ip/port = %d.%d.%d.%d/%x, public ip/port = %d.%d.%d.%d/%x\n", IP4(private_ip), private_port, IP4(task->private_ip_info[private_ip_idx].public_ip), public_port);
		*ip_addr = public_ip ;
		*udp_src_port = public_port;
		uint64_t flow_time = task->private_flow_entries[port_idx].flow_time;
		if (flow_time + tsc_hz < tsc) {
			task->private_flow_entries[port_idx].flow_time = tsc;
		if (task->private_ip_info[private_ip_idx].mac_aging_time + tsc_hz < tsc)
			task->private_ip_info[private_ip_idx].mac_aging_time = tsc;
		prox_ip_udp_cksum(mbufs[j], &pkt[j]->ipv4_hdr, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), task->offload_crc);
		// TODO: if route fails while just added new key in table, should we delete the key from the table?
		out[j] = route_ipv4(task, mbufs[j]);
		// Roll back a freshly created mapping if routing rejected the packet.
		if (out[j] && new_entry) {
			delete_port_entry(task, proto, private_ip, private_port, *ip_addr, *udp_src_port, public_ip_idx);
			plogx_info("Deleted port: private ip/port = %d.%d.%d.%d/%x, public ip/port = %d.%d.%d.%d/%x\n", IP4(private_ip), private_port, IP4(*ip_addr), *udp_src_port);
	return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
	// ---- public -> private direction ----
	struct public_key public_key[MAX_PKT_BURST];
	for (j = 0; j < n_pkts; ++j) {
		/* Currently, only support eth/ipv4 packets */
		if (pkt[j]->ether_hdr.ether_type != ETYPE_IPv4) {
			plogx_info("Currently, only support eth/ipv4 packets\n");
			out[j] = OUT_DISCARD;
			keys[j] = (void *)NULL;
		public_key[j].ip_addr = pkt[j]->ipv4_hdr.dst_addr;
		public_key[j].l4_port = pkt[j]->udp_hdr.dst_port;
		keys[j] = &public_key[j];
	ret = rte_hash_lookup_bulk(task->public_ip_port_hash, (const void **)&keys, n_pkts, positions);
	plogx_err("Failed lookup bulk public_ip_port_hash\n");
	for (j = 0; j < n_pkts; ++j) {
		port_idx = positions[j];
		ip_addr = &(pkt[j]->ipv4_hdr.dst_addr);
		udp_src_port = &(pkt[j]->udp_hdr.dst_port);
		plogx_err("Failed to find ip/port %d.%d.%d.%d/%x in public_ip_port_hash\n", IP4(*ip_addr), *udp_src_port);
		out[j] = OUT_DISCARD;
		plogx_dbg("Found ip/port %d.%d.%d.%d/%x in public_ip_port_hash\n", IP4(*ip_addr), *udp_src_port);
		// Rewrite dst ip/port back to the private endpoint and restore MACs.
		*ip_addr = task->public_entries[port_idx].ip_addr;
		*udp_src_port = task->public_entries[port_idx].l4_port;
		private_ip_idx = task->public_entries[port_idx].private_ip_idx;
		plogx_dbg("Found private IP info for ip %d.%d.%d.%d\n", IP4(*ip_addr));
		rte_memcpy(((uint8_t *)(pkt[j])) + 0, &task->private_ip_info[private_ip_idx].private_mac, 6);
		rte_memcpy(((uint8_t *)(pkt[j])) + 6, &task->src_mac_from_dpdk_port[task->public_entries[port_idx].dpdk_port], 6);
		out[j] = task->public_entries[port_idx].dpdk_port;
		prox_ip_udp_cksum(mbufs[j], &pkt[j]->ipv4_hdr, sizeof(struct ether_hdr), sizeof(struct ipv4_hdr), task->offload_crc);
	return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
// Build all CGNAT tables from the Lua configuration table `name`:
// the "dynamic" section (public IP/port pools), "static_ip" (1:1 IP
// mappings) and "static_ip_port" (fixed ip/port mappings).  Allocates
// the three rte_hash tables plus the parallel entry arrays and installs
// pointers into every CGNAT task of this lcore so they share state.
// NOTE: this sampled view elides many lines (braces, lua stack pops,
// error returns, some counters such as i/next_data), so control flow
// below is fragmentary.
static int lua_to_hash_nat(struct task_args *targ, struct lua_State *L, enum lua_place from, const char *name, uint8_t socket)
	struct rte_hash *tmp_priv_ip_hash, *tmp_priv_hash, *tmp_pub_hash;
	struct private_flow_entry *tmp_priv_flow_entries;
	struct public_entry *tmp_pub_entries;
	uint32_t n_entries = 0;;	// NOTE(review): stray second semicolon
	uint32_t ip_from, ip_to;
	uint16_t port_from, port_to;
	int ret, idx, pop, pop2, pop3, n_static_entries = 0;
	uint32_t dst_ip1, dst_ip2;
	struct val_range dst_port;
	struct public_ip_config_info *ip_info;
	struct public_ip_config_info *tmp_public_ip_config_info;
	if ((pop = lua_getfrom(L, from, name)) < 0)
	if (!lua_istable(L, -1)) {
		plogx_err("Can't read cgnat since data is not a table\n");
	// Temporary host-side structs used while parsing (fields elided in this view).
	struct tmp_public_ip {
	struct tmp_static_ip {
	struct tmp_static_ip_port {
		uint16_t private_port;
		uint16_t public_port;
	uint32_t n_public_groups = 0;
	uint32_t n_public_ip = 0;
	uint32_t n_static_ip = 0;
	uint32_t n_static_ip_port = 0;
	struct tmp_public_ip *tmp_public_ip = NULL;
	struct tmp_static_ip *tmp_static_ip = NULL;
	struct tmp_static_ip_port *tmp_static_ip_port = NULL;
	// Look for Dynamic entries configuration
	plogx_info("Reading dynamic NAT table\n");
	if ((pop2 = lua_getfrom(L, TABLE, "dynamic")) < 0) {
		plogx_info("No dynamic table found\n");
	uint64_t n_ip, n_port;
	if (!lua_istable(L, -1)) {
		plogx_err("Can't read cgnat since data is not a table\n");
	n_public_groups = lua_tointeger(L, -1);
	plogx_info("%d groups of public IP\n", n_public_groups);
	tmp_public_ip = (struct tmp_public_ip *)malloc(n_public_groups * sizeof(struct tmp_public_ip));
	PROX_PANIC(tmp_public_ip == NULL, "Failed to allocated tmp_public_ip\n");
	// Each group defines a public IP range and a shared port range.
	while (lua_next(L, -2)) {
		if (lua_to_ip(L, TABLE, "public_ip_range_start", &dst_ip1) ||
			lua_to_ip(L, TABLE, "public_ip_range_stop", &dst_ip2) ||
			lua_to_val_range(L, TABLE, "public_port", &dst_port))
		PROX_PANIC(dst_ip2 < dst_ip1, "public_ip_range error: %d.%d.%d.%d < %d.%d.%d.%d\n", (dst_ip2 >> 24), (dst_ip2 >> 16) & 0xFF, (dst_ip2 >> 8) & 0xFF, dst_ip2 & 0xFF, dst_ip1 >> 24, (dst_ip1 >> 16) & 0xFF, (dst_ip1 >> 8) & 0xFF, dst_ip1 & 0xFF);
		PROX_PANIC(dst_port.end < dst_port.beg, "public_port error: %d < %d\n", dst_port.end, dst_port.beg);
		n_ip = dst_ip2 - dst_ip1 + 1;
		n_port = dst_port.end - dst_port.beg + 1;
		plogx_info("Found IP from %d.%d.%d.%d to %d.%d.%d.%d and port from %d to %d\n", dst_ip1 >> 24, (dst_ip1 >> 16) & 0xFF, (dst_ip1 >> 8) & 0xFF, dst_ip1 & 0xFF, (dst_ip2 >> 24), (dst_ip2 >> 16) & 0xFF, (dst_ip2 >> 8) & 0xFF, dst_ip2 & 0xFF, dst_port.beg, dst_port.end);
		tmp_public_ip[i].ip_beg = dst_ip1;
		tmp_public_ip[i].ip_end = dst_ip2;
		tmp_public_ip[i].port_beg = dst_port.beg;
		tmp_public_ip[i++].port_end = dst_port.end;
		n_entries += n_ip * n_port;
	// ---- static 1:1 IP mappings ----
	if ((pop2 = lua_getfrom(L, TABLE, "static_ip")) < 0) {
		plogx_info("No static ip table found\n");
	if (!lua_istable(L, -1)) {
		plogx_err("Can't read cgnat since data is not a table\n");
	n_static_ip = lua_tointeger(L, -1);
	plogx_info("%d entries in static ip table\n", n_static_ip);
	tmp_static_ip = (struct tmp_static_ip *)malloc(n_static_ip * sizeof(struct tmp_static_ip));
	PROX_PANIC(tmp_static_ip == NULL, "Failed to allocated tmp_static_ip\n");
	while (lua_next(L, -2)) {
		if (lua_to_ip(L, TABLE, "src_ip", &ip_from) ||
			lua_to_ip(L, TABLE, "dst_ip", &ip_to))
		// Stored in network byte order.
		ip_from = rte_bswap32(ip_from);
		ip_to = rte_bswap32(ip_to);
		tmp_static_ip[i].private_ip = ip_from;
		tmp_static_ip[i++].public_ip = ip_to;
		// Static IPs may not overlap the dynamic pools.
		for (unsigned int j = 0; j < n_public_groups; j++) {
			if ((tmp_public_ip[j].ip_beg <= ip_to) && (ip_to <= tmp_public_ip[j].ip_end)) {
				PROX_PANIC(1, "list of static ip mapping overlap with list of dynamic IP => not supported yet\n");
	// ---- static ip/port mappings ----
	if ((pop2 = lua_getfrom(L, TABLE, "static_ip_port")) < 0) {
		plogx_info("No static table found\n");
	if (!lua_istable(L, -1)) {
		plogx_err("Can't read cgnat since data is not a table\n");
	n_static_ip_port = lua_tointeger(L, -1);
	plogx_info("%d entries in static table\n", n_static_ip_port);
	tmp_static_ip_port = (struct tmp_static_ip_port *)malloc(n_static_ip_port * sizeof(struct tmp_static_ip_port));
	PROX_PANIC(tmp_static_ip_port == NULL, "Failed to allocated tmp_static_ip_port\n");
	while (lua_next(L, -2)) {
		if (lua_to_ip(L, TABLE, "src_ip", &ip_from) ||
			lua_to_ip(L, TABLE, "dst_ip", &ip_to) ||
			lua_to_port(L, TABLE, "src_port", &port_from) ||
			lua_to_port(L, TABLE, "dst_port", &port_to))
		ip_from = rte_bswap32(ip_from);
		ip_to = rte_bswap32(ip_to);
		port_from = rte_bswap16(port_from);
		port_to = rte_bswap16(port_to);
		tmp_static_ip_port[i].private_ip = ip_from;
		tmp_static_ip_port[i].public_ip = ip_to;
		tmp_static_ip_port[i].private_port = port_from;
		tmp_static_ip_port[i].public_port = port_to;
		tmp_static_ip_port[i].n_ports = 1;
		// Overlap checks against dynamic pools and static 1:1 IPs.
		for (unsigned int j = 0; j < n_public_groups; j++) {
			if ((tmp_public_ip[j].ip_beg <= rte_bswap32(ip_to)) && (rte_bswap32(ip_to) <= tmp_public_ip[j].ip_end)) {
				// NOTE(review): "+ 11" looks like a typo for "+ 1"; unreachable
				// anyway since the next line panics — confirm upstream.
				tmp_static_ip_port[i].ip_found = j + 11;
				PROX_PANIC(1, "list of static ip/port mapping overlap with list of dynamic IP => not supported yet\n");
		for (unsigned int j = 0; j < n_static_ip; j++) {
			if ((tmp_static_ip[j].public_ip == ip_to) ) {
				tmp_static_ip_port[i].ip_found = j + 1;
				PROX_PANIC(1, "list of static ip/port mapping overlap with list of static ip => not supported yet\n");
		// Several static entries may share one public IP: only the first
		// owns the IP; later ones just bump its port count.
		for (unsigned int j = 0; j <= i; j++) {
			if (ip_to == tmp_static_ip_port[j].public_ip) {
				tmp_static_ip_port[i].ip_found = j + 1;
				tmp_static_ip_port[j].n_ports++;
				tmp_static_ip_port[i].n_ports = 0;
		if (!tmp_static_ip_port[i].ip_found) {
	// ---- build the runtime public IP table ----
	tmp_public_ip_config_info = (struct public_ip_config_info *)prox_zmalloc(n_public_ip * sizeof(struct public_ip_config_info), socket);
	PROX_PANIC(tmp_public_ip_config_info == NULL, "Failed to allocate PUBLIC IP INFO\n");
	plogx_info("%d PUBLIC IP INFO allocated\n", n_public_ip);
	// NOTE(review): sized with sizeof(struct public_ip_config_info) but holds
	// struct private_ip_info elements — works only if public >= private size;
	// should use sizeof(struct private_ip_info). Confirm upstream.
	struct private_ip_info *tmp_priv_ip_info = (struct private_ip_info *)prox_zmalloc(4 * n_public_ip * sizeof(struct public_ip_config_info), socket);
	PROX_PANIC(tmp_priv_ip_info == NULL, "Failed to allocate PRIVATE IP INFO\n");
	plogx_info("%d PRIVATE IP INFO allocated\n", 4 * n_public_ip);
	uint32_t ip_free_count = 0;
	for (i = 0; i < n_public_groups; i++) {
		for (uint32_t ip = tmp_public_ip[i].ip_beg; ip <= tmp_public_ip[i].ip_end; ip++) {
			ip_info = &tmp_public_ip_config_info[ip_free_count];
			ip_info->public_ip = rte_bswap32(ip);
			// NOTE(review): allocates (end - beg) slots but the loop below
			// stores (end - beg + 1) ports — off-by-one overflow by one
			// element. Confirm and fix upstream.
			ip_info->port_list = (uint16_t *)prox_zmalloc((dst_port.end - dst_port.beg) * sizeof(uint16_t), socket);
			PROX_PANIC(ip_info->port_list == NULL, "Failed to allocate list of ports for ip %x\n", ip);
			for (uint32_t port = tmp_public_ip[i].port_beg; port <= tmp_public_ip[i].port_end; port++) {
				ip_info->port_list[ip_info->port_free_count] = rte_bswap16(port);
				ip_info->port_free_count++;
			ip_info->max_port_count = ip_info->port_free_count;
			plogx_dbg("Added IP %d.%d.%d.%d with ports from %x to %x at index %x\n", IP4(ip_info->public_ip), tmp_public_ip[i].port_beg, tmp_public_ip[i].port_end, ip_free_count);
	// Static 1:1 IPs carry no port pool.
	uint32_t public_ip_count = ip_free_count;
	for (i = 0; i < n_static_ip; i++) {
		ip_info = &tmp_public_ip_config_info[ip_free_count];
		ip_info->public_ip = tmp_static_ip[i].public_ip;
		ip_info->port_list = NULL;
		ip_info->max_port_count = 0;
	// Static ip/port entries: the owning entry allocates the port list;
	// followers append their port to the owner's list.
	for (i = 0; i < n_static_ip_port; i++) {
		if (!tmp_static_ip_port[i].ip_found) {
			ip_info = &tmp_public_ip_config_info[ip_free_count];
			ip_info->public_ip = tmp_static_ip_port[i].public_ip;
			ip_info->port_list = (uint16_t *)prox_zmalloc(tmp_static_ip_port[i].n_ports * sizeof(uint16_t), socket);
			PROX_PANIC(ip_info->port_list == NULL, "Failed to allocate list of ports for ip %x\n", tmp_static_ip_port[i].public_ip);
			ip_info->port_list[ip_info->port_free_count] = tmp_static_ip_port[i].public_port;
			ip_info->port_free_count++;
			ip_info->max_port_count = ip_info->port_free_count;
		for (unsigned j = 0; j < ip_free_count; j++) {
			ip_info = &tmp_public_ip_config_info[j];
			if (ip_info->public_ip == tmp_static_ip_port[i].public_ip) {
				ip_info = &tmp_public_ip_config_info[j];
				ip_info->port_list[ip_info->port_free_count] = tmp_static_ip_port[i].public_port;
				ip_info->port_free_count++;
				ip_info->max_port_count = ip_info->port_free_count;
	plogx_info("%d entries in dynamic table\n", n_entries);
	// Oversize the hashes 4x to keep collision chains short.
	n_entries = n_entries * 4;
	static char hash_name[30];
	sprintf(hash_name, "A%03d_hash_nat_table", targ->lconf->id);
	struct rte_hash_parameters hash_params = {
		.entries = n_entries,
		.key_len = sizeof(struct private_key),
		.hash_func = rte_hash_crc,
		.hash_func_init_val = 0,
	plogx_info("hash table name = %s\n", hash_params.name);
	struct private_key private_key;
	struct public_key public_key;
	tmp_priv_hash = rte_hash_create(&hash_params);
	PROX_PANIC(tmp_priv_hash == NULL, "Failed to set up private hash table for NAT\n");
	plogx_info("private hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
	tmp_priv_flow_entries = (struct private_flow_entry *)prox_zmalloc(n_entries * sizeof(struct private_flow_entry), socket);
	PROX_PANIC(tmp_priv_flow_entries == NULL, "Failed to allocate memory for private NAT %u entries\n", n_entries);
	plogx_info("private data allocated, with %d entries of size %ld\n", n_entries, sizeof(struct private_flow_entry));
	// NOTE(review): the three rte_hash tables appear to reuse the same name
	// (the name-mutating lines are commented out) — rte_hash_create may fail
	// on duplicate names; confirm against full source.
	//hash_params.name[0]++;
	plogx_info("hash table name = %s\n", hash_params.name);
	hash_params.key_len = sizeof(uint32_t);
	hash_params.entries = 4 * ip_free_count;
	tmp_priv_ip_hash = rte_hash_create(&hash_params);
	PROX_PANIC(tmp_priv_ip_hash == NULL, "Failed to set up private ip hash table for NAT\n");
	plogx_info("private ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
	//hash_params.name[0]++;
	plogx_info("hash table name = %s\n", hash_params.name);
	hash_params.entries = n_entries;
	// NOTE(review): trailing comma instead of semicolon — combines with the
	// next statement via the comma operator; works, but should be ';'.
	hash_params.key_len = sizeof(struct public_key),
	tmp_pub_hash = rte_hash_create(&hash_params);
	PROX_PANIC(tmp_pub_hash == NULL, "Failed to set up public hash table for NAT\n");
	plogx_info("public hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
	//hash_params.name[0]++;
	tmp_pub_entries = (struct public_entry *)prox_zmalloc(n_entries * sizeof(struct public_entry), socket);
	PROX_PANIC(tmp_pub_entries == NULL, "Failed to allocate memory for public NAT %u entries\n", n_entries);
	plogx_info("public data allocated, with %d entries of size %ld\n", n_entries, sizeof(struct private_flow_entry));
	// Pre-install the static ip/port mappings into both hashes.
	for (i = 0; i < n_static_ip_port; i++) {
		ip_to = tmp_static_ip_port[i].public_ip;
		ip_from = tmp_static_ip_port[i].private_ip;
		port_to = tmp_static_ip_port[i].public_port;
		port_from = tmp_static_ip_port[i].private_port;
		private_key.ip_addr = ip_from;
		private_key.l4_port = port_from;
		ret = rte_hash_lookup(tmp_priv_hash, (const void *)&private_key);
		PROX_PANIC(ret >= 0, "Key %x %x already exists in NAT private hash table\n", ip_from, port_from);
		idx = rte_hash_add_key(tmp_priv_ip_hash, (const void *)&ip_from);
		PROX_PANIC(idx < 0, "Failed to add ip %x to NAT private hash table\n", ip_from);
		ret = rte_hash_add_key(tmp_priv_hash, (const void *)&private_key);
		PROX_PANIC(ret < 0, "Failed to add Key %x %x to NAT private hash table\n", ip_from, port_from);
		tmp_priv_flow_entries[ret].ip_addr = ip_to;
		// flow_time = -1 (max u64) marks static entries as never aging.
		tmp_priv_flow_entries[ret].flow_time = -1;
		tmp_priv_flow_entries[ret].private_ip_idx = idx;
		tmp_priv_flow_entries[ret].l4_port = port_to;
		public_key.ip_addr = ip_to;
		public_key.l4_port = port_to;
		ret = rte_hash_lookup(tmp_pub_hash, (const void *)&public_key);
		// NOTE(review): "fir" should read "for" in this panic message.
		PROX_PANIC(ret >= 0, "Key %d.%d.%d.%d port %x (for private IP %d.%d.%d.%d port %x) already exists in NAT public hash table fir IP %d.%d.%d.%d port %x\n", IP4(ip_to), port_to, IP4(ip_from), port_from, IP4(tmp_pub_entries[ret].ip_addr), tmp_pub_entries[ret].l4_port);
		ret = rte_hash_add_key(tmp_pub_hash, (const void *)&public_key);
		PROX_PANIC(ret < 0, "Failed to add Key %x %x to NAT public hash table\n", ip_to, port_to);
		tmp_pub_entries[ret].ip_addr = ip_from;
		tmp_pub_entries[ret].l4_port = port_from;
		tmp_pub_entries[ret].private_ip_idx = idx;
	// Share the freshly built tables with every CGNAT task on this lcore.
	for (uint8_t task_id = 0; task_id < targ->lconf->n_tasks_all; ++task_id) {
		struct task_args *target_targ = (struct task_args *)&(targ->lconf->targs[task_id]);
		enum task_mode smode = target_targ->mode;
		if (CGNAT == smode) {
			target_targ->public_ip_count = public_ip_count;
			target_targ->private_ip_hash = tmp_priv_ip_hash;
			target_targ->private_ip_port_hash = tmp_priv_hash;
			target_targ->private_ip_info = tmp_priv_ip_info;
			target_targ->private_flow_entries = tmp_priv_flow_entries;
			target_targ->public_ip_port_hash = tmp_pub_hash;
			target_targ->public_entries = tmp_pub_entries;
			target_targ->public_ip_config_info = tmp_public_ip_config_info;
881 static void early_init_task_nat(struct task_args *targ)
884 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
885 if (!targ->private_ip_hash) {
886 ret = lua_to_hash_nat(targ, prox_lua(), GLOBAL, targ->nat_table, socket_id);
887 PROX_PANIC(ret != 0, "Failed to load NAT table from lua:\n%s\n", get_lua_to_errors());
891 static void init_task_nat(struct task_base *tbase, struct task_args *targ)
893 struct task_nat *task = (struct task_nat *)tbase;
894 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
896 /* Use destination IP by default. */
897 task->private = targ->use_src;
899 PROX_PANIC(!strcmp(targ->nat_table, ""), "No nat table specified\n");
900 task->lconf = targ->lconf;
901 task->runtime_flags = targ->runtime_flags;
903 task->public_ip_count = targ->public_ip_count;
904 task->last_ip = targ->public_ip_count;
905 task->private_ip_hash = targ->private_ip_hash;
906 task->private_ip_port_hash = targ->private_ip_port_hash;
907 task->private_ip_info = targ->private_ip_info;
908 task->private_flow_entries = targ->private_flow_entries;
909 task->public_ip_port_hash = targ->public_ip_port_hash;
910 task->public_entries = targ->public_entries;
911 task->public_ip_config_info = targ->public_ip_config_info;
913 proto_ipsrc_portsrc_mask = _mm_set_epi32(BIT_0_TO_15, 0, ALL_32_BITS, BIT_8_TO_15);
914 proto_ipdst_portdst_mask = _mm_set_epi32(BIT_16_TO_31, ALL_32_BITS, 0, BIT_8_TO_15);
918 PROX_PANIC(!strcmp(targ->route_table, ""), "route table not specified\n");
919 if (targ->flags & TASK_ARG_LOCAL_LPM) {
920 int ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
921 PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors());
922 prox_sh_add_socket(socket_id, targ->route_table, lpm);
923 task->number_free_rules = lpm->n_free_rules;
925 lpm = prox_sh_find_socket(socket_id, targ->route_table);
927 int ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
928 PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors());
929 prox_sh_add_socket(socket_id, targ->route_table, lpm);
932 task->ipv4_lpm = lpm->rte_lpm;
933 task->next_hops = lpm->next_hops;
934 task->number_free_rules = lpm->n_free_rules;
936 for (uint32_t i = 0; i < MAX_HOP_INDEX; i++) {
937 int tx_port = task->next_hops[i].mac_port.out_idx;
938 if ((tx_port > targ->nb_txports - 1) && (tx_port > targ->nb_txrings - 1)) {
939 PROX_PANIC(1, "Routing Table contains port %d but only %d tx port/ %d ring:\n", tx_port, targ->nb_txports, targ->nb_txrings);
943 if (targ->nb_txrings) {
944 struct task_args *dtarg;
946 for (uint32_t i = 0; i < targ->nb_txrings; ++i) {
947 ct = targ->core_task_set[0].core_task[i];
948 dtarg = core_targ_get(ct.core, ct.task);
949 dtarg = find_reachable_task_sending_to_port(dtarg);
950 task->src_mac[i] = (0x0000ffffffffffff & ((*(uint64_t*)&prox_port_cfg[dtarg->tx_port_queue[0].port].eth_addr))) | ((uint64_t)ETYPE_IPv4 << (64 - 16));
951 task->src_mac_from_dpdk_port[dtarg->tx_port_queue[0].port] = task->src_mac[i];
952 plogx_dbg("src_mac = %lx for port %d %d\n", task->src_mac[i], i, dtarg->tx_port_queue[0].port);
955 for (uint32_t i = 0; i < targ->nb_txports; ++i) {
956 task->src_mac[i] = (0x0000ffffffffffff & ((*(uint64_t*)&prox_port_cfg[targ->tx_port_queue[i].port].eth_addr))) | ((uint64_t)ETYPE_IPv4 << (64 - 16));
957 task->src_mac_from_dpdk_port[targ->tx_port_queue[0].port] = task->src_mac[i];
958 plogx_dbg("src_mac = %lx for port %d %d\n", task->src_mac[i], i, targ->tx_port_queue[i].port);
962 struct prox_port_cfg *port = find_reachable_port(targ);
964 task->offload_crc = port->capabilities.tx_offload_cksum;
968 /* Basic static nat. */
969 static struct task_init task_init_nat = {
972 .early_init = early_init_task_nat,
973 .init = init_task_nat,
974 .handle = handle_nat_bulk,
976 .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_ROUTING|TASK_FEATURE_ZERO_RX,
978 .flag_features = TASK_FEATURE_TXQ_FLAGS_NOMULTSEGS|TASK_FEATURE_ROUTING|TASK_FEATURE_ZERO_RX,
980 .size = sizeof(struct task_nat),
981 .mbuf_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
984 __attribute__((constructor)) static void reg_task_nat(void)
986 reg_task(&task_init_nat);