2 // Copyright (c) 2010-2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
19 #include <rte_hash_crc.h>
20 #include <rte_ether.h>
22 #include <rte_version.h>
23 #include <rte_byteorder.h>
26 #include "prox_lua_types.h"
28 #include "prox_malloc.h"
29 #include "prox_cksum.h"
34 #include "task_init.h"
35 #include "task_base.h"
38 #include "prox_port_cfg.h"
39 #include "hash_entry_types.h"
40 #include "prox_shared.h"
41 #include "handle_cgnat.h"
/* Bit masks over a 32-bit word. None of them is referenced in the visible part of this file. */
43 #define ALL_32_BITS 0xffffffff
44 #define BIT_16_TO_31 0xffff0000
45 #define BIT_8_TO_15 0x0000ff00
46 #define BIT_0_TO_15 0x0000ffff
51 } __attribute__((packed));
/*
 * Per-flow record stored at the index returned by private_ip_port_hash.
 * Only part of the field list is visible in this excerpt.
 */
53 struct private_flow_entry {
/* Index into task->private_ip_info[] of the private IP that owns this flow. */
56 uint32_t private_ip_idx;
63 } __attribute__((packed));
68 uint32_t private_ip_idx;
/* Per public IP bookkeeping of the L4 ports that can still be handed out (field list partly not visible). */
72 struct public_ip_config_info {
/* Capacity of port_list; set to the number of ports loaded at configuration time. */
74 uint32_t max_port_count;
/* Number of currently free ports; get_new_port() pops from port_list, release_port() pushes back. */
75 uint32_t port_free_count;
/* State kept per known private IP, indexed by the position returned by private_ip_hash (field list partly not visible). */
79 struct private_ip_info {
/* TSC timestamp; handle_nat_bulk() refreshes it when it is more than tsc_hz cycles old. */
80 uint64_t mac_aging_time;
/* Index into task->public_ip_config_info[] of the public IP assigned to this private IP. */
82 uint32_t public_ip_idx;
/*
 * NOTE(review): declared as a pointer, but handle_nat_bulk() copies 6 MAC bytes
 * into and out of the storage of this field itself (via &...private_mac); it is
 * never dereferenced in the visible code.
 */
83 struct rte_ether *private_mac;
/*
 * Fields of the NAT task context (accessed as struct task_nat in the functions below;
 * the struct header line itself is not visible in this excerpt).
 */
88 struct task_base base;
/* Keyed by the 32-bit private IP; the returned position indexes private_ip_info[]. */
89 struct rte_hash *private_ip_hash;
/* Keyed by struct private_key (ip + L4 port); the returned position indexes private_flow_entries[]. */
90 struct rte_hash *private_ip_port_hash;
/* Keyed by struct public_key (ip + L4 port); the returned position indexes public_entries[]. */
91 struct rte_hash *public_ip_port_hash;
92 struct private_flow_entry *private_flow_entries;
93 struct public_entry *public_entries;
/* Next-hop table and LPM used by route_ipv4() / set_l2(). */
94 struct next_hop *next_hops;
95 struct lcore_cfg *lconf;
96 struct rte_lpm *ipv4_lpm;
/* Updated by get_new_port() (decrement) and release_port() (increment). */
97 uint32_t total_free_port_count;
98 uint32_t number_free_rules;
100 uint32_t public_ip_count;
102 struct public_ip_config_info *public_ip_config_info;
103 struct private_ip_info *private_ip_info;
104 uint8_t runtime_flags;
/* 8-byte value stored over the source MAC by set_l2(), indexed by the next hop's out_idx. */
106 uint64_t src_mac[PROX_MAX_PORTS];
/* Indexed by public_entries[].dpdk_port; its first 6 bytes are copied in as source MAC for public-side packets. */
107 uint64_t src_mac_from_dpdk_port[PROX_MAX_PORTS];
/* Set by task_cgnat_dump_*_hash(); polled and cleared at the start of handle_nat_bulk(). */
108 volatile int dump_public_hash;
109 volatile int dump_private_hash;
/* 128-bit masks; not referenced anywhere in the visible part of this file. */
111 static __m128i proto_ipsrc_portsrc_mask;
112 static __m128i proto_ipdst_portdst_mask;
/*
 * Overlay for the start of a frame: Ethernet header, IPv4 header, then UDP header.
 * NOTE(review): the UDP header sits directly after a fixed-size IPv4 header, so this
 * presumably assumes packets without IPv4 options - confirm.
 */
113 struct pkt_eth_ipv4 {
114 prox_rte_ether_hdr ether_hdr;
115 prox_rte_ipv4_hdr ipv4_hdr;
116 prox_rte_udp_hdr udp_hdr;
117 } __attribute__((packed)) __attribute__((__aligned__(2)));
/* Request a dump of the public ip/port hash: handle_nat_bulk() logs the table and clears the flag. */
119 void task_cgnat_dump_public_hash(struct task_nat *task)
121 task->dump_public_hash = 1;
/* Request a dump of the private ip/port hash: handle_nat_bulk() logs the table and clears the flag. */
124 void task_cgnat_dump_private_hash(struct task_nat *task)
126 task->dump_private_hash = 1;
/*
 * Rewrite the Ethernet addresses of mbuf for next hop nh_idx.
 * Both stores are 8 bytes wide although they start at the address fields of the
 * header: the destination store is taken from next_hops[nh_idx].mac_port_8bytes,
 * the source store from task->src_mac[] indexed by the next hop's out_idx.
 * NOTE(review): the second store is issued after the first, so it overwrites
 * whatever the first one spilled past d_addr; the second one itself writes two
 * bytes past a 6-byte s_addr - presumably intended, confirm against how src_mac
 * is filled.
 */
129 static void set_l2(struct task_nat *task, struct rte_mbuf *mbuf, uint8_t nh_idx)
131 prox_rte_ether_hdr *peth = rte_pktmbuf_mtod(mbuf, prox_rte_ether_hdr *);
132 *((uint64_t *)(&peth->d_addr)) = task->next_hops[nh_idx].mac_port_8bytes;
133 *((uint64_t *)(&peth->s_addr)) = task->src_mac[task->next_hops[nh_idx].mac_port.out_idx];
/*
 * Route an IPv4 packet: look up its destination address in task->ipv4_lpm and
 * rewrite the Ethernet addresses for the selected next hop with set_l2().
 * The output port is taken from the next hop's mac_port.out_idx.
 * The return statements are not visible in this excerpt; the caller stores the
 * result in out[] and treats a non-zero value as a routing failure for new flows.
 */
136 static uint8_t route_ipv4(struct task_nat *task, struct rte_mbuf *mbuf)
138 struct pkt_eth_ipv4 *pkt = rte_pktmbuf_mtod(mbuf, struct pkt_eth_ipv4 *);
139 prox_rte_ipv4_hdr *ip = &pkt->ipv4_hdr;
140 prox_rte_ether_hdr *peth_out;
/* Only some L4 protocols are routed; the case labels are not visible here. */
144 switch(ip->next_proto_id) {
147 dst_ip = ip->dst_addr;
150 /* Routing for other protocols is not implemented */
151 plogx_info("Routing not implemented for this protocol\n");
/* The next-hop output argument of rte_lpm_lookup() is 32-bit wide from this DPDK version on, 8-bit before. */
155 #if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,1)
156 uint32_t next_hop_index;
158 uint8_t next_hop_index;
/* dst_ip is taken straight from the header, so it is byte-swapped before the LPM lookup. */
160 if (unlikely(rte_lpm_lookup(task->ipv4_lpm, rte_bswap32(dst_ip), &next_hop_index) != 0)) {
161 uint8_t* dst_ipp = (uint8_t*)&dst_ip;
/* The logged rc is the constant -ENOENT, not the value returned by the lookup. */
162 plog_warn("lpm_lookup failed for ip %d.%d.%d.%d: rc = %d\n",
163 dst_ipp[0], dst_ipp[1], dst_ipp[2], dst_ipp[3], -ENOENT);
167 tx_port = task->next_hops[next_hop_index].mac_port.out_idx;
168 set_l2(task, mbuf, next_hop_index);
/*
 * Counterpart of get_new_ip(); handle_nat_bulk() calls it when a freshly selected
 * public IP ends up not being used. The body is not visible in this excerpt.
 */
172 static int release_ip(struct task_nat *task, uint32_t *ip_addr, int public_ip_idx)
/*
 * Give udp_src_port back to the free-port stack of public IP public_ip_idx.
 * The port is pushed at position port_free_count of port_list and both the per-IP
 * and the task-wide free counters are incremented. When the stack is already full
 * (port_free_count == max_port_count) nothing is stored and an error is logged.
 * The return statements are not visible in this excerpt.
 */
177 static int release_port(struct task_nat *task, uint32_t public_ip_idx, uint16_t udp_src_port)
179 struct public_ip_config_info *public_ip_config_info = &task->public_ip_config_info[public_ip_idx];
/* Guard against pushing more ports than the list was allocated for. */
180 if (public_ip_config_info->max_port_count > public_ip_config_info->port_free_count) {
181 public_ip_config_info->port_list[public_ip_config_info->port_free_count] = udp_src_port;
182 public_ip_config_info->port_free_count++;
183 task->total_free_port_count ++;
184 plogx_dbg("Now %d free ports for IP %d.%d.%d.%d\n", public_ip_config_info->port_free_count, IP4(public_ip_config_info->public_ip));
/* Fix: terminate the message with a newline like every other log call in this file. */
186 plogx_err("Unable to release port for ip index %d: max_port_count = %d, port_free_count = %d\n", public_ip_idx, public_ip_config_info->max_port_count, public_ip_config_info->port_free_count);
/*
 * Select a public IP that still has free ports, rotating through the configured
 * public IPs: task->last_ip is advanced first, then indexes last_ip..public_ip_count-1
 * are scanned, then 0..last_ip-1. On a hit the public IP is written to *ip_addr
 * and task->last_ip is set to the index found.
 * The return statements are not visible in this excerpt; the caller uses the
 * result as the public IP index and treats a negative value as "none available".
 */
192 static int get_new_ip(struct task_nat *task, uint32_t *ip_addr)
194 struct public_ip_config_info *ip_info;
/* Start after the IP used last time; the wrap-around assignment is not visible here. */
195 if (++task->last_ip >= task->public_ip_count)
197 for (uint32_t ip_idx = task->last_ip; ip_idx < task->public_ip_count; ip_idx++) {
198 ip_info = &task->public_ip_config_info[ip_idx];
199 plogx_dbg("Checking public IP index %d\n", ip_idx);
200 if ((ip_info->port_free_count) > 0) {
201 plogx_dbg("Public IP index %d (IP %d.%d.%d.%d) has %d free ports\n", ip_idx, IP4(ip_info->public_ip), ip_info->port_free_count);
202 *ip_addr = ip_info->public_ip;
203 task->last_ip = ip_idx;
/* Second pass: the indexes in front of the starting point. */
207 for (uint32_t ip_idx = 0; ip_idx < task->last_ip; ip_idx++) {
208 ip_info = &task->public_ip_config_info[ip_idx];
209 if ((ip_info->port_free_count) > 0) {
210 plogx_dbg("Public IP index %d (IP %d.%d.%d.%d) has %d free ports\n", ip_idx, IP4(ip_info->public_ip), ip_info->port_free_count);
211 *ip_addr = ip_info->public_ip;
212 task->last_ip = ip_idx;
/*
 * Pop a free port for public IP ip_idx: port_free_count is decremented and the
 * port stored at that position of port_list is written to *udp_src_port; the
 * task-wide free counter is decremented as well.
 * The return statements are not visible in this excerpt; add_new_port_entry()
 * treats a negative result as "no port available".
 */
219 static int get_new_port(struct task_nat *task, uint32_t ip_idx, uint16_t *udp_src_port)
222 struct public_ip_config_info *public_ip_config_info = &task->public_ip_config_info[ip_idx];
223 if (public_ip_config_info->port_free_count > 0) {
224 public_ip_config_info->port_free_count--;
225 *udp_src_port = public_ip_config_info->port_list[public_ip_config_info->port_free_count];
226 task->total_free_port_count --;
/* NOTE(review): logged at info level although release_port() logs the matching message at debug level. */
227 plogx_info("Now %d free ports for IP %d.%d.%d.%d\n", public_ip_config_info->port_free_count, IP4(public_ip_config_info->public_ip));
/*
 * Remove one NAT mapping: delete (private_ip, private_port) from the private
 * ip/port hash, delete (public_ip, public_port) from the public ip/port hash and
 * hand the public port back with release_port().
 * The proto argument is not part of either key (see the commented-out assignment).
 * The checks on ret and the return statements are not visible in this excerpt.
 */
233 static int delete_port_entry(struct task_nat *task, uint8_t proto, uint32_t private_ip, uint16_t private_port, uint32_t public_ip, uint16_t public_port, int public_ip_idx)
236 struct private_key private_key;
237 struct public_key public_key;
238 // private_key.proto = proto;
239 private_key.ip_addr = private_ip;
240 private_key.l4_port = private_port;
241 ret = rte_hash_del_key(task->private_ip_port_hash, (const void *)&private_key);
243 plogx_info("Unable delete key ip %d.%d.%d.%d / port %x in private ip_port hash\n", IP4(private_ip), private_port);
246 plogx_dbg("Deleted ip %d.%d.%d.%d / port %x from private ip_port hash\n", IP4(private_ip), private_port);
248 public_key.ip_addr = public_ip;
249 public_key.l4_port = public_port;
250 ret = rte_hash_del_key(task->public_ip_port_hash, (const void *)&public_key);
252 plogx_info("Unable delete key ip %d.%d.%d.%d / port %x in public ip_port hash\n", IP4(public_ip), public_port);
255 plogx_dbg("Deleted ip %d.%d.%d.%d / port %x (hash index %d) from public ip_port hash\n", IP4(public_ip), public_port, ret);
256 release_port(task, public_ip_idx, public_port);
/*
 * Create a new NAT mapping for (private_src_ip, private_udp_port) on public IP
 * public_ip_idx:
 *   1. take a free public port with get_new_port() (written to *port);
 *   2. add the private key to private_ip_port_hash and fill the matching
 *      private_flow_entries[] slot with the public ip/port, tsc and private_ip_idx;
 *   3. add the public key to public_ip_port_hash and fill the matching
 *      public_entries[] slot with the private ip/port, the mbuf's input port and
 *      private_ip_idx.
 * On each visible failure path the public port is handed back with release_port().
 * The proto argument is not part of the keys (see the commented-out assignment).
 * The checks on ret and the return statements are not visible in this excerpt;
 * handle_nat_bulk() uses the result as index into private_flow_entries[].
 */
261 static int add_new_port_entry(struct task_nat *task, uint8_t proto, int public_ip_idx, int private_ip_idx, uint32_t private_src_ip, uint16_t private_udp_port, struct rte_mbuf *mbuf, uint64_t tsc, uint16_t *port)
263 struct private_key private_key;
264 struct public_key public_key;
265 uint32_t ip = task->public_ip_config_info[public_ip_idx].public_ip;
267 if (get_new_port(task, public_ip_idx, port) < 0) {
268 plogx_info("Unable to find new port for IP %x\n", private_src_ip);
271 // private_key.proto = proto;
272 private_key.ip_addr = private_src_ip;
273 private_key.l4_port = private_udp_port;
274 ret = rte_hash_add_key(task->private_ip_port_hash, (const void *)&private_key);
276 plogx_info("Unable add ip %d.%d.%d.%d / port %x in private ip_port hash\n", IP4(private_src_ip), private_udp_port);
277 release_port(task, public_ip_idx, *port);
/* A non-zero ip_addr in the slot means the key was already mapped: the new port is not needed. */
279 } else if (task->private_flow_entries[ret].ip_addr) {
280 plogx_dbg("Race condition properly handled: port alrerady added\n");
281 release_port(task, public_ip_idx, *port);
284 plogx_dbg("Added ip %d.%d.%d.%d / port %x in private ip_port hash => %d.%d.%d.%d / %d - index = %d\n", IP4(private_src_ip), private_udp_port, IP4(ip), *port, ret);
286 task->private_flow_entries[ret].ip_addr = ip;
287 task->private_flow_entries[ret].l4_port = *port;
288 task->private_flow_entries[ret].flow_time = tsc;
289 task->private_flow_entries[ret].private_ip_idx = private_ip_idx;
291 public_key.ip_addr = ip;
292 public_key.l4_port = *port;
293 plogx_dbg("Adding key ip %d.%d.%d.%d / port %x in public ip_port hash\n", IP4(ip), *port);
294 ret = rte_hash_add_key(task->public_ip_port_hash, (const void *)&public_key);
296 plogx_info("Unable add ip %x / port %x in public ip_port hash\n", ip, *port);
297 // TODO: remove from private_ip_port_hash
298 release_port(task, public_ip_idx, *port);
301 plogx_dbg("Added ip %d.%d.%d.%d / port %x in public ip_port hash\n", IP4(ip), *port);
303 task->public_entries[ret].ip_addr = private_src_ip;
304 task->public_entries[ret].l4_port = private_udp_port;
305 task->public_entries[ret].dpdk_port = mbuf->port;
306 task->public_entries[ret].private_ip_idx = private_ip_idx;
/*
 * Packet handler of the CGNAT task.
 *
 * Before touching packets it serves pending dump requests (dump_public_hash /
 * dump_private_hash) by iterating the corresponding hash and logging each entry.
 *
 * Two translation directions are implemented, each ending in its own tx_pkt() call
 * (the condition selecting the direction is not visible in this excerpt):
 *  - private side: the source ip/port is looked up in private_ip_port_hash and
 *    rewritten to the mapped public ip/port. Packets without a mapping are
 *    collected, their source IP is looked up in private_ip_hash, and a new public
 *    IP and/or port is allocated before the packet is rewritten and routed.
 *  - public side: the destination ip/port is looked up in public_ip_port_hash and
 *    rewritten to the private ip/port; MAC addresses are restored and the packet
 *    is sent to the DPDK port the flow was first seen on.
 * Non-IPv4 frames and packets that cannot be mapped are marked OUT_DISCARD.
 *
 * Returns the result of task->base.tx_pkt().
 */
310 static int handle_nat_bulk(struct task_base *tbase, struct rte_mbuf **mbufs, uint16_t n_pkts)
312 struct task_nat *task = (struct task_nat *)tbase;
313 uint8_t out[MAX_PKT_BURST] = {0};
315 uint32_t *ip_addr, public_ip, private_ip;
316 uint16_t *udp_src_port, port, private_port, public_port;
317 struct pkt_eth_ipv4 *pkt[MAX_PKT_BURST];
318 int ret, private_ip_idx, public_ip_idx = -1, port_idx;
321 uint64_t tsc = rte_rdtsc();
322 void *keys[MAX_PKT_BURST];
323 int32_t positions[MAX_PKT_BURST];
/* map[k] remembers which packet index the k-th "needs a new mapping" key belongs to. */
324 int map[MAX_PKT_BURST] = {0};
/* All-zero key used as lookup placeholder for packets that are already discarded. */
325 struct public_key null_key ={0};
/* Serve dump requests raised through task_cgnat_dump_public_hash() / task_cgnat_dump_private_hash(). */
327 if (unlikely(task->dump_public_hash)) {
328 const struct public_key *next_key;
334 while ((ret = rte_hash_iterate(task->public_ip_port_hash, (const void **)&next_key, &next_data, &iter)) >= 0) {
335 plogx_info("Public entry %d (index %d): ip = %d.%d.%d.%d, port = %d ===> private entry: ip = %d.%d.%d.%d, port = %d\n", i++, ret, IP4(next_key->ip_addr), next_key->l4_port, IP4(task->public_entries[ret].ip_addr),task->public_entries[ret].l4_port);
337 task->dump_public_hash = 0;
339 if (unlikely(task->dump_private_hash)) {
340 const struct private_key *next_key;
346 while ((ret = rte_hash_iterate(task->private_ip_port_hash, (const void **)&next_key, &next_data, &iter)) >= 0) {
347 plogx_info("Private entry %d (index %d): ip = %d.%d.%d.%d, port = %d ===> public entry: ip = %d.%d.%d.%d, port = %d\n", i++, ret, IP4(next_key->ip_addr), next_key->l4_port, IP4(task->private_flow_entries[ret].ip_addr),task->private_flow_entries[ret].l4_port);
349 task->dump_private_hash = 0;
352 for (j = 0; j < n_pkts; ++j) {
355 for (j = 0; j < n_pkts; ++j) {
356 pkt[j] = rte_pktmbuf_mtod(mbufs[j], struct pkt_eth_ipv4 *);
/* ---- private side: translate source ip/port to the public mapping ---- */
360 struct private_key key[MAX_PKT_BURST];
361 for (j = 0; j < n_pkts; ++j) {
362 /* Currently, only support eth/ipv4 packets */
363 if (pkt[j]->ether_hdr.ether_type != ETYPE_IPv4) {
364 plogx_info("Currently, only support eth/ipv4 packets\n");
365 out[j] = OUT_DISCARD;
366 keys[j] = (void *)&null_key;
369 key[j].ip_addr = pkt[j]->ipv4_hdr.src_addr;
370 key[j].l4_port = pkt[j]->udp_hdr.src_port;
373 ret = rte_hash_lookup_bulk(task->private_ip_port_hash, (const void **)&keys, n_pkts, positions);
374 if (unlikely(ret < 0)) {
375 plogx_info("lookup_bulk failed in private_ip_port_hash\n");
/* First pass: rewrite packets of known flows, queue the others for a new mapping. */
378 int n_new_mapping = 0;
379 for (j = 0; j < n_pkts; ++j) {
380 port_idx = positions[j];
381 if (out[j] != OUT_DISCARD) {
382 if (unlikely(port_idx < 0)) {
383 plogx_dbg("ip %d.%d.%d.%d / port %x not found in private ip/port hash\n", IP4(pkt[j]->ipv4_hdr.src_addr), pkt[j]->udp_hdr.src_port);
/* keys[] is reused: it now holds pointers to the source IPs that need a mapping. */
384 map[n_new_mapping] = j;
385 keys[n_new_mapping++] = (void *)&(pkt[j]->ipv4_hdr.src_addr);
387 ip_addr = &(pkt[j]->ipv4_hdr.src_addr);
388 udp_src_port = &(pkt[j]->udp_hdr.src_port);
389 plogx_dbg("ip/port %d.%d.%d.%d / %x found in private ip/port hash\n", IP4(pkt[j]->ipv4_hdr.src_addr), pkt[j]->udp_hdr.src_port);
390 *ip_addr = task->private_flow_entries[port_idx].ip_addr;
391 *udp_src_port = task->private_flow_entries[port_idx].l4_port;
/* Timestamps are only refreshed when older than tsc_hz cycles, limiting writes per flow. */
392 uint64_t flow_time = task->private_flow_entries[port_idx].flow_time;
393 if (flow_time + tsc_hz < tsc) {
394 task->private_flow_entries[port_idx].flow_time = tsc;
396 private_ip_idx = task->private_flow_entries[port_idx].private_ip_idx;
397 if (task->private_ip_info[private_ip_idx].mac_aging_time + tsc_hz < tsc)
398 task->private_ip_info[private_ip_idx].mac_aging_time = tsc;
399 prox_ip_udp_cksum(mbufs[j], &pkt[j]->ipv4_hdr, sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr), task->offload_crc);
400 out[j] = route_ipv4(task, mbufs[j]);
406 // Find whether at least IP is already known...
407 ret = rte_hash_lookup_bulk(task->private_ip_hash, (const void **)&keys, n_new_mapping, positions);
408 if (unlikely(ret < 0)) {
409 plogx_info("lookup_bulk failed for private_ip_hash\n");
410 for (int k = 0; k < n_new_mapping; ++k) {
412 out[j] = OUT_DISCARD;
/* Second pass: allocate a public IP (if the private IP is new) and a public port per queued packet. */
416 for (int k = 0; k < n_new_mapping; ++k) {
417 private_ip_idx = positions[k];
419 ip_addr = &(pkt[j]->ipv4_hdr.src_addr);
420 proto = pkt[j]->ipv4_hdr.next_proto_id;
421 udp_src_port = &(pkt[j]->udp_hdr.src_port);
422 int new_ip_entry = 0;
424 if (unlikely(private_ip_idx < 0)) {
425 private_ip = *ip_addr;
426 private_port = *udp_src_port;
427 plogx_dbg("Did not find private ip %d.%d.%d.%d in ip hash table, looking for new public ip\n", IP4(*ip_addr));
428 // IP not found, need to get a new IP/port mapping
429 public_ip_idx = get_new_ip(task, &public_ip);
430 if (public_ip_idx < 0) {
431 plogx_info("Unable to find new ip/port\n");
432 out[j] = OUT_DISCARD;
435 plogx_dbg("found new public ip %d.%d.%d.%d at public IP index %d\n", IP4(public_ip), public_ip_idx);
437 private_ip_idx = rte_hash_add_key(task->private_ip_hash, (const void *)ip_addr);
438 // The key might be added multiple time - in case the same key was present in the bulk_lookup multiple times
439 // As such this is not an issue - the add_key will returns the index as for a new key
440 // This scenario should not happen often in real time use case
441 // as a for a new flow (flow renewal), probably only one packet will be sent (e.g. TCP SYN)
442 if (private_ip_idx < 0) {
443 release_ip(task, &public_ip, public_ip_idx);
444 plogx_info("Unable add ip %d.%d.%d.%d in private ip hash\n", IP4(*ip_addr));
445 out[j] = OUT_DISCARD;
447 } else if (task->private_ip_info[private_ip_idx].public_ip) {
448 plogx_info("race condition properly handled : ip %d.%d.%d.%d already in private ip hash\n", IP4(*ip_addr));
449 release_ip(task, &public_ip, public_ip_idx);
450 public_ip = task->private_ip_info[private_ip_idx].public_ip;
451 public_ip_idx = task->private_ip_info[private_ip_idx].public_ip_idx;
453 plogx_dbg("Added ip %d.%d.%d.%d in private ip hash\n", IP4(*ip_addr));
/*
 * Fix: remember the source MAC of packet j (bytes 6..11 of the frame). The
 * original read from the local pkt[] pointer array itself instead of from
 * pkt[j], storing pointer bytes as the private MAC that the public-side path
 * later writes back into frames.
 */
454 rte_memcpy(&task->private_ip_info[private_ip_idx].private_mac, ((uint8_t *)pkt[j]) + 6, 6);
455 task->private_ip_info[private_ip_idx].public_ip = public_ip;
456 task->private_ip_info[private_ip_idx].static_entry = 0;
457 task->private_ip_info[private_ip_idx].public_ip_idx = public_ip_idx;
461 public_ip = task->private_ip_info[private_ip_idx].public_ip;
462 public_ip_idx = task->private_ip_info[private_ip_idx].public_ip_idx;
464 port_idx = add_new_port_entry(task, proto, public_ip_idx, private_ip_idx, *ip_addr, *udp_src_port, mbufs[j], tsc, &public_port);
466 // TODO: delete IP in ip_hash
467 if ((new_ip_entry) && (task->last_ip != 0)) {
468 release_ip(task, &public_ip, public_ip_idx);
470 } else if (new_ip_entry) {
471 release_ip(task, &public_ip, public_ip_idx);
472 task->last_ip = task->public_ip_count-1;
474 plogx_info("Failed to add new port entry\n");
475 out[j] = OUT_DISCARD;
478 private_ip = *ip_addr;
479 private_port = *udp_src_port;
480 plogx_info("Added new ip/port: private ip/port = %d.%d.%d.%d/%x public ip/port = %d.%d.%d.%d/%x, index = %d\n", IP4(private_ip), private_port, IP4(public_ip), public_port, port_idx);
482 // task->private_flow_entries[port_idx].ip_addr = task->private_ip_info[private_ip_idx].public_ip;
483 plogx_info("Added new port: private ip/port = %d.%d.%d.%d/%x, public ip/port = %d.%d.%d.%d/%x\n", IP4(private_ip), private_port, IP4(task->private_ip_info[private_ip_idx].public_ip), public_port);
484 *ip_addr = public_ip ;
485 *udp_src_port = public_port;
486 uint64_t flow_time = task->private_flow_entries[port_idx].flow_time;
487 if (flow_time + tsc_hz < tsc) {
488 task->private_flow_entries[port_idx].flow_time = tsc;
490 if (task->private_ip_info[private_ip_idx].mac_aging_time + tsc_hz < tsc)
491 task->private_ip_info[private_ip_idx].mac_aging_time = tsc;
492 prox_ip_udp_cksum(mbufs[j], &pkt[j]->ipv4_hdr, sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr), task->offload_crc);
493 // TODO: if route fails while just added new key in table, should we delete the key from the table?
494 out[j] = route_ipv4(task, mbufs[j]);
/* A mapping created for a packet that could not be routed is removed again. */
495 if (out[j] && new_entry) {
496 delete_port_entry(task, proto, private_ip, private_port, *ip_addr, *udp_src_port, public_ip_idx);
497 plogx_info("Deleted port: private ip/port = %d.%d.%d.%d/%x, public ip/port = %d.%d.%d.%d/%x\n", IP4(private_ip), private_port, IP4(*ip_addr), *udp_src_port);
501 return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
/* ---- public side: translate destination ip/port back to the private mapping ---- */
503 struct public_key public_key[MAX_PKT_BURST];
504 for (j = 0; j < n_pkts; ++j) {
505 /* Currently, only support eth/ipv4 packets */
506 if (pkt[j]->ether_hdr.ether_type != ETYPE_IPv4) {
507 plogx_info("Currently, only support eth/ipv4 packets\n");
508 out[j] = OUT_DISCARD;
509 keys[j] = (void *)&null_key;
512 public_key[j].ip_addr = pkt[j]->ipv4_hdr.dst_addr;
513 public_key[j].l4_port = pkt[j]->udp_hdr.dst_port;
514 keys[j] = &public_key[j];
516 ret = rte_hash_lookup_bulk(task->public_ip_port_hash, (const void **)&keys, n_pkts, positions);
518 plogx_err("Failed lookup bulk public_ip_port_hash\n");
521 for (j = 0; j < n_pkts; ++j) {
522 port_idx = positions[j];
/* On this path ip_addr / udp_src_port point at the DESTINATION fields of the packet. */
523 ip_addr = &(pkt[j]->ipv4_hdr.dst_addr);
524 udp_src_port = &(pkt[j]->udp_hdr.dst_port);
525 if (out[j] != OUT_DISCARD) {
527 plogx_err("Failed to find ip/port %d.%d.%d.%d/%x in public_ip_port_hash\n", IP4(*ip_addr), *udp_src_port);
528 out[j] = OUT_DISCARD;
530 plogx_dbg("Found ip/port %d.%d.%d.%d/%x in public_ip_port_hash\n", IP4(*ip_addr), *udp_src_port);
531 *ip_addr = task->public_entries[port_idx].ip_addr;
532 *udp_src_port = task->public_entries[port_idx].l4_port;
533 private_ip_idx = task->public_entries[port_idx].private_ip_idx;
534 plogx_dbg("Found private IP info for ip %d.%d.%d.%d\n", IP4(*ip_addr));
/* Destination MAC: the MAC learnt for the private IP; source MAC: the one of the egress DPDK port. */
535 rte_memcpy(((uint8_t *)(pkt[j])) + 0, &task->private_ip_info[private_ip_idx].private_mac, 6);
536 rte_memcpy(((uint8_t *)(pkt[j])) + 6, &task->src_mac_from_dpdk_port[task->public_entries[port_idx].dpdk_port], 6);
537 out[j] = task->public_entries[port_idx].dpdk_port;
540 prox_ip_udp_cksum(mbufs[j], &pkt[j]->ipv4_hdr, sizeof(prox_rte_ether_hdr), sizeof(prox_rte_ipv4_hdr), task->offload_crc);
542 return task->base.tx_pkt(&task->base, mbufs, n_pkts, out);
/*
 * Build the CGNAT tables from the Lua configuration table `name`.
 *
 * Three optional sub-tables are read:
 *   "dynamic"        - groups of public IP ranges with a public port range each;
 *   "static_ip"      - one-to-one private IP -> public IP mappings;
 *   "static_ip_port" - one-to-one private ip/port -> public ip/port mappings.
 * Overlaps between static and dynamic public IPs are rejected with PROX_PANIC.
 *
 * From those the function allocates (on `socket`) the per public IP port lists,
 * the private IP info array, the three rte_hash tables and their data arrays,
 * pre-loads the static ip/port mappings, and finally publishes all pointers to
 * every task of this lcore whose mode is CGNAT.
 *
 * The return statements are not visible in this excerpt; the caller treats a
 * non-zero result as failure.
 *
 * Fixes relative to the original:
 *   - the private IP info array is sized with its own element type;
 *   - each public IP's port list is sized from its own group's port range,
 *     including both ends (the fill loop is inclusive);
 *   - the static_ip overlap check compares in the same byte order as the
 *     static_ip_port check;
 *   - the duplicate-public-IP scan for static ip/port entries no longer compares
 *     an entry with itself;
 *   - the hash name is formatted with a bounded snprintf().
 */
547 static int lua_to_hash_nat(struct task_args *targ, struct lua_State *L, enum lua_place from, const char *name, uint8_t socket)
549 struct rte_hash *tmp_priv_ip_hash, *tmp_priv_hash, *tmp_pub_hash;
550 struct private_flow_entry *tmp_priv_flow_entries;
551 struct public_entry *tmp_pub_entries;
552 uint32_t n_entries = 0;
553 uint32_t ip_from, ip_to;
554 uint16_t port_from, port_to;
555 int ret, idx, pop, pop2, pop3, n_static_entries = 0;
556 uint32_t dst_ip1, dst_ip2;
557 struct val_range dst_port;
558 struct public_ip_config_info *ip_info;
559 struct public_ip_config_info *tmp_public_ip_config_info;
561 if ((pop = lua_getfrom(L, from, name)) < 0)
564 if (!lua_istable(L, -1)) {
565 plogx_err("Can't read cgnat since data is not a table\n");
/* Temporary, parse-time representations of the three configuration tables. */
569 struct tmp_public_ip {
575 struct tmp_static_ip {
579 struct tmp_static_ip_port {
583 uint16_t private_port;
584 uint16_t public_port;
588 uint32_t n_public_groups = 0;
589 uint32_t n_public_ip = 0;
590 uint32_t n_static_ip = 0;
591 uint32_t n_static_ip_port = 0;
593 struct tmp_public_ip *tmp_public_ip = NULL;
594 struct tmp_static_ip *tmp_static_ip = NULL;
595 struct tmp_static_ip_port *tmp_static_ip_port = NULL;
597 // Look for Dynamic entries configuration
598 plogx_info("Reading dynamic NAT table\n");
599 if ((pop2 = lua_getfrom(L, TABLE, "dynamic")) < 0) {
600 plogx_info("No dynamic table found\n");
602 uint64_t n_ip, n_port;
603 if (!lua_istable(L, -1)) {
604 plogx_err("Can't read cgnat since data is not a table\n");
608 n_public_groups = lua_tointeger(L, -1);
609 plogx_info("%d groups of public IP\n", n_public_groups);
610 tmp_public_ip = (struct tmp_public_ip *)malloc(n_public_groups * sizeof(struct tmp_public_ip));
611 PROX_PANIC(tmp_public_ip == NULL, "Failed to allocated tmp_public_ip\n");
615 while (lua_next(L, -2)) {
616 if (lua_to_ip(L, TABLE, "public_ip_range_start", &dst_ip1) ||
617 lua_to_ip(L, TABLE, "public_ip_range_stop", &dst_ip2) ||
618 lua_to_val_range(L, TABLE, "public_port", &dst_port))
/* Ranges are kept in host byte order here (the first octet is the top byte). */
620 PROX_PANIC(dst_ip2 < dst_ip1, "public_ip_range error: %d.%d.%d.%d < %d.%d.%d.%d\n", (dst_ip2 >> 24), (dst_ip2 >> 16) & 0xFF, (dst_ip2 >> 8) & 0xFF, dst_ip2 & 0xFF, dst_ip1 >> 24, (dst_ip1 >> 16) & 0xFF, (dst_ip1 >> 8) & 0xFF, dst_ip1 & 0xFF);
621 PROX_PANIC(dst_port.end < dst_port.beg, "public_port error: %d < %d\n", dst_port.end, dst_port.beg);
622 n_ip = dst_ip2 - dst_ip1 + 1;
623 n_port = dst_port.end - dst_port.beg + 1;
625 plogx_info("Found IP from %d.%d.%d.%d to %d.%d.%d.%d and port from %d to %d\n", dst_ip1 >> 24, (dst_ip1 >> 16) & 0xFF, (dst_ip1 >> 8) & 0xFF, dst_ip1 & 0xFF, (dst_ip2 >> 24), (dst_ip2 >> 16) & 0xFF, (dst_ip2 >> 8) & 0xFF, dst_ip2 & 0xFF, dst_port.beg, dst_port.end);
626 tmp_public_ip[i].ip_beg = dst_ip1;
627 tmp_public_ip[i].ip_end = dst_ip2;
628 tmp_public_ip[i].port_beg = dst_port.beg;
629 tmp_public_ip[i++].port_end = dst_port.end;
630 n_entries += n_ip * n_port;
637 if ((pop2 = lua_getfrom(L, TABLE, "static_ip")) < 0) {
638 plogx_info("No static ip table found\n");
640 if (!lua_istable(L, -1)) {
641 plogx_err("Can't read cgnat since data is not a table\n");
646 n_static_ip = lua_tointeger(L, -1);
647 plogx_info("%d entries in static ip table\n", n_static_ip);
649 tmp_static_ip = (struct tmp_static_ip *)malloc(n_static_ip * sizeof(struct tmp_static_ip));
650 PROX_PANIC(tmp_static_ip == NULL, "Failed to allocated tmp_static_ip\n");
652 while (lua_next(L, -2)) {
653 if (lua_to_ip(L, TABLE, "src_ip", &ip_from) ||
654 lua_to_ip(L, TABLE, "dst_ip", &ip_to))
/* Static addresses are stored byte-swapped, i.e. in the order they appear in packets. */
656 ip_from = rte_bswap32(ip_from);
657 ip_to = rte_bswap32(ip_to);
658 tmp_static_ip[i].private_ip = ip_from;
659 tmp_static_ip[i++].public_ip = ip_to;
660 for (unsigned int j = 0; j < n_public_groups; j++) {
/* Fix: ip_to was swapped above, the dynamic ranges were not - swap back before comparing. */
661 if ((tmp_public_ip[j].ip_beg <= rte_bswap32(ip_to)) && (rte_bswap32(ip_to) <= tmp_public_ip[j].ip_end)) {
662 PROX_PANIC(1, "list of static ip mapping overlap with list of dynamic IP => not supported yet\n");
672 if ((pop2 = lua_getfrom(L, TABLE, "static_ip_port")) < 0) {
673 plogx_info("No static table found\n");
675 if (!lua_istable(L, -1)) {
676 plogx_err("Can't read cgnat since data is not a table\n");
681 n_static_ip_port = lua_tointeger(L, -1);
682 plogx_info("%d entries in static table\n", n_static_ip_port);
684 tmp_static_ip_port = (struct tmp_static_ip_port *)malloc(n_static_ip_port * sizeof(struct tmp_static_ip_port));
685 PROX_PANIC(tmp_static_ip_port == NULL, "Failed to allocated tmp_static_ip_port\n");
688 while (lua_next(L, -2)) {
689 if (lua_to_ip(L, TABLE, "src_ip", &ip_from) ||
690 lua_to_ip(L, TABLE, "dst_ip", &ip_to) ||
691 lua_to_port(L, TABLE, "src_port", &port_from) ||
692 lua_to_port(L, TABLE, "dst_port", &port_to))
695 ip_from = rte_bswap32(ip_from);
696 ip_to = rte_bswap32(ip_to);
697 port_from = rte_bswap16(port_from);
698 port_to = rte_bswap16(port_to);
699 tmp_static_ip_port[i].private_ip = ip_from;
700 tmp_static_ip_port[i].public_ip = ip_to;
701 tmp_static_ip_port[i].private_port = port_from;
702 tmp_static_ip_port[i].public_port = port_to;
703 tmp_static_ip_port[i].n_ports = 1;
704 for (unsigned int j = 0; j < n_public_groups; j++) {
705 if ((tmp_public_ip[j].ip_beg <= rte_bswap32(ip_to)) && (rte_bswap32(ip_to) <= tmp_public_ip[j].ip_end)) {
/* Fix: same "index + 1" convention as the two scans below (was j + 11). */
706 tmp_static_ip_port[i].ip_found = j + 1;
707 PROX_PANIC(1, "list of static ip/port mapping overlap with list of dynamic IP => not supported yet\n");
710 for (unsigned int j = 0; j < n_static_ip; j++) {
711 if ((tmp_static_ip[j].public_ip == ip_to) ) {
712 tmp_static_ip_port[i].ip_found = j + 1;
713 PROX_PANIC(1, "list of static ip/port mapping overlap with list of static ip => not supported yet\n");
/*
 * Look for an EARLIER entry using the same public IP; that entry then owns the
 * port list and this one contributes one more port to it.
 * Fix: the scan stops before i. With "j <= i" every entry matched itself, was
 * marked as a duplicate and ended up with n_ports == 0.
 * NOTE(review): ip_found is read below even when no scan set it, and the array
 * comes from malloc() - confirm it is initialised in the lines not visible here.
 */
716 for (unsigned int j = 0; j < i; j++) {
717 if (ip_to == tmp_static_ip_port[j].public_ip) {
718 tmp_static_ip_port[i].ip_found = j + 1;
719 tmp_static_ip_port[j].n_ports++;
720 tmp_static_ip_port[i].n_ports = 0;
724 if (!tmp_static_ip_port[i].ip_found) {
734 tmp_public_ip_config_info = (struct public_ip_config_info *)prox_zmalloc(n_public_ip * sizeof(struct public_ip_config_info), socket);
735 PROX_PANIC(tmp_public_ip_config_info == NULL, "Failed to allocate PUBLIC IP INFO\n");
736 plogx_info("%d PUBLIC IP INFO allocated\n", n_public_ip);
/* Fix: size the array with its own element type (was sizeof(struct public_ip_config_info)). */
738 struct private_ip_info *tmp_priv_ip_info = (struct private_ip_info *)prox_zmalloc(4 * n_public_ip * sizeof(struct private_ip_info), socket);
739 PROX_PANIC(tmp_priv_ip_info == NULL, "Failed to allocate PRIVATE IP INFO\n");
740 plogx_info("%d PRIVATE IP INFO allocated\n", 4 * n_public_ip);
/* Expand every dynamic group into one public_ip_config_info per IP, each with its stack of free ports. */
742 uint32_t ip_free_count = 0;
743 for (i = 0; i < n_public_groups; i++) {
744 for (uint32_t ip = tmp_public_ip[i].ip_beg; ip <= tmp_public_ip[i].ip_end; ip++) {
745 ip_info = &tmp_public_ip_config_info[ip_free_count];
746 ip_info->public_ip = rte_bswap32(ip);
/*
 * Fix: allocate one slot per port of THIS group's inclusive range. The original
 * used the range of the last group parsed and was one element short, so the
 * fill loop below wrote past the end of the list.
 */
747 ip_info->port_list = (uint16_t *)prox_zmalloc((tmp_public_ip[i].port_end - tmp_public_ip[i].port_beg + 1) * sizeof(uint16_t), socket);
748 PROX_PANIC(ip_info->port_list == NULL, "Failed to allocate list of ports for ip %x\n", ip);
749 for (uint32_t port = tmp_public_ip[i].port_beg; port <= tmp_public_ip[i].port_end; port++) {
750 ip_info->port_list[ip_info->port_free_count] = rte_bswap16(port);
751 ip_info->port_free_count++;
753 ip_info->max_port_count = ip_info->port_free_count;
754 plogx_dbg("Added IP %d.%d.%d.%d with ports from %x to %x at index %x\n", IP4(ip_info->public_ip), tmp_public_ip[i].port_beg, tmp_public_ip[i].port_end, ip_free_count);
/* Only the dynamic IPs counted so far are candidates for get_new_ip(). */
758 uint32_t public_ip_count = ip_free_count;
759 for (i = 0; i < n_static_ip; i++) {
760 ip_info = &tmp_public_ip_config_info[ip_free_count];
761 ip_info->public_ip = tmp_static_ip[i].public_ip;
762 ip_info->port_list = NULL;
763 ip_info->max_port_count = 0;
766 for (i = 0; i < n_static_ip_port; i++) {
/* First entry for a public IP allocates the port list; later entries append to it. */
767 if (!tmp_static_ip_port[i].ip_found) {
768 ip_info = &tmp_public_ip_config_info[ip_free_count];
769 ip_info->public_ip = tmp_static_ip_port[i].public_ip;
770 ip_info->port_list = (uint16_t *)prox_zmalloc(tmp_static_ip_port[i].n_ports * sizeof(uint16_t), socket);
771 PROX_PANIC(ip_info->port_list == NULL, "Failed to allocate list of ports for ip %x\n", tmp_static_ip_port[i].public_ip);
772 ip_info->port_list[ip_info->port_free_count] = tmp_static_ip_port[i].public_port;
773 ip_info->port_free_count++;
774 ip_info->max_port_count = ip_info->port_free_count;
777 for (unsigned j = 0; j < ip_free_count; j++) {
778 ip_info = &tmp_public_ip_config_info[j];
779 if (ip_info->public_ip == tmp_static_ip_port[i].public_ip) {
780 ip_info = &tmp_public_ip_config_info[j];
781 ip_info->port_list[ip_info->port_free_count] = tmp_static_ip_port[i].public_port;
782 ip_info->port_free_count++;
783 ip_info->max_port_count = ip_info->port_free_count;
789 plogx_info("%d entries in dynamic table\n", n_entries);
/* The hash tables are created with four times as many slots as mappings. */
791 n_entries = n_entries * 4;
792 static char hash_name[30];
/* Fix: bounded formatting so a large lcore id cannot overrun hash_name. */
793 snprintf(hash_name, sizeof(hash_name), "A%03d_hash_nat_table", targ->lconf->id);
794 struct rte_hash_parameters hash_params = {
796 .entries = n_entries,
797 .key_len = sizeof(struct private_key),
798 .hash_func = rte_hash_crc,
799 .hash_func_init_val = 0,
802 plogx_info("hash table name = %s\n", hash_params.name);
803 struct private_key private_key;
804 struct public_key public_key;
805 tmp_priv_hash = rte_hash_create(&hash_params);
806 PROX_PANIC(tmp_priv_hash == NULL, "Failed to set up private hash table for NAT\n");
807 plogx_info("private hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
809 tmp_priv_flow_entries = (struct private_flow_entry *)prox_zmalloc(n_entries * sizeof(struct private_flow_entry), socket);
810 PROX_PANIC(tmp_priv_flow_entries == NULL, "Failed to allocate memory for private NAT %u entries\n", n_entries);
811 plogx_info("private data allocated, with %d entries of size %ld\n", n_entries, sizeof(struct private_flow_entry));
814 //hash_params.name[0]++;
815 plogx_info("hash table name = %s\n", hash_params.name);
816 hash_params.key_len = sizeof(uint32_t);
817 hash_params.entries = 4 * ip_free_count;
818 tmp_priv_ip_hash = rte_hash_create(&hash_params);
819 PROX_PANIC(tmp_priv_ip_hash == NULL, "Failed to set up private ip hash table for NAT\n");
820 plogx_info("private ip hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
823 //hash_params.name[0]++;
824 plogx_info("hash table name = %s\n", hash_params.name);
825 hash_params.entries = n_entries;
/* Fix: statement terminated with ';' (was a comma expression chained into the next line). */
826 hash_params.key_len = sizeof(struct public_key);
827 tmp_pub_hash = rte_hash_create(&hash_params);
828 PROX_PANIC(tmp_pub_hash == NULL, "Failed to set up public hash table for NAT\n");
829 plogx_info("public hash table allocated, with %d entries of size %d\n", hash_params.entries, hash_params.key_len);
832 //hash_params.name[0]++;
833 tmp_pub_entries = (struct public_entry *)prox_zmalloc(n_entries * sizeof(struct public_entry), socket);
834 PROX_PANIC(tmp_pub_entries == NULL, "Failed to allocate memory for public NAT %u entries\n", n_entries);
/* Fix: report the element size of the array that was actually allocated. */
835 plogx_info("public data allocated, with %d entries of size %ld\n", n_entries, sizeof(struct public_entry));
/* Pre-load the static ip/port mappings in both directions. */
837 for (i = 0; i < n_static_ip_port; i++) {
838 ip_to = tmp_static_ip_port[i].public_ip;
839 ip_from = tmp_static_ip_port[i].private_ip;
840 port_to = tmp_static_ip_port[i].public_port;
841 port_from = tmp_static_ip_port[i].private_port;
842 private_key.ip_addr = ip_from;
843 private_key.l4_port = port_from;
844 ret = rte_hash_lookup(tmp_priv_hash, (const void *)&private_key);
845 PROX_PANIC(ret >= 0, "Key %x %x already exists in NAT private hash table\n", ip_from, port_from);
847 idx = rte_hash_add_key(tmp_priv_ip_hash, (const void *)&ip_from);
848 PROX_PANIC(idx < 0, "Failed to add ip %x to NAT private hash table\n", ip_from);
849 ret = rte_hash_add_key(tmp_priv_hash, (const void *)&private_key);
850 PROX_PANIC(ret < 0, "Failed to add Key %x %x to NAT private hash table\n", ip_from, port_from);
851 tmp_priv_flow_entries[ret].ip_addr = ip_to;
/* flow_time of a static flow is set to the maximum value (-1 converted to unsigned). */
852 tmp_priv_flow_entries[ret].flow_time = -1;
853 tmp_priv_flow_entries[ret].private_ip_idx = idx;
854 tmp_priv_flow_entries[ret].l4_port = port_to;
856 public_key.ip_addr = ip_to;
857 public_key.l4_port = port_to;
858 ret = rte_hash_lookup(tmp_pub_hash, (const void *)&public_key);
859 PROX_PANIC(ret >= 0, "Key %d.%d.%d.%d port %x (for private IP %d.%d.%d.%d port %x) already exists in NAT public hash table fir IP %d.%d.%d.%d port %x\n", IP4(ip_to), port_to, IP4(ip_from), port_from, IP4(tmp_pub_entries[ret].ip_addr), tmp_pub_entries[ret].l4_port);
861 ret = rte_hash_add_key(tmp_pub_hash, (const void *)&public_key);
862 PROX_PANIC(ret < 0, "Failed to add Key %x %x to NAT public hash table\n", ip_to, port_to);
863 tmp_pub_entries[ret].ip_addr = ip_from;
864 tmp_pub_entries[ret].l4_port = port_from;
865 tmp_pub_entries[ret].private_ip_idx = idx;
/* Share the freshly built tables with every CGNAT task configured on this lcore. */
868 for (uint8_t task_id = 0; task_id < targ->lconf->n_tasks_all; ++task_id) {
869 struct task_args *target_targ = (struct task_args *)&(targ->lconf->targs[task_id]);
870 enum task_mode smode = target_targ->mode;
871 if (CGNAT == smode) {
872 target_targ->public_ip_count = public_ip_count;
873 target_targ->private_ip_hash = tmp_priv_ip_hash;
874 target_targ->private_ip_port_hash = tmp_priv_hash;
875 target_targ->private_ip_info = tmp_priv_ip_info;
876 target_targ->private_flow_entries = tmp_priv_flow_entries;
877 target_targ->public_ip_port_hash = tmp_pub_hash;
878 target_targ->public_entries = tmp_pub_entries;
879 target_targ->public_ip_config_info = tmp_public_ip_config_info;
885 static void early_init_task_nat(struct task_args *targ)
888 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
889 if (!targ->private_ip_hash) {
890 ret = lua_to_hash_nat(targ, prox_lua(), GLOBAL, targ->nat_table, socket_id);
891 PROX_PANIC(ret != 0, "Failed to load NAT table from lua:\n%s\n", get_lua_to_errors());
895 static void init_task_nat(struct task_base *tbase, struct task_args *targ)
897 struct task_nat *task = (struct task_nat *)tbase;
898 const int socket_id = rte_lcore_to_socket_id(targ->lconf->id);
900 /* Use destination IP by default. */
901 task->private = targ->use_src;
903 PROX_PANIC(!strcmp(targ->nat_table, ""), "No nat table specified\n");
904 task->lconf = targ->lconf;
905 task->runtime_flags = targ->runtime_flags;
907 task->public_ip_count = targ->public_ip_count;
908 task->last_ip = targ->public_ip_count;
909 task->private_ip_hash = targ->private_ip_hash;
910 task->private_ip_port_hash = targ->private_ip_port_hash;
911 task->private_ip_info = targ->private_ip_info;
912 task->private_flow_entries = targ->private_flow_entries;
913 task->public_ip_port_hash = targ->public_ip_port_hash;
914 task->public_entries = targ->public_entries;
915 task->public_ip_config_info = targ->public_ip_config_info;
917 proto_ipsrc_portsrc_mask = _mm_set_epi32(BIT_0_TO_15, 0, ALL_32_BITS, BIT_8_TO_15);
918 proto_ipdst_portdst_mask = _mm_set_epi32(BIT_16_TO_31, ALL_32_BITS, 0, BIT_8_TO_15);
922 PROX_PANIC(!strcmp(targ->route_table, ""), "route table not specified\n");
923 if (targ->flags & TASK_ARG_LOCAL_LPM) {
924 int ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
925 PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors());
926 prox_sh_add_socket(socket_id, targ->route_table, lpm);
927 task->number_free_rules = lpm->n_free_rules;
929 lpm = prox_sh_find_socket(socket_id, targ->route_table);
931 int ret = lua_to_lpm4(prox_lua(), GLOBAL, targ->route_table, socket_id, &lpm);
932 PROX_PANIC(ret, "Failed to load IPv4 LPM:\n%s\n", get_lua_to_errors());
933 prox_sh_add_socket(socket_id, targ->route_table, lpm);
936 task->ipv4_lpm = lpm->rte_lpm;
937 task->next_hops = lpm->next_hops;
938 task->number_free_rules = lpm->n_free_rules;
940 for (uint32_t i = 0; i < MAX_HOP_INDEX; i++) {
941 int tx_port = task->next_hops[i].mac_port.out_idx;
942 if ((tx_port > targ->nb_txports - 1) && (tx_port > targ->nb_txrings - 1)) {
943 PROX_PANIC(1, "Routing Table contains port %d but only %d tx port/ %d ring:\n", tx_port, targ->nb_txports, targ->nb_txrings);
947 if (targ->nb_txrings) {
948 struct task_args *dtarg;
950 for (uint32_t i = 0; i < targ->nb_txrings; ++i) {
951 ct = targ->core_task_set[0].core_task[i];
952 dtarg = core_targ_get(ct.core, ct.task);
953 dtarg = find_reachable_task_sending_to_port(dtarg);
954 task->src_mac[i] = (0x0000ffffffffffff & ((*(uint64_t*)&prox_port_cfg[dtarg->tx_port_queue[0].port].eth_addr))) | ((uint64_t)ETYPE_IPv4 << (64 - 16));
955 task->src_mac_from_dpdk_port[dtarg->tx_port_queue[0].port] = task->src_mac[i];
956 plogx_dbg("src_mac = %lx for port %d %d\n", task->src_mac[i], i, dtarg->tx_port_queue[0].port);
959 for (uint32_t i = 0; i < targ->nb_txports; ++i) {
960 task->src_mac[i] = (0x0000ffffffffffff & ((*(uint64_t*)&prox_port_cfg[targ->tx_port_queue[i].port].eth_addr))) | ((uint64_t)ETYPE_IPv4 << (64 - 16));
961 task->src_mac_from_dpdk_port[targ->tx_port_queue[0].port] = task->src_mac[i];
962 plogx_dbg("src_mac = %lx for port %d %d\n", task->src_mac[i], i, targ->tx_port_queue[i].port);
966 struct prox_port_cfg *port = find_reachable_port(targ);
968 task->offload_crc = port->requested_tx_offload & (RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM);
972 /* Basic static nat. */
/*
 * Task descriptor for the NAT task; its address is passed to reg_task() by
 * reg_task_nat(). It wires the early-init and init callbacks, the packet
 * handler, the feature flags and the size of the per-task state.
 */
973 static struct task_init task_init_nat = {
/* Loads the NAT table from Lua unless the task arguments already hold it. */
976 .early_init = early_init_task_nat,
/* Copies the tables into the task and prepares routing and source MACs. */
977 .init = init_task_nat,
/* NOTE(review): presumably the burst packet handler -- defined outside this view. */
978 .handle = handle_nat_bulk,
/*
 * NOTE(review): two .flag_features initializers are visible here; presumably
 * they are alternatives selected by a preprocessor conditional whose
 * directives are not shown in this view -- confirm against the full file.
 */
980 .flag_features = TASK_FEATURE_TXQ_FLAGS_NOOFFLOADS|TASK_FEATURE_ROUTING|TASK_FEATURE_ZERO_RX,
982 .flag_features = TASK_FEATURE_ROUTING|TASK_FEATURE_ZERO_RX,
/* Size of the task state; init_task_nat() uses the task base as a struct task_nat. */
984 .size = sizeof(struct task_nat),
987 __attribute__((constructor)) static void reg_task_nat(void)
989 reg_task(&task_init_nat);