gateway: Created common code for routing in gateway
[samplevnf.git] / common / VIL / conntrack / rte_cnxn_tracking.c
1 /*
2 // Copyright (c) 2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <rte_ether.h>
18 #include <rte_prefetch.h>
19 #include <rte_cycles.h>
20 #include <rte_malloc.h>
21 #include <rte_memcpy.h>
22 #include <rte_timer.h>
23 #include <rte_spinlock.h>
24 #include "rte_cnxn_tracking.h"
25 #include "rte_ct_tcp.h"
26
/*
 * Debug switches. CNXN_TRX_DEBUG is a verbosity level that the code compares
 * against thresholds (e.g. "> 1", "> 2", "> 4"); 0 disables all of them.
 * NOTE(review): TESTING_TIMERS and RTE_CT_TIMER_EXPIRED_DUMP are not used in
 * the part of the file reviewed here - presumably timer test/dump switches.
 */
#define CNXN_TRX_DEBUG 0
#define TESTING_TIMERS 0
#define RTE_CT_TIMER_EXPIRED_DUMP 0

/*
 * Byte offsets handed to the RTE_MBUF_METADATA_* accessors to reach the
 * packet headers. IP_START is the first byte of the IP header; the offsets
 * derived from it follow the fixed IPv4 layout with no options (protocol at
 * byte 9, source address at byte 12, L4 header after 20 bytes).
 */
#define META_DATA_OFFSET 128
#define ETHERNET_START (META_DATA_OFFSET + RTE_PKTMBUF_HEADROOM)
#define ETH_HDR_SIZE 14
#define IP_START (ETHERNET_START + ETH_HDR_SIZE)
#define PROTOCOL_START (IP_START + 9)
#define SRC_ADDR_START (IP_START + 12)
#define TCP_START (IP_START + 20)

/*
 * IPv6 equivalents: next-header field at byte 6, source address at byte 8,
 * L4 header after the fixed 40-byte IPv6 header.
 */
#define PROTOCOL_START_IPV6 (IP_START + 6)
#define SRC_ADDR_START_IPV6 (IP_START + 8)
#define TCP_START_IPV6 (IP_START + 40)

/* IP protocol numbers of the two tracked protocols. */
#define TCP_PROTOCOL 6
#define UDP_PROTOCOL 17
/* NOTE(review): key sizes in bytes, presumably; not used in the code seen. */
#define TCP_FW_IPV4_KEY_SIZE 16

#define TCP_FW_IPV6_KEY_SIZE 40

/* Fixed IP header sizes in bytes (headers without options/extensions). */
#define IPv4_HEADER_SIZE 20
#define IPv6_HEADER_SIZE 40

/* Values of the IP version nibble, also used as the "type" of a key/packet. */
#define IP_VERSION_4 4
#define IP_VERSION_6 6
/*
 * Forward declaration of a file-local batch lookup routine. The definition is
 * not part of the code reviewed here; judging by the names, it takes the
 * packet batch and its masks plus a fixed IP header size - TODO confirm
 * against the definition.
 */
static void
rte_ct_cnxn_tracker_batch_lookup_basic_type(
        struct rte_ct_cnxn_tracker *ct,
        struct rte_mbuf **pkts,
        uint64_t *pkts_mask,
        uint64_t no_new_cnxn_mask,
        uint64_t *reply_pkt_mask,
        uint64_t *hijack_mask,
        uint8_t ip_hdr_size_bytes);
64
/*
 * Check if the packet is valid for the given connection. "original_direction"
 * is false if the address order needs to be "flipped" (see
 * rte_ct_create_cnxn_hashkey()), and true otherwise. Return 0 if the packet
 * is valid, or a negative value otherwise.
 */
70
71 /* IP/TCP header print for debugging */
72 static void
73 rte_ct_cnxn_print_pkt(struct rte_mbuf *pkt, uint8_t type)
74 {
75         int i;
76         uint8_t *rd = RTE_MBUF_METADATA_UINT8_PTR(pkt, IP_START);
77
78         printf("\n");
79         printf("IP and TCP/UDP headers:\n");
80
81         if (type == IP_VERSION_4) {
82                 for (i = 0; i < 40; i++) {
83                         printf("%02x ", rd[i]);
84                         if ((i & 3) == 3)
85                                 printf("\n");
86                 }
87                 printf("\n");
88         }
89
90         if (type == IP_VERSION_6) {
91                 for (i = 0; i < 60; i++) {
92                         printf("%02x ", rd[i]);
93                         if ((i & 3) == 3)
94                                 printf("\n");
95                 }
96                 printf("\n");
97         }
98
99 }
100
101 static void
102 rte_cnxn_ip_type(uint8_t *type, struct rte_mbuf *pkt)
103 {
104
105         int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
106
107         if (ip_hdr_size_bytes == IPv4_HEADER_SIZE)
108                 *type = IP_VERSION_4;
109
110         if (ip_hdr_size_bytes == IPv6_HEADER_SIZE)
111                 *type = IP_VERSION_6;
112 }
113
/*
 * Debug helper: print the ten 32-bit words of a connection hash key as
 * zero-padded hexadecimal on one line. The key is only read.
 */
static void
rte_ct_print_hashkey(uint32_t *key)
{
        const uint32_t *word = key;

        printf("Key: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x \\\n",
                        word[0], word[1], word[2], word[3], word[4],
                        word[5], word[6], word[7], word[8], word[9]);
}
121
122 /*
123  * Create a hash key consisting of the source address/port, the destination
124  * address/ports, and the tcp protocol number. The address/port combos are
125  * treated as two 48 bit numbers and sorted. Thus the key is always the
126  * same regardless of the direction of the packet. Remembering if the numbers
127  * were "flipped" from the order in the packet, and comparing that to whether
128  * the original hash key was flipped, tells if this packet is from the same
129  * direction as the original sender or the response direction. Returns 1 (true)
130  * if the key was left in the original direction.
131  */
132 uint8_t
133 rte_ct_create_cnxn_hashkey(
134         uint32_t *src_addr,
135         uint32_t *dst_addr,
136         uint16_t src_port,
137         uint16_t dst_port,
138         uint8_t proto,
139         uint32_t *key,
140         uint8_t type)
141 {
142         uint8_t hash_order_original_direction = 1;
143
144         key[9] = proto;
145
146         if (type == IP_VERSION_4) {
147                 uint32_t source = *src_addr;
148                 uint32_t dest = *dst_addr;
149
150                 key[3] = key[4] = key[5] = key[6] = key[7] = key[8] = 0;
151
152                 if ((source < dest)
153                                 || ((source == dest) && (src_port < dst_port))) {
154                         key[0] = source;
155                         key[1] = dest;
156                         key[2] = (src_port << 16) | dst_port;
157                 } else {
158                         key[0] = dest;
159                         key[1] = source;
160                         key[2] = (dst_port << 16) | src_port;
161                         hash_order_original_direction = 0;
162                 }
163         }
164
165         if (type == IP_VERSION_6) {
166                 int ip_cmp = memcmp(src_addr, dst_addr, 16);
167                 uint32_t *lo_addr;
168                 uint32_t *hi_addr;
169
170                 if ((ip_cmp < 0) || ((ip_cmp == 0) && (src_port < dst_port))) {
171                         lo_addr = src_addr;
172                         hi_addr = dst_addr;
173                         key[8] = (src_port << 16) | dst_port;
174                 } else {
175                         lo_addr = dst_addr;
176                         hi_addr = src_addr;
177                         key[8] = (dst_port << 16) | src_port;
178                         hash_order_original_direction = 0;
179                 }
180                 key[0] = lo_addr[0];
181                 key[1] = lo_addr[1];
182                 key[2] = lo_addr[2];
183                 key[3] = lo_addr[3];
184                 key[4] = hi_addr[0];
185                 key[5] = hi_addr[1];
186                 key[6] = hi_addr[2];
187                 key[7] = hi_addr[3];
188
189         }
190 #ifdef ALGDBG
191          rte_ct_print_hashkey(key);
192 #endif
193         return hash_order_original_direction;
194 }
195
196
197 int
198 rte_ct_get_IP_hdr_size(struct rte_mbuf *pkt)
199 {
200         /* NOTE: Only supporting IP headers with no options at this time, so
201          * header is fixed size
202          */
203         /* TODO: Need to find defined contstants for start of Ether and
204          * IP headers.
205          */
206         uint8_t hdr_chk = RTE_MBUF_METADATA_UINT8(pkt, IP_START);
207
208         hdr_chk = hdr_chk >> 4;
209
210         if (hdr_chk == IP_VERSION_4)
211                 return IPv4_HEADER_SIZE;
212
213         else if (hdr_chk == IP_VERSION_6)
214                 return IPv6_HEADER_SIZE;
215
216         else    /* Not IPv4 header with no options, return negative. */
217                 return -1;
218         /*
219          * int ip_hdr_size_bytes = (ihdr->version_ihl & IPV4_HDR_IHL_MASK) *
220          * IPV4_IHL_MULTIPLIER;
221          * return ip_hdr_size_bytes;
222          */
223 }
224
225 static void
226 rte_ct_set_timer_for_new_cnxn(
227                 struct rte_ct_cnxn_tracker *ct,
228                 struct rte_ct_cnxn_data *cd)
229 {
230         cd->state_used_for_timer = RTE_CT_TCP_NONE;
231         rte_ct_set_cnxn_timer_for_tcp(ct, cd, RTE_CT_TCP_SYN_SENT);
232 }
233
234 /*
235  * The connection data is stored in a hash table which makes use of the bulk
236  * lookup optimization provided in DPDK. All of the packets seen in one call
237  * to rte_ct_cnxn_tracker_batch_lookup are done in one hash table lookup. The
238  * number of packets is the number being processed by the pipeline (default
239  * max 32, absolute max 64). For any TCP or UDP packet that does not have
240  * an existing (pseudo-)connection in the table (i.e. was a miss on the hash
241  * lookup), a new connection must be added.
242  *
243  * It is possible, for UDP, that the first packet for a (pseudo-)connection and
244  * a subsequent packet are in the same batch. This means that when looking for
245  * new connections in a batch the first one must add the connection, the
246  * second and subsequent (in that batch) that are part of the same connection
247  * must use that newly created one, not create another table entry.
248  *
 * Any newly created entries are "remembered" in a linear table, which is
 * searched when processing hash table misses. All the entries in that table
 * are "forgotten" at the start of a new batch.
252  *
253  * A linear table may seem slow, but consider:
254  * - out of millions of packets/second, this involves at most 64.
 * - this affects only UDP. TCP connections are set up using an acknowledgement
 *   protocol, so would not have multiple packets for a new connection in
 *   the same batch (TODO)
258  * - the number of new connections in a batch would usually be zero, or a low
259  *   number like 1
260  * - all the data to search through should still be in cache
261  */
262
263 static inline void
264 rte_ct_remember_new_connection(
265         struct rte_ct_cnxn_tracker *ct,
266         struct rte_ct_cnxn_data *entry)
267 {
268         ct->latest_connection++;
269         ct->new_connections[ct->latest_connection] = entry;
270 }
271
272 static struct rte_ct_cnxn_data *
273 rte_ct_search_new_connections(struct rte_ct_cnxn_tracker *ct, uint32_t *key)
274 {
275         int i;
276
277         for (i = 0; i <= ct->latest_connection; i++) {
278                 uint32_t *cnxn_key = ct->new_connections[i]->key;
279                 int key_cmp = memcmp(cnxn_key, key,
280                                 sizeof(ct->new_connections[i]->key));
281
282                 if (key_cmp == 0)
283                         return ct->new_connections[i];
284         }
285         return NULL;
286 }
287
288 static inline void rte_ct_forget_new_connections(struct rte_ct_cnxn_tracker *ct)
289 {
290         ct->latest_connection = -1;
291 }
292
293
294
295
296 static enum rte_ct_packet_action
297 rte_ct_handle_tcp_lookup(
298         struct  rte_ct_cnxn_tracker *ct,
299         struct  rte_mbuf *packet,
300         uint8_t pkt_num,
301         uint8_t key_is_client_order,
302         uint32_t *key,
303         int     hash_table_entry,
304         int     no_new_cnxn,
305         uint8_t ip_hdr_size_bytes)
306 {
307         struct rte_ct_cnxn_data new_cnxn_data;
308
309         memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
310         enum rte_ct_packet_action packet_action;
311
312         #ifdef CT_CGNAT
313         int32_t position = hash_table_entry;
314         ct->positions[pkt_num] = position;
315         #endif
316
317         /* rte_ct_cnxn_print_pkt(packet); */
318         if (hash_table_entry >= 0) {
319                 /*
320                  * connection found for this packet.
321                  * Check that this is a valid packet for connection
322                  */
323
324                 struct rte_ct_cnxn_data *entry =
325                                 &ct->hash_table_entries[hash_table_entry];
326
327                 packet_action = rte_ct_verify_tcp_packet(ct, entry, packet,
328                                 key_is_client_order, ip_hdr_size_bytes);
329
330                 switch (packet_action) {
331
332                 case RTE_CT_FORWARD_PACKET:
333                         entry->counters.packets_forwarded++;
334                         break;
335
336                 case RTE_CT_DROP_PACKET:
337                         entry->counters.packets_dropped++;
338                         return RTE_CT_DROP_PACKET;
339
340                 case RTE_CT_REOPEN_CNXN_AND_FORWARD_PACKET:
341                         /* Entry already in hash table, just re-initialize */
342
343                         /* Don't use syproxy on re-init, since it
344                          * is a valid connection
345                          */
346
347                         if (rte_ct_tcp_new_connection(ct, &new_cnxn_data,
348                                                 packet, 0, ip_hdr_size_bytes) !=
349                                         RTE_CT_DROP_PACKET) {
350                                 rte_memcpy(&entry->ct_protocol.tcp_ct_data,
351                                 &new_cnxn_data.ct_protocol.tcp_ct_data,
352                                 sizeof(new_cnxn_data.ct_protocol.tcp_ct_data));
353                                 rte_ct_set_timer_for_new_cnxn(ct, entry);
354                                 if (ct->counters->sessions_reactivated > 0)
355                                         ct->counters->sessions_reactivated--;
356                         }
357
358                         break;
359
360                 case RTE_CT_SEND_SERVER_SYN:
361                         ct->counters->pkts_forwarded++;
362                         /* packet modified, send back to original source */
363                         return RTE_CT_SEND_SERVER_SYN;
364
365                 case RTE_CT_SEND_SERVER_ACK:
366                         ct->counters->pkts_forwarded++;
367                         /* packet modified, send back to original source */
368                         return RTE_CT_SEND_SERVER_ACK;
369
370                 case RTE_CT_HIJACK:
371                         ct->counters->pkts_forwarded++;
372                         /* packet saved with connection, notify VNF
373                          * to hijack it
374                          */
375                         return RTE_CT_HIJACK;
376
377                 case RTE_CT_DESTROY_CNXN_AND_FORWARD_PACKET:
378
379                         /*
380                          * Forward the packet because it is "legal", but destroy
381                          * the connection by removing it from the hash table and
382                          * cancelling any timer. There is a remote possibility
383                          * (perhaps impossible?) that a later packet in the same
384                          * batch is for this connection. Due to the batch
385                          * lookup, which has already happened, the later packet
386                          * thinks that the connection is valid. This might cause
387                          * a timer to be set. Eventually, it would time out so
388                          * the only bug case occurs if the hash table also, in
389                          * the same batch, allocates this entry for a new
390                          * connection before the above packet is received. The
391                          * chances of this happening seem impossibly small but
392                          * this case should perhaps be investigated further.
393                          */
394
395                         if (rte_hash_del_key(ct->rhash, entry->key) >= 0) {
396                                 /*
397                                  * if rte_hash_del_key >= 0, then the connection
398                                  * was found in the hash table and removed.
399                                  * Counters must be updated, and the timer
400                                  * cancelled. If the result was < 0, then the
401                                  * connection must have already been deleted,
402                                  * and it must have been deleted in this batch
403                                  * of packets processed. Do nothing.
404                                  */
405
406                                 ct->counters->sessions_closed++;
407                                 if (ct->counters->current_active_sessions > 0)
408                                         ct->counters->current_active_sessions--;
409                                 rte_ct_cancel_cnxn_timer(entry);
410                         }
411                         entry->counters.packets_forwarded++;
412                         break;
413
414                 default:
415                         break;
416                 }
417         } else {
418                 /* try to add new connection */
419                 struct rte_ct_cnxn_data *new_hash_entry;
420
421                 if (no_new_cnxn) {
422                         ct->counters->pkts_drop_invalid_conn++;
423                         return RTE_CT_DROP_PACKET;
424                 }
425
426                 packet_action = rte_ct_tcp_new_connection(ct, &new_cnxn_data,
427                                 packet, ct->misc_options.synproxy_enabled,
428                                 ip_hdr_size_bytes);
429
430                 if (unlikely(packet_action == RTE_CT_DROP_PACKET)) {
431                         ct->counters->pkts_drop_invalid_conn++;
432                         return RTE_CT_DROP_PACKET;
433                 }
434
435                 /* This packet creates a connection . */
436                 int32_t position = rte_hash_add_key(ct->rhash, key);
437                 if (position < 0) {
438                         printf
439                                         ("Failed to add new connection to hash table %d, pkt_num:%d\n",
440                                          position, pkt_num);
441                         return RTE_CT_DROP_PACKET;
442                 }
443         #ifdef CT_CGNAT
444         ct->positions[pkt_num] = position;
445         #endif
446                 new_hash_entry = &ct->hash_table_entries[position];
447
448                 /* update fields in new_cnxn_data not set by new_connection */
449
450                 memcpy(new_cnxn_data.key, key, sizeof(new_cnxn_data.key));
451                 new_cnxn_data.key_is_client_order = key_is_client_order;
452                 new_cnxn_data.protocol = TCP_PROTOCOL;
453                 rte_cnxn_ip_type(&new_cnxn_data.type, packet);
454                 rte_memcpy(new_hash_entry, &new_cnxn_data,
455                                 sizeof(struct rte_ct_cnxn_data));
456                 new_hash_entry->counters.packets_forwarded = 1;
457                 new_hash_entry->counters.packets_dropped = 0;
458                 ct->counters->current_active_sessions++;
459                 ct->counters->sessions_activated++;
460
461                 if (packet_action == RTE_CT_SEND_CLIENT_SYNACK) {
462                         /* this is a synproxied connecton */
463                         /* must remember mss, window scaling etc. from client */
464
465                         rte_sp_parse_options(packet, new_hash_entry);
466
467                         /*
468                          * update packet to a SYN/ACK directed to the client,
469                          * including default header options
470                          */
471
472                         rte_sp_cvt_to_spoofed_client_synack(new_hash_entry,
473                                         packet);
474
475                         /*
476                          * run updated packet through connection tracking so
477                          * cnxn data updated appropriately and timer set for syn
478                          * received state, not syn sent.
479                          */
480                         packet_action = rte_ct_verify_tcp_packet(ct,
481                                         new_hash_entry, packet,
482                                         !key_is_client_order,
483                                         ip_hdr_size_bytes);
484
485                         if (unlikely(packet_action != RTE_CT_FORWARD_PACKET)) {
486                                 /* should never get here */
487                                 printf("Serious error in synproxy generating ");
488                                 printf("SYN/ACK\n");
489                                 return RTE_CT_DROP_PACKET;
490                         }
491                         ct->counters->pkts_forwarded++;
492                         /* spoofed packet good to go */
493                         return RTE_CT_SEND_CLIENT_SYNACK;
494                 }
495                 rte_ct_set_timer_for_new_cnxn(ct, new_hash_entry);
496
497         }
498
499         /* TODO: is it possible that earlier packet in this batch caused new
500          * entry to be added for the connection? Seems unlikely, since it
501          * would require multiple packets from the same side of the connection
502          * one after another immediately, and the TCP connection OPEN requires
503          * acknowledgement before further packets. What about simultaneous
504          * OPEN? Only if both sides are on same input port. Is that possible?
505          */
506         /* if made it here, packet will be forwarded */
507         ct->counters->pkts_forwarded++;
508         return RTE_CT_FORWARD_PACKET;
509 }
510
511 static uint64_t
512 rte_ct_cnxn_tracker_batch_lookup_basic(
513         struct rte_ct_cnxn_tracker *ct,
514         struct rte_mbuf **pkts,
515         uint64_t pkts_mask,
516         uint64_t no_new_cnxn_mask,
517         uint64_t *reply_pkt_mask,
518         uint64_t *hijack_mask)
519 {
520         /* bitmap of packets left to process */
521         uint64_t pkts_to_process = pkts_mask;
522         /* bitmap of valid packets to return */
523         uint64_t valid_packets = pkts_mask;
524         uint8_t compacting_map[RTE_HASH_LOOKUP_BULK_MAX];
525         /* for pkt, key in originators direction? */
526         uint8_t key_orig_dir[RTE_HASH_LOOKUP_BULK_MAX];
527         uint32_t packets_for_lookup = 0;
528         int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
529         uint32_t i;
530         struct rte_ct_cnxn_data new_cnxn_data;
531
532         if (CNXN_TRX_DEBUG > 1) {
533                 printf("Enter cnxn tracker %p", ct);
534                 printf(" synproxy batch lookup with packet mask %p\n",
535                                 (void *)pkts_mask);
536         }
537
538         rte_ct_forget_new_connections(ct);
539         *reply_pkt_mask = 0;
540         *hijack_mask = 0;
541
542         /*
543          * Use bulk lookup into hash table for performance reasons. Cannot have
544          * "empty slots" in the bulk lookup,so need to create a compacted table.
545          */
546
547         for (; pkts_to_process;) {
548                 uint8_t pos = (uint8_t) __builtin_ctzll(pkts_to_process);
549                 /* bitmask representing only this packet */
550                 uint64_t pkt_mask = 1LLU << pos;
551                 /* remove this packet from remaining list */
552                 pkts_to_process &= ~pkt_mask;
553
554                 struct rte_mbuf *pkt = pkts[pos];
555
556                 int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
557
558                 if (unlikely(ip_hdr_size_bytes < 0)) {
559                         /* Not IPv4, ignore. */
560                         continue;
561                 }
562
563                 void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
564
565                 /* TCP and UDP ports at same offset, just use TCP for
566                  * offset calculation
567                  */
568                 struct tcp_hdr *thdr =
569                         (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
570                                         (IP_START + ip_hdr_size_bytes));
571                 uint16_t src_port = rte_bswap16(thdr->src_port);
572                 uint16_t dst_port = rte_bswap16(thdr->dst_port);
573
574                 if (ip_hdr_size_bytes == IPv4_HEADER_SIZE) {
575                         struct ipv4_hdr *ihdr = (struct ipv4_hdr *)ip_hdr;
576                         uint8_t proto = ihdr->next_proto_id;
577
578                         if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
579                                 /* only tracking TCP and UDP at this time */
580                                 continue;
581                         }
582
583                         /*
584                          * Load the addresses and ports, and convert from Intel
585                          * to network byte order. Strictly speaking, it is not
586                          * necessary to do this conversion, as this data is only
587                          * used to create a hash key.
588                          */
589                         uint32_t src_addr = rte_bswap32(ihdr->src_addr);
590                         uint32_t dst_addr = rte_bswap32(ihdr->dst_addr);
591
592                         if (CNXN_TRX_DEBUG > 2) {
593                                 if (CNXN_TRX_DEBUG > 4)
594                                         rte_ct_cnxn_print_pkt(pkt,
595                                                         IP_VERSION_4);
596                         }
597                         /* need to create compacted table of pointers to pass
598                          * to bulk lookup
599                          */
600
601                         compacting_map[packets_for_lookup] = pos;
602                         key_orig_dir[packets_for_lookup] =
603                                 rte_ct_create_cnxn_hashkey(&src_addr, &dst_addr,
604                                                 src_port, dst_port,
605                                                 proto,
606                                                 &ct->hash_keys
607                                                 [packets_for_lookup][0],
608                                                 IP_VERSION_4);
609                         packets_for_lookup++;
610                 }
611
612                 if (ip_hdr_size_bytes == IPv6_HEADER_SIZE) {
613                         struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
614                         uint8_t proto = ihdr->proto;
615
616                         if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
617                                 /* only tracking TCP and UDP at this time */
618                                 continue;
619                         }
620
621                         if (CNXN_TRX_DEBUG > 2) {
622                                 if (CNXN_TRX_DEBUG > 4)
623                                         rte_ct_cnxn_print_pkt(pkt,
624                                                         IP_VERSION_6);
625                         }
626
627                         /* need to create compacted table of pointers to pass
628                          * to bulk lookup
629                          */
630
631                         compacting_map[packets_for_lookup] = pos;
632                         key_orig_dir[packets_for_lookup] =
633                                 rte_ct_create_cnxn_hashkey(
634                                                 (uint32_t *) ihdr->src_addr,
635                                                 (uint32_t *) ihdr->dst_addr,
636                                                 src_port, dst_port,
637                                                 proto,
638                                                 &ct->hash_keys
639                                                 [packets_for_lookup][0],
640                                                 IP_VERSION_6);
641                         packets_for_lookup++;
642                 }
643
644         }
645
646         if (unlikely(packets_for_lookup == 0))
647                 return valid_packets;   /* no suitable packet for lookup */
648
649         /* Clear all the data to make sure no stack garbage is in it */
650         memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
651
652         /* lookup all tcp & udp packets in the connection table */
653
654         int lookup_result =
655                         rte_hash_lookup_bulk(ct->rhash, (const void **)&ct->hash_key_ptrs,
656                                  packets_for_lookup, &positions[0]);
657
658         if (unlikely(lookup_result < 0)) {
659                 /* TODO: change a log */
660                 printf("Unexpected hash table problem, discarding all packets");
661                 return 0;       /* unknown error, just discard all packets */
662         }
663 #ifdef ALGDBG
664         for (i = 0; i < packets_for_lookup; i++) {
665                 if (positions[i] >= 0)
666                 printf("@CT positions[i]= %d, compacting_map[i]= %d\n",
667                         positions[i], compacting_map[i]);
668         }
669 #endif
670         for (i = 0; i < packets_for_lookup; i++) {
671                 /* index into hash table entries */
672                 int hash_table_entry = positions[i];
673                 /* index into packet table of this packet */
674                 uint8_t pkt_index = compacting_map[i];
675                 /* bitmask representing only this packet */
676                 uint64_t pkt_mask = 1LLU << pkt_index;
677                 uint8_t key_is_client_order = key_orig_dir[i];
678                 uint32_t *key = ct->hash_key_ptrs[pkt_index];
679                 uint8_t protocol = *(key + 9);
680                 struct rte_mbuf *packet = pkts[pkt_index];
681                 int no_new_cnxn = (pkt_mask & no_new_cnxn_mask) != 0;
682
683                  /* rte_ct_print_hashkey(key); */
684
685                 if (protocol == TCP_PROTOCOL) {
686                         enum rte_ct_packet_action tcp_pkt_action;
687
688                         int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(packet);
689                         tcp_pkt_action = rte_ct_handle_tcp_lookup(ct, packet,
690                                         pkt_index, key_is_client_order,
691                                         key, hash_table_entry, no_new_cnxn,
692                                         ip_hdr_size_bytes);
693
694                         switch (tcp_pkt_action) {
695
696                         case RTE_CT_SEND_CLIENT_SYNACK:
697                         case RTE_CT_SEND_SERVER_ACK:
698                                 /* altered packet or copy must be returned
699                                  * to originator
700                                  */
701                                 *reply_pkt_mask |= pkt_mask;
702                                 /* FALL-THROUGH */
703
704                         case RTE_CT_SEND_SERVER_SYN:
705                         case RTE_CT_FORWARD_PACKET:
706                                 break;
707
708                         case RTE_CT_HIJACK:
709                                 *hijack_mask |= pkt_mask;
710                                 break;
711
712                         default:
713                                 /* bad packet, clear mask to drop */
714                                 valid_packets ^= pkt_mask;
715                                 ct->counters->pkts_drop++;
716                                 break;
717                         }
718
719                         /* rte_ct_cnxn_print_pkt(pkts[pkt_index]); */
720                 } else {        /* UDP entry */
721
722                         if (hash_table_entry >= 0) {
723                                 /*
724                                  * connection found for this packet. Check that
725                                  * this is a valid packet for connection
726                                  */
727
728                                 struct rte_ct_cnxn_data *entry =
729                                                 &ct->hash_table_entries[hash_table_entry];
730
731                                 if (rte_ct_udp_packet
732                                                 (ct, entry, pkts[pkt_index],
733                                                  key_is_client_order)) {
734                                         entry->counters.packets_forwarded++;
735                                         ct->counters->pkts_forwarded++;
736                                 }
737                         } else {
738                                 /*
739                                  * connection not found in bulk hash lookup,
740                                  * but might have been added in this batch
741                                  */
742
743                                 struct rte_ct_cnxn_data *recent_entry =
744                                                 rte_ct_search_new_connections(ct, key);
745
746                                 if (recent_entry != NULL) {
747                                         if (rte_ct_udp_packet(ct, recent_entry,
748                                                         pkts[pkt_index],
749                                                         key_is_client_order)) {
750                                                 recent_entry->counters.
751                                                         packets_forwarded++;
752                                                 ct->counters->pkts_forwarded++;
753                                         }
754                                 } else {
755                                         /* no existing connection, try to add
756                                          * new one
757                                          */
758
759                                         if (no_new_cnxn) {
760                                                 /* new cnxn not allowed, clear
761                                                  * mask to drop
762                                                  */
763                                                 valid_packets ^= pkt_mask;
764                                                 ct->counters->pkts_drop++;
765                                                 ct->counters->
766                                                 pkts_drop_invalid_conn++;
767                                                 continue;
768                                         }
769
770                                         if (rte_ct_udp_new_connection(ct,
771                                                         &new_cnxn_data,
772                                                         pkts[pkt_index])) {
773                                                 /* This packet creates a
774                                                  * connection .
775                                                  */
776                                                 int32_t position =
777                                                         rte_hash_add_key(
778                                                                 ct->rhash, key);
779
780                                         if (position < 0)
781                                                 continue;
782
783                                                 struct rte_ct_cnxn_data
784                                                         *new_hash_entry = &ct->
785                                                 hash_table_entries[position];
786
787                                                 /*
788                                                  *update fields in new_cnxn_data
789                                                  * not set by "new_connection"
790                                                  */
791
792                                                 memcpy(new_cnxn_data.key, key,
793                                                 sizeof(new_cnxn_data.key));
794
795                                                 new_cnxn_data.
796                                                         key_is_client_order
797                                                         = key_is_client_order;
798                                                 new_cnxn_data.protocol =
799                                                         UDP_PROTOCOL;
800                                                 rte_cnxn_ip_type(
801                                                         &new_cnxn_data.type,
802                                                         packet);
803                                                 rte_memcpy(new_hash_entry,
804                                                         &new_cnxn_data,
805                                                         sizeof(struct
806                                                         rte_ct_cnxn_data));
807
808                                                 new_hash_entry->counters.
809                                                         packets_forwarded = 1;
810                                                 ct->counters->pkts_forwarded++;
811                                                 new_hash_entry->counters.
812                                                         packets_dropped = 0;
813                                                 ct->counters->pkts_drop = 0;
814                                                 ct->counters->
815                                                 current_active_sessions++;
816                                                 ct->counters->
817                                                         sessions_activated++;
818
819                                                 new_hash_entry->
820                                                         state_used_for_timer
821                                                         = RTE_CT_UDP_NONE;
822                                                 rte_ct_set_cnxn_timer_for_udp(
823                                                         ct,
824                                                         new_hash_entry,
825                                                         RTE_CT_UDP_UNREPLIED);
826
827                                                 rte_ct_remember_new_connection(
828                                                                 ct,
829                                                                 new_hash_entry);
830                                         }
831                                 }
832
833                         }
834
835                 }               /* UDP */
836         }                       /* packets_for_lookup */
837
838         if (CNXN_TRX_DEBUG > 1) {
839                 printf("Exit cnxn tracker synproxy batch lookup with");
840                 printf(" packet mask %p\n", (void *)valid_packets);
841         }
842
843         return valid_packets;
844 }
845
846 uint64_t
847 rte_ct_cnxn_tracker_batch_lookup_with_synproxy(
848         struct rte_ct_cnxn_tracker *ct,
849         struct rte_mbuf **pkts,
850         uint64_t pkts_mask,
851         struct rte_synproxy_helper *sp_helper)
852 {
853         return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask, 0,
854                         &sp_helper->reply_pkt_mask, &sp_helper->hijack_mask);
855 }
#ifdef CT_CGNAT
/*
 * Connection tracking entry point used by CGNAT builds. Synproxy is
 * switched off on the tracker before every batch, then the batch is handed
 * to the basic lookup using the masks held in ct_helper.
 */
uint64_t cgnapt_ct_process(
        struct rte_ct_cnxn_tracker *ct,
        struct rte_mbuf **pkts,
        uint64_t pkts_mask,
        struct rte_CT_helper *ct_helper)
{
        uint64_t result;

        /* to disable SynProxy for CGNAT */
        rte_ct_disable_synproxy(ct);

        result = rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
                        ct_helper->no_new_cnxn_mask,
                        &ct_helper->reply_pkt_mask,
                        &ct_helper->hijack_mask);

        return result;
}
#endif /* CT_CGNAT */
871 uint64_t
872 rte_ct_cnxn_tracker_batch_lookup(
873         struct rte_ct_cnxn_tracker *ct,
874         struct rte_mbuf **pkts,
875         uint64_t pkts_mask,
876         struct rte_CT_helper *ct_helper)
877 {
878
879         return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
880                         ct_helper->no_new_cnxn_mask,
881                         &ct_helper->reply_pkt_mask, &ct_helper->hijack_mask);
882 }
883
884
885 void rte_ct_cnxn_tracker_batch_lookup_type(
886         struct rte_ct_cnxn_tracker *ct,
887         struct rte_mbuf **pkts,
888         uint64_t *pkts_mask,
889         struct rte_CT_helper *ct_helper,
890         uint8_t ip_hdr_size_bytes)
891 {
892
893         rte_ct_cnxn_tracker_batch_lookup_basic_type(ct, pkts, pkts_mask,
894                         ct_helper->no_new_cnxn_mask,
895                         &ct_helper->reply_pkt_mask, &ct_helper->hijack_mask,
896                         ip_hdr_size_bytes);
897 }
898
899
900
/*
 * Batch lookup for callers that only want to control which packets may
 * open new connections. The reply and hijack masks produced by the basic
 * lookup are discarded; both are directed at the same scratch variable.
 */
uint64_t
rte_ct_cnxn_tracker_batch_lookup_with_new_cnxn_control(
        struct rte_ct_cnxn_tracker *ct,
        struct rte_mbuf **pkts,
        uint64_t pkts_mask,
        uint64_t no_new_cnxn_mask)
{
        /* sink for the output masks this caller has no use for */
        uint64_t discarded_mask;
        uint64_t result;

        result = rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
                        no_new_cnxn_mask,
                        &discarded_mask, &discarded_mask);

        return result;
}
914
915
916 int
917 rte_ct_initialize_default_timeouts(struct rte_ct_cnxn_tracker *new_cnxn_tracker)
918 {
919
920         /* timer system init */
921
922         uint64_t hertz = rte_get_tsc_hz();
923
924         new_cnxn_tracker->hertz = hertz;
925         new_cnxn_tracker->timing_cycles_per_timing_step = hertz / 10;
926         new_cnxn_tracker->timing_100ms_steps_previous = 0;
927         new_cnxn_tracker->timing_100ms_steps = 0;
928         new_cnxn_tracker->timing_last_time = rte_get_tsc_cycles();
929
930         /* timeouts in seconds */
931         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
932                 [RTE_CT_TCP_SYN_SENT] = 120 * hertz;
933         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
934                 [RTE_CT_TCP_SYN_RECV] = 60 * hertz;
935         /* 5 * DAYS */
936         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
937                 [RTE_CT_TCP_ESTABLISHED] = 60 * 60 * 24 * 5 * hertz;
938
939         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
940                 [RTE_CT_TCP_FIN_WAIT] = 120 * hertz;
941         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
942                 [RTE_CT_TCP_CLOSE_WAIT] = 60 * hertz;
943         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
944                 [RTE_CT_TCP_LAST_ACK] = 30 * hertz;
945         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
946                 [RTE_CT_TCP_TIME_WAIT] = 120 * hertz;
947         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
948                 [RTE_CT_TCP_CLOSE] = 10 * hertz;
949         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
950                 [RTE_CT_TCP_SYN_SENT_2] = 120 * hertz;
951         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
952                 [RTE_CT_TCP_RETRANS] = 300 * hertz;
953         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
954                 [RTE_CT_TCP_UNACK] = 300 * hertz;
955
956         new_cnxn_tracker->ct_timeout.udptimeout.udp_timeouts
957                 [RTE_CT_UDP_UNREPLIED] = 30 * hertz;
958         new_cnxn_tracker->ct_timeout.udptimeout.udp_timeouts
959                 [RTE_CT_UDP_REPLIED] = 180 * hertz;
960         /* miscellaneous init */
961         new_cnxn_tracker->misc_options.tcp_max_retrans =
962                 RTE_CT_TCP_MAX_RETRANS;
963         new_cnxn_tracker->misc_options.tcp_loose = 0;
964         new_cnxn_tracker->misc_options.tcp_be_liberal = 0;
965 #ifdef CT_CGNAT
966         int i;
967         for (i=0; i < RTE_HASH_LOOKUP_BULK_MAX ;i ++ )
968                         new_cnxn_tracker->positions[i] = -1;
969 #endif
970
971         return 0;
972 }
973
974 struct rte_CT_counter_block rte_CT_counter_table[MAX_CT_INSTANCES]
975 __rte_cache_aligned;
976 int rte_CT_hi_counter_block_in_use = -1;
977
978 int
979 rte_ct_initialize_cnxn_tracker_with_synproxy(
980         struct rte_ct_cnxn_tracker *new_cnxn_tracker,
981         uint32_t max_connection_count,
982         char *name,
983         uint16_t pointer_offset)
984 {
985         uint32_t i;
986         uint32_t size;
987         struct rte_CT_counter_block *counter_ptr;
988         /*
989          * TODO: Should number of entries be something like
990          * max_connection_count * 1.1 to allow for unused space
991          * and thus increased performance of hash table, at a cost of memory???
992          */
993
994         new_cnxn_tracker->pointer_offset = pointer_offset;
995
996         memset(new_cnxn_tracker->name, '\0', sizeof(new_cnxn_tracker->name));
997         strncpy(new_cnxn_tracker->name, name, strlen(new_cnxn_tracker->name));
998         //strcpy(new_cnxn_tracker->name, name);
999         /* + (max_connection_count >> 3); */
1000         uint32_t number_of_entries = max_connection_count;
1001
1002         size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_ct_cnxn_data) *
1003                         number_of_entries);
1004         new_cnxn_tracker->hash_table_entries =
1005                 rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
1006         if (new_cnxn_tracker->hash_table_entries == NULL) {
1007                 printf(" Not enough memory, or invalid arguments\n");
1008                 return -1;
1009         }
1010         new_cnxn_tracker->num_cnxn_entries = number_of_entries;
1011
1012         /* initialize all timers */
1013
1014         for (i = 0; i < number_of_entries; i++)
1015                 rte_timer_init(&new_cnxn_tracker->hash_table_entries[i].timer);
1016
1017         /* pointers for temp storage used during bulk hash */
1018         for (i = 0; i < RTE_HASH_LOOKUP_BULK_MAX; i++)
1019                 new_cnxn_tracker->hash_key_ptrs[i] =
1020                                 &new_cnxn_tracker->hash_keys[i][0];
1021
1022         /*
1023          * Now allocate a counter block entry.It appears that the initialization
1024          * of these threads is serialized on core 0 so no lock is necessary
1025          */
1026
1027         if (rte_CT_hi_counter_block_in_use == MAX_CT_INSTANCES)
1028                 return -1;
1029
1030         rte_CT_hi_counter_block_in_use++;
1031         counter_ptr = &rte_CT_counter_table[rte_CT_hi_counter_block_in_use];
1032
1033         new_cnxn_tracker->counters = counter_ptr;
1034
1035         /* set up hash table parameters, then create hash table */
1036         struct rte_hash_parameters rhash_parms = {
1037                 .name = name,
1038                 .entries = number_of_entries,
1039                 .hash_func = NULL,      /* use default hash */
1040                 .key_len = 40,
1041                 .hash_func_init_val = 0,
1042                 .socket_id = rte_socket_id(),
1043                 .extra_flag = 1 /*This is needed for TSX memory*/
1044         };
1045
1046         new_cnxn_tracker->rhash = rte_hash_create(&rhash_parms);
1047
1048         return 0;
1049 }
1050
/*
 * Initialize a connection tracker with a pointer offset of 0.
 * Returns the result of rte_ct_initialize_cnxn_tracker_with_synproxy().
 */
int
rte_ct_initialize_cnxn_tracker(
        struct rte_ct_cnxn_tracker *new_cnxn_tracker,
        uint32_t max_connection_count,
        char *name)
{
        int status;

        status = rte_ct_initialize_cnxn_tracker_with_synproxy(
                        new_cnxn_tracker, max_connection_count, name, 0);

        return status;
}
1060
1061 int
1062 rte_ct_free_cnxn_tracker_resources(struct rte_ct_cnxn_tracker *old_cnxn_tracker)
1063 {
1064         rte_free(old_cnxn_tracker->hash_table_entries);
1065         rte_hash_free(old_cnxn_tracker->rhash);
1066         return 0;
1067 }
1068
1069 int
1070 rte_ct_get_cnxn_tracker_size(void)
1071 {
1072         return sizeof(struct rte_ct_cnxn_tracker);
1073 }
1074
1075 void
1076 rte_ct_cnxn_timer_expired(struct rte_timer *rt, void *arg);
1077
1078 static void
1079 rte_ct_set_cnxn_timer(
1080         struct rte_ct_cnxn_tracker *ct,
1081         struct rte_ct_cnxn_data *cd,
1082         uint64_t ticks_until_timeout)
1083 {
1084         /*
1085          * pointer to cnxn_data will be stored in timer system as pointer to
1086          * rte_timer for later cast back to cnxn_data during timeout handling
1087          */
1088
1089         struct rte_timer *rt = (struct rte_timer *)cd;
1090         #ifdef CT_CGNAT
1091         /* execute timeout on timer core */
1092         uint32_t core_id = get_timer_core_id();
1093         #else
1094         /* execute timeout on current core */
1095         uint32_t core_id = rte_lcore_id();
1096         #endif
1097         /* safe to reset since timeouts handled synchronously
1098          * by rte_timer_manage
1099          */
1100         int success = rte_timer_reset(rt, ticks_until_timeout, SINGLE, core_id,
1101                         rte_ct_cnxn_timer_expired, ct);
1102
1103         if (success < 0) {
1104                 /* TODO: Change to log, perhaps something else?
1105                  * This should not happen
1106                  */
1107                 printf("CNXN_TRACKER: Failed to set connection timer.\n");
1108         }
1109 }
1110
1111 /*
1112  * For the given connection, set a timeout based on the given state. If the
1113 * timer is already set, this call will reset the timer with a new value.
1114  */
1115
1116 void
1117 rte_ct_set_cnxn_timer_for_tcp(
1118         struct rte_ct_cnxn_tracker *ct,
1119         struct rte_ct_cnxn_data *cd,
1120         uint8_t tcp_state)
1121 {
1122
1123         cd->expected_timeout =
1124                         (ct->timing_100ms_steps * ct->timing_cycles_per_timing_step) +
1125                         ct->ct_timeout.tcptimeout.tcp_timeouts[tcp_state];
1126
1127         if (tcp_state == cd->state_used_for_timer) {
1128                 /*
1129                  * Don't reset timer, too expensive. Instead, determine time
1130                  * elapsed since start of timer. When this timer expires, the
1131                  * timer will be reset to the elapsed timer. So if in a state
1132                  * with a 5 minute timer last sees a packet 4 minutes into the
1133                  * timer, the timer when expires will be reset to 4 minutes.
1134                  * This means the timer will then expire 5 minutes after
1135                  * the last packet.
1136                  */
1137                 return;
1138         }
1139
1140         if (TESTING_TIMERS)
1141                 printf("Set Timer for connection %p and state %s\n", cd,
1142                                          rte_ct_tcp_names[tcp_state]);
1143
1144         rte_ct_set_cnxn_timer(ct, cd,
1145                                                 ct->ct_timeout.
1146                                                 tcptimeout.tcp_timeouts[tcp_state]);
1147         cd->state_used_for_timer = tcp_state;
1148 }
1149
1150 /*
1151  * For the given connection, set a timeout based on the given state.
1152  * If the timer is already set,
1153  * this call will reset the timer with a new value.
1154  */
1155
1156 void
1157 rte_ct_set_cnxn_timer_for_udp(
1158         struct rte_ct_cnxn_tracker *ct,
1159         struct rte_ct_cnxn_data *cd,
1160         uint8_t udp_state)
1161 {
1162
1163         cd->expected_timeout = (ct->timing_cycles_per_timing_step) +
1164                         ct->ct_timeout.udptimeout.udp_timeouts[udp_state];
1165
1166         if (udp_state == cd->state_used_for_timer) {
1167                 /*
1168                  * Don't reset timer, too expensive. Instead, determine time
1169                  * elapsed since start of timer. When this timer expires, the
1170                  * timer will be reset to the elapsed timer. So if in a state
1171                  * with a 5 minute timer last sees a packet 4 minutes into the
1172                  * timer, the timer when expires will be reset to 4 minutes.
1173                  * This means the timer will then
1174                  * expire 5 minutes after the last packet.
1175                  */
1176                 return;
1177         }
1178
1179         if (TESTING_TIMERS)
1180                 printf("Set Timer for connection %p and state %s\n", cd,
1181                                          rte_ct_udp_names[udp_state]);
1182         rte_ct_set_cnxn_timer(ct, cd,
1183                                                 ct->ct_timeout.
1184                                                 udptimeout.udp_timeouts[udp_state]);
1185         cd->state_used_for_timer = udp_state;
1186 }
1187
1188 /* Cancel the timer associated with the connection.
1189  * Safe to call if no timer set.
1190  */
1191         void
1192 rte_ct_cancel_cnxn_timer(struct rte_ct_cnxn_data *cd)
1193 {
1194         if (TESTING_TIMERS)
1195                 printf("Cancel Timer\n");
1196
1197         rte_timer_stop(&cd->timer);
1198 }
1199
1200 void
1201 rte_ct_handle_expired_timers(struct rte_ct_cnxn_tracker *ct)
1202 {
1203         /*
1204          * If current time (in 100 ms increments) is different from the
1205          * time it was last viewed, then check for and process expired timers.
1206          */
1207
1208         uint64_t new_time = rte_get_tsc_cycles();
1209         uint64_t time_diff = new_time - ct->timing_last_time;
1210
1211         if (time_diff >= ct->timing_cycles_per_timing_step) {
1212                 ct->timing_last_time = new_time;
1213                 ct->timing_100ms_steps++;
1214         }
1215
1216         if (ct->timing_100ms_steps != ct->timing_100ms_steps_previous) {
1217                 rte_timer_manage();
1218                 ct->timing_100ms_steps_previous = ct->timing_100ms_steps;
1219         }
1220 }
1221
1222 /* timer has expired. Need to delete connection entry */
1223
1224 void
1225 rte_ct_cnxn_timer_expired(struct rte_timer *rt, void *arg)
1226 {
1227         /* the pointer to the rte_timer was actually a pointer
1228          * to the cnxn data
1229          */
1230         struct rte_ct_cnxn_data *cd = (struct rte_ct_cnxn_data *)rt;
1231         struct rte_ct_cnxn_tracker *ct = (struct rte_ct_cnxn_tracker *)arg;
1232         int success = 0;
1233
1234         /*
1235          * Check to see if the timer has "really" expired. If traffic occured
1236          * since the timer was set, the timer needs be extended, so that timer
1237          * expires the appropriate amount after that last packet.
1238          */
1239
1240         uint64_t current_time = ct->timing_100ms_steps *
1241                 ct->timing_cycles_per_timing_step;
1242
1243         if (cd->expected_timeout >= current_time) {
1244                 uint64_t time_diff = cd->expected_timeout - current_time;
1245
1246                 rte_ct_set_cnxn_timer(ct, cd, time_diff);
1247                 return;
1248         }
1249
1250         if (cd->protocol == TCP_PROTOCOL) {
1251                 if (cd->state_used_for_timer == RTE_CT_TCP_TIME_WAIT ||
1252                                 cd->state_used_for_timer == RTE_CT_TCP_CLOSE)
1253                         ct->counters->sessions_closed++;
1254                 else
1255                         ct->counters->sessions_timedout++;
1256                 /* if synproxied connection, free list of buffered
1257                  * packets if any
1258                  */
1259
1260                 if (cd->ct_protocol.synproxy_data.synproxied)
1261                         rte_ct_release_buffered_packets(ct, cd);
1262
1263         } else if (cd->protocol == UDP_PROTOCOL)
1264                 ct->counters->sessions_closed++;
1265         if (ct->counters->current_active_sessions > 0)
1266                 ct->counters->current_active_sessions--;
1267
1268         if (RTE_CT_TIMER_EXPIRED_DUMP) {
1269                 uint64_t percent = (cd->counters.packets_dropped * 10000) /
1270                                 (cd->counters.packets_forwarded +
1271                                  cd->counters.packets_dropped);
1272
1273                 if (cd->protocol == TCP_PROTOCOL) {
1274                         printf("CnxnTrkr %s, timed-out TCP Connection: %p,",
1275                                         ct->name, cd);
1276                         printf(" %s, pkts forwarded %"
1277                                 PRIu64 ", pkts dropped %" PRIu64
1278                                 ", drop%% %u.%u\n",
1279                                 rte_ct_tcp_names[cd->state_used_for_timer],
1280                                 cd->counters.packets_forwarded,
1281                                 cd->counters.packets_dropped,
1282                                 (uint32_t) (percent / 100),
1283                                 (uint32_t) (percent % 100));
1284                 } else if (cd->protocol == UDP_PROTOCOL) {
1285                         printf("CnxnTrkr %s, Timed-out UDP Connection: %p,",
1286                                         ct->name, cd);
1287                         printf(" %s, pkts forwarded %" PRIu64
1288                                 ", pkts dropped %" PRIu64 ", drop%% %u.%u\n",
1289                                 rte_ct_udp_names[cd->state_used_for_timer],
1290                                 cd->counters.packets_forwarded,
1291                                 cd->counters.packets_dropped,
1292                                 (uint32_t) (percent / 100),
1293                                 (uint32_t) (percent % 100));
1294                 }
1295         }
1296
1297         success = rte_hash_del_key(ct->rhash, &cd->key);
1298
1299         if (success < 0) {
1300                 /* TODO: change to a log */
1301                 rte_ct_print_hashkey(cd->key);
1302         }
1303
1304 }
1305
1306 struct rte_CT_counter_block *
1307 rte_ct_get_counter_address(struct rte_ct_cnxn_tracker *ct)
1308 {
1309         return ct->counters;
1310 }
1311
1312 int
1313 rte_ct_set_configuration_options(struct rte_ct_cnxn_tracker *ct,
1314                 char *name, char *value)
1315 {
1316         /* check non-time values first */
1317         int ival = atoi(value);
1318
1319         /* tcp_loose */
1320         if (strcmp(name, "tcp_loose") == 0) {
1321                 ct->misc_options.tcp_loose = ival;
1322                 return 0;
1323         }
1324
1325         /* tcp_be_liberal */
1326         if (strcmp(name, "tcp_be_liberal") == 0) {
1327                 ct->misc_options.tcp_be_liberal = ival;
1328                 return 0;
1329         }
1330
1331         /* tcp_max_retrans */
1332         if (strcmp(name, "tcp_max_retrans") == 0) {
1333                 ct->misc_options.tcp_max_retrans = ival;
1334                 return 0;
1335         }
1336
1337         uint64_t time_value = ival * ct->hertz;
1338
1339
1340         /* configuration of timer values */
1341
1342         /* tcp_syn_sent */
1343         if (strcmp(name, "tcp_syn_sent") == 0) {
1344                 if (time_value == 0)
1345                         return -1;
1346                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_SENT] =
1347                         time_value;
1348                 return 0;
1349         }
1350
1351         /* tcp_syn_recv */
1352         if (strcmp(name, "tcp_syn_recv") == 0) {
1353                 if (time_value == 0)
1354                         return -1;
1355                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_RECV] =
1356                         time_value;
1357                 return 0;
1358         }
1359
1360         /* tcp_established */
1361         if (strcmp(name, "tcp_established") == 0) {
1362                 if (time_value == 0)
1363                         return -1;
1364                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_ESTABLISHED] =
1365                         time_value;
1366                 return 0;
1367         }
1368
1369         /* tcp_fin_wait */
1370         if (strcmp(name, "tcp_fin_wait") == 0) {
1371                 if (time_value == 0)
1372                         return -1;
1373                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_FIN_WAIT] =
1374                         time_value;
1375                 return 0;
1376         }
1377
1378         /* tcp_close_wait */
1379         if (strcmp(name, "tcp_close_wait") == 0) {
1380                 if (time_value == 0)
1381                         return -1;
1382                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_CLOSE_WAIT] =
1383                         time_value;
1384                 return 0;
1385         }
1386
1387         /* tcp_last_ack */
1388         if (strcmp(name, "tcp_last_ack") == 0) {
1389                 if (time_value == 0)
1390                         return -1;
1391                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_LAST_ACK] =
1392                         time_value;
1393                 return 0;
1394         }
1395
1396         /* tcp_time_wait */
1397         if (strcmp(name, "tcp_time_wait") == 0) {
1398                 if (time_value == 0)
1399                         return -1;
1400                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_TIME_WAIT] =
1401                         time_value;
1402                 return 0;
1403         }
1404
1405         /* tcp_close */
1406         if (strcmp(name, "tcp_close") == 0) {
1407                 if (time_value == 0)
1408                         return -1;
1409                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_CLOSE] =
1410                         time_value;
1411                 return 0;
1412         }
1413
1414         /* tcp_syn_sent_2 */
1415         if (strcmp(name, "tcp_syn_sent_2") == 0) {
1416                 if (time_value == 0)
1417                         return -1;
1418                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_SENT_2] =
1419                         time_value;
1420                 return 0;
1421         }
1422
1423         /* tcp_retrans */
1424         if (strcmp(name, "tcp_retrans") == 0) {
1425                 if (time_value == 0)
1426                         return -1;
1427                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_RETRANS] =
1428                         time_value;
1429                 return 0;
1430         }
1431
1432         /* tcp_unack */
1433         if (strcmp(name, "tcp_unack") == 0) {
1434                 if (time_value == 0)
1435                         return -1;
1436                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_UNACK] =
1437                         time_value;
1438                 return 0;
1439         }
1440
1441         /* udp_unreplied */
1442         if (strcmp(name, "udp_unreplied") == 0) {
1443                 if (time_value == 0)
1444                         return -1;
1445                 ct->ct_timeout.udptimeout.udp_timeouts[RTE_CT_UDP_UNREPLIED] =
1446                         time_value;
1447                 return 0;
1448         }
1449
1450         /* udp_replied */
1451         if (strcmp(name, "udp_replied") == 0) {
1452                 if (time_value == 0)
1453                         return -1;
1454                 ct->ct_timeout.udptimeout.udp_timeouts[RTE_CT_UDP_REPLIED] =
1455                         time_value;
1456                 return 0;
1457         }
1458         return 1;
1459 }
1460
1461 static void
1462 rte_ct_cnxn_tracker_batch_lookup_basic_type(
1463                 struct rte_ct_cnxn_tracker *ct,
1464                 struct rte_mbuf **pkts,
1465                 uint64_t *pkts_mask,
1466                 uint64_t no_new_cnxn_mask,
1467                 uint64_t *reply_pkt_mask,
1468                 uint64_t *hijack_mask,
1469                 uint8_t ip_hdr_size_bytes)
1470 {
1471         /* bitmap of packets left to process */
1472         uint64_t pkts_to_process = *pkts_mask;
1473         /* bitmap of valid packets to return */
1474         uint8_t compacting_map[RTE_HASH_LOOKUP_BULK_MAX];
1475         /* for pkt, key in originators direction? */
1476         uint8_t key_orig_dir[RTE_HASH_LOOKUP_BULK_MAX];
1477         uint32_t packets_for_lookup = 0;
1478         int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
1479         uint32_t i;
1480         struct rte_ct_cnxn_data new_cnxn_data;
1481         struct rte_ct_cnxn_data *cnxn_data_entry[RTE_HASH_LOOKUP_BULK_MAX];
1482
1483         rte_prefetch0(ct->hash_table_entries);
1484         rte_prefetch0(ct->rhash);
1485
1486         if (CNXN_TRX_DEBUG > 1) {
1487                 printf("Enter cnxn tracker %p", ct);
1488                 printf(" synproxy batch lookup with packet mask %p\n",
1489                                 (void *)*pkts_mask);
1490         }
1491
1492         rte_ct_forget_new_connections(ct);
1493         *reply_pkt_mask = 0;
1494         *hijack_mask = 0;
1495
1496         /*
1497          * Use bulk lookup into hash table for performance reasons. Cannot have
1498          * "empty slots" in the bulk lookup,so need to create a compacted table.
1499          */
1500
1501         switch (ip_hdr_size_bytes) {
1502         case IPv4_HEADER_SIZE:
1503                 for (; pkts_to_process;) {
1504                         uint8_t pos = (uint8_t) __builtin_ctzll(
1505                                         pkts_to_process);
1506                         /* bitmask representing only this packet */
1507                         uint64_t pkt_mask = 1LLU << pos;
1508                         /* remove this packet from remaining list */
1509                         pkts_to_process &= ~pkt_mask;
1510
1511                         struct rte_mbuf *pkt = pkts[pos];
1512
1513
1514                         /* TCP and UDP ports at same offset, just use TCP for
1515                          * offset calculation
1516                          */
1517                         struct tcp_hdr *thdr = (struct tcp_hdr *)
1518                                 RTE_MBUF_METADATA_UINT32_PTR(pkt,
1519                                                 (IP_START + ip_hdr_size_bytes));
1520                         uint16_t src_port = rte_bswap16(thdr->src_port);
1521                         uint16_t dst_port = rte_bswap16(thdr->dst_port);
1522
1523                         struct ipv4_hdr *ihdr = (struct ipv4_hdr *)
1524                                 RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
1525                         uint8_t proto = ihdr->next_proto_id;
1526
1527                         if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
1528                                 /* only tracking TCP and UDP at this time */
1529                                 continue;
1530                         }
1531
1532                         /*
1533                          * Load the addresses and ports, and convert from Intel
1534                          * to network byte order. Strictly speaking, it is not
1535                          * necessary to do this conversion, as this data is only
1536                          * used to create a hash key.
1537                          */
1538                         uint32_t src_addr = rte_bswap32(ihdr->src_addr);
1539                         uint32_t dst_addr = rte_bswap32(ihdr->dst_addr);
1540
1541                         if (CNXN_TRX_DEBUG > 2) {
1542                                 if (CNXN_TRX_DEBUG > 4)
1543                                         rte_ct_cnxn_print_pkt(pkt,
1544                                                         IP_VERSION_4);
1545                         }
1546                         /* need to create compacted table of pointers to pass
1547                          * to bulk lookup
1548                          */
1549
1550                         compacting_map[packets_for_lookup] = pos;
1551                         key_orig_dir[packets_for_lookup] =
1552                                 rte_ct_create_cnxn_hashkey(&src_addr, &dst_addr,
1553                                                 src_port, dst_port,
1554                                                 proto,
1555                                                 &ct->hash_keys
1556                                                 [packets_for_lookup][0],
1557                                                 IP_VERSION_4);
1558                         packets_for_lookup++;
1559                 }
1560                 break;
1561         case IPv6_HEADER_SIZE:
1562                 for (; pkts_to_process;) {
1563                         uint8_t pos = (uint8_t) __builtin_ctzll(
1564                                         pkts_to_process);
1565                         /* bitmask representing only this packet */
1566                         uint64_t pkt_mask = 1LLU << pos;
1567                         /* remove this packet from remaining list */
1568                         pkts_to_process &= ~pkt_mask;
1569
1570                         struct rte_mbuf *pkt = pkts[pos];
1571
1572
1573                         void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt,
1574                                         IP_START);
1575
1576                         /* TCP and UDP ports at same offset, just use TCP for
1577                          * offset calculation
1578                          */
1579                         struct tcp_hdr *thdr = (struct tcp_hdr *)
1580                                 RTE_MBUF_METADATA_UINT32_PTR(pkt,
1581                                                 (IP_START + ip_hdr_size_bytes));
1582                         uint16_t src_port = rte_bswap16(thdr->src_port);
1583                         uint16_t dst_port = rte_bswap16(thdr->dst_port);
1584
1585                         struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
1586                         uint8_t proto = ihdr->proto;
1587
1588                         if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
1589                                 /* only tracking TCP and UDP at this time */
1590                                 continue;
1591                         }
1592
1593                         if (CNXN_TRX_DEBUG > 2) {
1594                                 if (CNXN_TRX_DEBUG > 4)
1595                                         rte_ct_cnxn_print_pkt(pkt,
1596                                                         IP_VERSION_6);
1597                         }
1598
1599                         /* need to create compacted table of pointers to pass
1600                          * to bulk lookup
1601                          */
1602
1603                         compacting_map[packets_for_lookup] = pos;
1604                         key_orig_dir[packets_for_lookup] =
1605                                 rte_ct_create_cnxn_hashkey(
1606                                                 (uint32_t *) ihdr->src_addr,
1607                                                 (uint32_t *) ihdr->dst_addr,
1608                                                 src_port, dst_port,
1609                                                 proto,
1610                                                 &ct->hash_keys
1611                                                 [packets_for_lookup][0],
1612                                                 IP_VERSION_6);
1613                         packets_for_lookup++;
1614                 }
1615                 break;
1616         default:
1617                 break;
1618         }
1619         if (unlikely(packets_for_lookup == 0))
1620                 return; /* no suitable packet for lookup */
1621
1622         /* Clear all the data to make sure no stack garbage is in it */
1623         memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
1624
1625         /* lookup all tcp & udp packets in the connection table */
1626
1627         int lookup_result = rte_hash_lookup_bulk(ct->rhash,
1628                         (const void **)&ct->hash_key_ptrs,
1629                         packets_for_lookup, &positions[0]);
1630
1631         if (unlikely(lookup_result < 0)) {
1632                 /* TODO: change a log */
1633                 printf("Unexpected hash table problem, discarding all packets");
1634                 *pkts_mask = 0;
1635                 return; /* unknown error, just discard all packets */
1636         }
1637
1638         /* Pre-fetch hash table entries and counters to avoid LLC miss */
1639         rte_prefetch0(ct->counters);
1640         for (i = 0; i < packets_for_lookup; i++) {
1641                 struct rte_ct_cnxn_data *entry = NULL;
1642                 int hash_table_entry = positions[i];
1643
1644                 if (hash_table_entry >= 0) {
1645                         /* Entry found for existing UDP/TCP connection */
1646                         entry = &ct->hash_table_entries[hash_table_entry];
1647                         rte_prefetch0(&entry->counters.packets_forwarded);
1648                         rte_prefetch0(entry);
1649                         rte_prefetch0(&entry->key_is_client_order);
1650                 }
1651                 cnxn_data_entry[i] = entry;
1652         }
1653
1654         for (i = 0; i < packets_for_lookup; i++) {
1655                 /* index into hash table entries */
1656                 int hash_table_entry = positions[i];
1657                 /* index into packet table of this packet */
1658                 uint8_t pkt_index = compacting_map[i];
1659                 /* bitmask representing only this packet */
1660                 uint64_t pkt_mask = 1LLU << pkt_index;
1661                 uint8_t key_is_client_order = key_orig_dir[i];
1662                 uint32_t *key = ct->hash_key_ptrs[pkt_index];
1663                 uint8_t protocol = *(key + 9);
1664                 struct rte_mbuf *packet = pkts[pkt_index];
1665                 int no_new_cnxn = (pkt_mask & no_new_cnxn_mask) != 0;
1666
1667                 /* rte_ct_print_hashkey(key); */
1668
1669                 if (protocol == TCP_PROTOCOL) {
1670                         enum rte_ct_packet_action tcp_pkt_action;
1671
1672                         tcp_pkt_action = rte_ct_handle_tcp_lookup(ct, packet,
1673                                         pkt_index, key_is_client_order,
1674                                         key, hash_table_entry, no_new_cnxn,
1675                                         ip_hdr_size_bytes);
1676
1677                         switch (tcp_pkt_action) {
1678
1679                         case RTE_CT_SEND_CLIENT_SYNACK:
1680                         case RTE_CT_SEND_SERVER_ACK:
1681                                 /* altered packet or copy must be returned
1682                                  * to originator
1683                                  */
1684                                 *reply_pkt_mask |= pkt_mask;
1685                                 /* FALL-THROUGH */
1686
1687                         case RTE_CT_SEND_SERVER_SYN:
1688                         case RTE_CT_FORWARD_PACKET:
1689                                 break;
1690
1691                         case RTE_CT_HIJACK:
1692                                 *hijack_mask |= pkt_mask;
1693                                 break;
1694
1695                         default:
1696                                 /* bad packet, clear mask to drop */
1697                                 *pkts_mask ^= pkt_mask;
1698                                 ct->counters->pkts_drop++;
1699                                 break;
1700                 }
1701                         /* rte_ct_cnxn_print_pkt(pkts[pkt_index]); */
1702
1703                 } else {        /* UDP entry */
1704
1705                         if (hash_table_entry >= 0) {
1706                                 /*
1707                                  * connection found for this packet. Check that
1708                                  * this is a valid packet for connection
1709                                  */
1710
1711                                 struct rte_ct_cnxn_data *entry =
1712                                         cnxn_data_entry[i];
1713
1714                                 if (rte_ct_udp_packet
1715                                                 (ct, entry, pkts[pkt_index],
1716                                                  key_is_client_order)) {
1717                                         entry->counters.packets_forwarded++;
1718                                         ct->counters->pkts_forwarded++;
1719                                 }
1720                         } else {
1721                                 /*
1722                                  * connection not found in bulk hash lookup,
1723                                  * but might have been added in this batch
1724                                  */
1725
1726                                 struct rte_ct_cnxn_data *recent_entry =
1727                                         rte_ct_search_new_connections(ct, key);
1728
1729                                 if (recent_entry != NULL) {
1730                                         if (rte_ct_udp_packet(ct, recent_entry,
1731                                                         pkts[pkt_index],
1732                                                         key_is_client_order)) {
1733                                                 recent_entry->counters.
1734                                                         packets_forwarded++;
1735                                                 ct->counters->pkts_forwarded++;
1736                                         }
1737                                 } else {
1738                                         /* no existing connection, try to add
1739                                          * new one
1740                                          */
1741
1742                                         if (no_new_cnxn) {
1743                                                 /* new cnxn not allowed, clear
1744                                                  * mask to drop
1745                                                  */
1746                                                 *pkts_mask ^= pkt_mask;
1747                                                 ct->counters->pkts_drop++;
1748                                                 ct->counters->
1749                                                 pkts_drop_invalid_conn++;
1750                                                 continue;
1751                                         }
1752
1753                                         if (rte_ct_udp_new_connection(ct,
1754                                         &new_cnxn_data, pkts[pkt_index])) {
1755                                                 /* This packet creates a
1756                                                  * connection
1757                                                  */
1758                                                 int32_t position =
1759                                                         rte_hash_add_key(ct->
1760                                                                 rhash, key);
1761
1762                                         if (position < 0)
1763                                                 continue;
1764
1765                                                 struct rte_ct_cnxn_data
1766                                                         *new_hash_entry = &ct->
1767                                                 hash_table_entries[position];
1768
1769                                                 /*
1770                                                  *update fields in new_cnxn_data
1771                                                  * not set by "new_connection"
1772                                                  */
1773
1774                                                 memcpy(new_cnxn_data.key, key,
1775                                                 sizeof(new_cnxn_data.key));
1776
1777                                                 new_cnxn_data.
1778                                                         key_is_client_order
1779                                                         = key_is_client_order;
1780                                                 new_cnxn_data.protocol =
1781                                                         UDP_PROTOCOL;
1782                                                 rte_cnxn_ip_type(
1783                                                         &new_cnxn_data.type,
1784                                                         packet);
1785                                                 rte_memcpy(new_hash_entry,
1786                                                         &new_cnxn_data,
1787                                                         sizeof(struct
1788                                                         rte_ct_cnxn_data));
1789
1790                                                 new_hash_entry->counters.
1791                                                         packets_forwarded = 1;
1792                                                 ct->counters->pkts_forwarded++;
1793                                                 new_hash_entry->counters.
1794                                                         packets_dropped = 0;
1795                                                 ct->counters->pkts_drop = 0;
1796                                                 ct->counters->
1797                                                 current_active_sessions++;
1798                                                 ct->counters->
1799                                                         sessions_activated++;
1800
1801                                                 new_hash_entry->
1802                                                         state_used_for_timer
1803                                                         = RTE_CT_UDP_NONE;
1804                                                 rte_ct_set_cnxn_timer_for_udp(
1805                                                         ct,
1806                                                         new_hash_entry,
1807                                                         RTE_CT_UDP_UNREPLIED);
1808
1809                                                 rte_ct_remember_new_connection(
1810                                                                 ct,
1811                                                                 new_hash_entry);
1812                                         }
1813                                 }
1814
1815                         }
1816
1817                 }               /* UDP */
1818         }                       /* packets_for_lookup */
1819
1820         if (CNXN_TRX_DEBUG > 1) {
1821                 printf("Exit cnxn tracker synproxy batch lookup with");
1822                 printf(" packet mask %p\n", (void *)*pkts_mask);
1823         }
1824 }