common: Adding common library for sample vnf
[samplevnf.git] / common / VIL / conntrack / rte_cnxn_tracking.c
1 /*
2 // Copyright (c) 2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <rte_ether.h>
18 #include <rte_prefetch.h>
19 #include <rte_cycles.h>
20 #include <rte_malloc.h>
21 #include <rte_memcpy.h>
22 #include <rte_timer.h>
23 #include <rte_spinlock.h>
24 #include "rte_cnxn_tracking.h"
25 #include "rte_ct_tcp.h"
26
/*
 * Compile-time debug settings. CNXN_TRX_DEBUG is a verbosity level that the
 * code compares against thresholds; 0 keeps all debug output off.
 */
#define CNXN_TRX_DEBUG                  0
#define TESTING_TIMERS                  0
#define RTE_CT_TIMER_EXPIRED_DUMP       0

/*
 * Byte offsets passed to the RTE_MBUF_METADATA_* accessors to reach the
 * packet headers stored in an mbuf.
 */
#define META_DATA_OFFSET        128
#define ETH_HDR_SIZE            14
#define ETHERNET_START          (META_DATA_OFFSET + RTE_PKTMBUF_HEADROOM)
#define IP_START                (ETHERNET_START + ETH_HDR_SIZE)

/* IPv4: protocol field, source address and start of the L4 header */
#define PROTOCOL_START          (IP_START + 9)
#define SRC_ADDR_START          (IP_START + 12)
#define TCP_START               (IP_START + 20)

/* IPv6: next-header field, source address and start of the L4 header */
#define PROTOCOL_START_IPV6     (IP_START + 6)
#define SRC_ADDR_START_IPV6     (IP_START + 8)
#define TCP_START_IPV6          (IP_START + 40)

/* IP protocol numbers tracked by this module */
#define TCP_PROTOCOL            6
#define UDP_PROTOCOL            17

/* Key sizes, in bytes */
#define TCP_FW_IPV4_KEY_SIZE    16
#define TCP_FW_IPV6_KEY_SIZE    40

/* Fixed IP header sizes, in bytes (headers without options/extensions) */
#define IPv4_HEADER_SIZE        20
#define IPv6_HEADER_SIZE        40

/* Value of the IP version nibble */
#define IP_VERSION_4            4
#define IP_VERSION_6            6
55
/*
 * Forward declaration of a file-local batch lookup helper that takes the IP
 * header size explicitly. Note that pkts_mask is passed by pointer here.
 */
static void rte_ct_cnxn_tracker_batch_lookup_basic_type(
        struct rte_ct_cnxn_tracker *ct, struct rte_mbuf **pkts,
        uint64_t *pkts_mask, uint64_t no_new_cnxn_mask,
        uint64_t *reply_pkt_mask, uint64_t *hijack_mask,
        uint8_t ip_hdr_size_bytes);
65
/*
 * Check if the packet is valid for the given connection. "original_direction"
 * is false if the address order needs to be "flipped" (see
 * rte_ct_create_cnxn_hashkey()), true otherwise. Returns 0 if the packet is
 * valid, or a negative value otherwise.
 */
71
72 /* IP/TCP header print for debugging */
73 static void
74 rte_ct_cnxn_print_pkt(struct rte_mbuf *pkt, uint8_t type)
75 {
76         int i;
77         uint8_t *rd = RTE_MBUF_METADATA_UINT8_PTR(pkt, IP_START);
78
79         printf("\n");
80         printf("IP and TCP/UDP headers:\n");
81
82         if (type == IP_VERSION_4) {
83                 for (i = 0; i < 40; i++) {
84                         printf("%02x ", rd[i]);
85                         if ((i & 3) == 3)
86                                 printf("\n");
87                 }
88                 printf("\n");
89         }
90
91         if (type == IP_VERSION_6) {
92                 for (i = 0; i < 60; i++) {
93                         printf("%02x ", rd[i]);
94                         if ((i & 3) == 3)
95                                 printf("\n");
96                 }
97                 printf("\n");
98         }
99
100 }
101
102 static void
103 rte_cnxn_ip_type(uint8_t *type, struct rte_mbuf *pkt)
104 {
105
106         int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
107
108         if (ip_hdr_size_bytes == IPv4_HEADER_SIZE)
109                 *type = IP_VERSION_4;
110
111         if (ip_hdr_size_bytes == IPv6_HEADER_SIZE)
112                 *type = IP_VERSION_6;
113 }
114
/*
 * Debug helper: print the ten 32-bit words of a connection hash key in hex
 * on a single line.
 */
static void
rte_ct_print_hashkey(uint32_t *key)
{
        const uint32_t *w = key;

        printf("Key: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x \\\n",
               w[0], w[1], w[2], w[3], w[4],
               w[5], w[6], w[7], w[8], w[9]);
}
122
/*
 * Create a hash key consisting of the source address/port, the destination
 * address/port, and the protocol number. The two address/port pairs are
 * sorted, so the key is the same regardless of the direction of the packet.
 * Remembering whether the pairs were "flipped" from the order in the packet,
 * and comparing that to whether the original hash key was flipped, tells if
 * a packet travels in the direction of the original sender or in the
 * response direction.
 *
 * src_addr, dst_addr: one 32-bit word for IP_VERSION_4, 16 bytes for
 *                     IP_VERSION_6. The IPv6 bytes are only accessed with
 *                     memcmp/memcpy, so they need not be 32-bit aligned.
 * src_port, dst_port: L4 ports.
 * proto:              protocol number, stored in key[9].
 * key:                output, ten 32-bit words.
 *                     IPv4: key[0..1] addresses, key[2] ports,
 *                           key[3..8] zero.
 *                     IPv6: key[0..3] lower address, key[4..7] higher
 *                           address, key[8] ports.
 *                     For any other type only key[9] is written.
 * type:               IP_VERSION_4 or IP_VERSION_6.
 *
 * Returns 1 (true) if the key was left in the packet's original direction,
 * 0 if source and destination were swapped.
 */
uint8_t
rte_ct_create_cnxn_hashkey(
        uint32_t *src_addr,
        uint32_t *dst_addr,
        uint16_t src_port,
        uint16_t dst_port,
        uint8_t proto,
        uint32_t *key,
        uint8_t type)
{
        /*
         * Widen the ports before shifting: a uint16_t is promoted to a
         * signed int, and shifting a value >= 0x8000 left by 16 bits
         * overflows it, which is undefined behaviour.
         */
        const uint32_t ports_as_sent = ((uint32_t)src_port << 16) | dst_port;
        const uint32_t ports_swapped = ((uint32_t)dst_port << 16) | src_port;
        uint8_t hash_order_original_direction = 1;

        key[9] = proto;

        if (type == IP_VERSION_4) {
                uint32_t source = *src_addr;
                uint32_t dest = *dst_addr;

                key[3] = key[4] = key[5] = key[6] = key[7] = key[8] = 0;

                if ((source < dest)
                                || ((source == dest) && (src_port < dst_port))) {
                        key[0] = source;
                        key[1] = dest;
                        key[2] = ports_as_sent;
                } else {
                        key[0] = dest;
                        key[1] = source;
                        key[2] = ports_swapped;
                        hash_order_original_direction = 0;
                }
        }

        if (type == IP_VERSION_6) {
                int ip_cmp = memcmp(src_addr, dst_addr, 16);
                const uint32_t *lo_addr;
                const uint32_t *hi_addr;

                if ((ip_cmp < 0) || ((ip_cmp == 0) && (src_port < dst_port))) {
                        lo_addr = src_addr;
                        hi_addr = dst_addr;
                        key[8] = ports_as_sent;
                } else {
                        lo_addr = dst_addr;
                        hi_addr = src_addr;
                        key[8] = ports_swapped;
                        hash_order_original_direction = 0;
                }
                /*
                 * Copy byte-wise: the addresses may point straight into
                 * packet data that is not 32-bit aligned.
                 */
                memcpy(&key[0], lo_addr, 16);
                memcpy(&key[4], hi_addr, 16);
        }
#ifdef ALGDBG
         rte_ct_print_hashkey(key);
#endif
        return hash_order_original_direction;
}
196
197
198 int
199 rte_ct_get_IP_hdr_size(struct rte_mbuf *pkt)
200 {
201         /* NOTE: Only supporting IP headers with no options at this time, so
202          * header is fixed size
203          */
204         /* TODO: Need to find defined contstants for start of Ether and
205          * IP headers.
206          */
207         uint8_t hdr_chk = RTE_MBUF_METADATA_UINT8(pkt, IP_START);
208
209         hdr_chk = hdr_chk >> 4;
210
211         if (hdr_chk == IP_VERSION_4)
212                 return IPv4_HEADER_SIZE;
213
214         else if (hdr_chk == IP_VERSION_6)
215                 return IPv6_HEADER_SIZE;
216
217         else    /* Not IPv4 header with no options, return negative. */
218                 return -1;
219         /*
220          * int ip_hdr_size_bytes = (ihdr->version_ihl & IPV4_HDR_IHL_MASK) *
221          * IPV4_IHL_MULTIPLIER;
222          * return ip_hdr_size_bytes;
223          */
224 }
225
226 static void
227 rte_ct_set_timer_for_new_cnxn(
228                 struct rte_ct_cnxn_tracker *ct,
229                 struct rte_ct_cnxn_data *cd)
230 {
231         cd->state_used_for_timer = RTE_CT_TCP_NONE;
232         rte_ct_set_cnxn_timer_for_tcp(ct, cd, RTE_CT_TCP_SYN_SENT);
233 }
234
235 /*
236  * The connection data is stored in a hash table which makes use of the bulk
237  * lookup optimization provided in DPDK. All of the packets seen in one call
238  * to rte_ct_cnxn_tracker_batch_lookup are done in one hash table lookup. The
239  * number of packets is the number being processed by the pipeline (default
240  * max 32, absolute max 64). For any TCP or UDP packet that does not have
241  * an existing (pseudo-)connection in the table (i.e. was a miss on the hash
242  * lookup), a new connection must be added.
243  *
244  * It is possible, for UDP, that the first packet for a (pseudo-)connection and
245  * a subsequent packet are in the same batch. This means that when looking for
246  * new connections in a batch the first one must add the connection, the
247  * second and subsequent (in that batch) that are part of the same connection
248  * must use that newly created one, not create another table entry.
249  *
 * Any newly created entries are "remembered" in a linear table, which is
 * searched when processing hash table misses. All the entries in that table
 * are "forgotten" at the start of a new batch.
253  *
254  * A linear table may seem slow, but consider:
255  * - out of millions of packets/second, this involves at most 64.
 * - this affects only UDP. TCP connections are set up using an acknowledgement
 *   protocol, so would not have multiple packets for a new connection in the
 *   same batch (TODO)
259  * - the number of new connections in a batch would usually be zero, or a low
260  *   number like 1
261  * - all the data to search through should still be in cache
262  */
263
264 static inline void
265 rte_ct_remember_new_connection(
266         struct rte_ct_cnxn_tracker *ct,
267         struct rte_ct_cnxn_data *entry)
268 {
269         ct->latest_connection++;
270         ct->new_connections[ct->latest_connection] = entry;
271 }
272
273 static struct rte_ct_cnxn_data *
274 rte_ct_search_new_connections(struct rte_ct_cnxn_tracker *ct, uint32_t *key)
275 {
276         int i;
277
278         for (i = 0; i <= ct->latest_connection; i++) {
279                 uint32_t *cnxn_key = ct->new_connections[i]->key;
280                 int key_cmp = memcmp(cnxn_key, key,
281                                 sizeof(ct->new_connections[i]->key));
282
283                 if (key_cmp == 0)
284                         return ct->new_connections[i];
285         }
286         return NULL;
287 }
288
289 static inline void rte_ct_forget_new_connections(struct rte_ct_cnxn_tracker *ct)
290 {
291         ct->latest_connection = -1;
292 }
293
294
295
296
297 static enum rte_ct_packet_action
298 rte_ct_handle_tcp_lookup(
299         struct  rte_ct_cnxn_tracker *ct,
300         struct  rte_mbuf *packet,
301         uint8_t pkt_num,
302         uint8_t key_is_client_order,
303         uint32_t *key,
304         int     hash_table_entry,
305         int     no_new_cnxn,
306         uint8_t ip_hdr_size_bytes)
307 {
308         struct rte_ct_cnxn_data new_cnxn_data;
309
310         memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
311         enum rte_ct_packet_action packet_action;
312
313         #ifdef CT_CGNAT
314         int32_t position = hash_table_entry;
315         ct->positions[pkt_num] = position;
316         #endif
317
318         /* rte_ct_cnxn_print_pkt(packet); */
319         if (hash_table_entry >= 0) {
320                 /*
321                  * connection found for this packet.
322                  * Check that this is a valid packet for connection
323                  */
324
325                 struct rte_ct_cnxn_data *entry =
326                                 &ct->hash_table_entries[hash_table_entry];
327
328                 packet_action = rte_ct_verify_tcp_packet(ct, entry, packet,
329                                 key_is_client_order, ip_hdr_size_bytes);
330
331                 switch (packet_action) {
332
333                 case RTE_CT_FORWARD_PACKET:
334                         entry->counters.packets_forwarded++;
335                         break;
336
337                 case RTE_CT_DROP_PACKET:
338                         entry->counters.packets_dropped++;
339                         return RTE_CT_DROP_PACKET;
340
341                 case RTE_CT_REOPEN_CNXN_AND_FORWARD_PACKET:
342                         /* Entry already in hash table, just re-initialize */
343
344                         /* Don't use syproxy on re-init, since it
345                          * is a valid connection
346                          */
347
348                         if (rte_ct_tcp_new_connection(ct, &new_cnxn_data,
349                                                 packet, 0, ip_hdr_size_bytes) !=
350                                         RTE_CT_DROP_PACKET) {
351                                 rte_memcpy(&entry->ct_protocol.tcp_ct_data,
352                                 &new_cnxn_data.ct_protocol.tcp_ct_data,
353                                 sizeof(new_cnxn_data.ct_protocol.tcp_ct_data));
354                                 rte_ct_set_timer_for_new_cnxn(ct, entry);
355                                 if (ct->counters->sessions_reactivated > 0)
356                                         ct->counters->sessions_reactivated--;
357                         }
358
359                         break;
360
361                 case RTE_CT_SEND_SERVER_SYN:
362                         ct->counters->pkts_forwarded++;
363                         /* packet modified, send back to original source */
364                         return RTE_CT_SEND_SERVER_SYN;
365
366                 case RTE_CT_SEND_SERVER_ACK:
367                         ct->counters->pkts_forwarded++;
368                         /* packet modified, send back to original source */
369                         return RTE_CT_SEND_SERVER_ACK;
370
371                 case RTE_CT_HIJACK:
372                         ct->counters->pkts_forwarded++;
373                         /* packet saved with connection, notify VNF
374                          * to hijack it
375                          */
376                         return RTE_CT_HIJACK;
377
378                 case RTE_CT_DESTROY_CNXN_AND_FORWARD_PACKET:
379
380                         /*
381                          * Forward the packet because it is "legal", but destroy
382                          * the connection by removing it from the hash table and
383                          * cancelling any timer. There is a remote possibility
384                          * (perhaps impossible?) that a later packet in the same
385                          * batch is for this connection. Due to the batch
386                          * lookup, which has already happened, the later packet
387                          * thinks that the connection is valid. This might cause
388                          * a timer to be set. Eventually, it would time out so
389                          * the only bug case occurs if the hash table also, in
390                          * the same batch, allocates this entry for a new
391                          * connection before the above packet is received. The
392                          * chances of this happening seem impossibly small but
393                          * this case should perhaps be investigated further.
394                          */
395
396                         if (rte_hash_del_key(ct->rhash, entry->key) >= 0) {
397                                 /*
398                                  * if rte_hash_del_key >= 0, then the connection
399                                  * was found in the hash table and removed.
400                                  * Counters must be updated, and the timer
401                                  * cancelled. If the result was < 0, then the
402                                  * connection must have already been deleted,
403                                  * and it must have been deleted in this batch
404                                  * of packets processed. Do nothing.
405                                  */
406
407                                 ct->counters->sessions_closed++;
408                                 if (ct->counters->current_active_sessions > 0)
409                                         ct->counters->current_active_sessions--;
410                                 rte_ct_cancel_cnxn_timer(entry);
411                         }
412                         entry->counters.packets_forwarded++;
413                         break;
414
415                 default:
416                         break;
417                 }
418         } else {
419                 /* try to add new connection */
420                 struct rte_ct_cnxn_data *new_hash_entry;
421
422                 if (no_new_cnxn) {
423                         ct->counters->pkts_drop_invalid_conn++;
424                         return RTE_CT_DROP_PACKET;
425                 }
426
427                 packet_action = rte_ct_tcp_new_connection(ct, &new_cnxn_data,
428                                 packet, ct->misc_options.synproxy_enabled,
429                                 ip_hdr_size_bytes);
430
431                 if (unlikely(packet_action == RTE_CT_DROP_PACKET)) {
432                         ct->counters->pkts_drop_invalid_conn++;
433                         return RTE_CT_DROP_PACKET;
434                 }
435
436                 /* This packet creates a connection . */
437                 int32_t position = rte_hash_add_key(ct->rhash, key);
438                 if (position < 0) {
439                         printf
440                                         ("Failed to add new connection to hash table %d, pkt_num:%d\n",
441                                          position, pkt_num);
442                         return RTE_CT_DROP_PACKET;
443                 }
444         #ifdef CT_CGNAT
445         ct->positions[pkt_num] = position;
446         #endif
447                 new_hash_entry = &ct->hash_table_entries[position];
448
449                 /* update fields in new_cnxn_data not set by new_connection */
450
451                 memcpy(new_cnxn_data.key, key, sizeof(new_cnxn_data.key));
452                 new_cnxn_data.key_is_client_order = key_is_client_order;
453                 new_cnxn_data.protocol = TCP_PROTOCOL;
454                 rte_cnxn_ip_type(&new_cnxn_data.type, packet);
455                 rte_memcpy(new_hash_entry, &new_cnxn_data,
456                                 sizeof(struct rte_ct_cnxn_data));
457                 new_hash_entry->counters.packets_forwarded = 1;
458                 new_hash_entry->counters.packets_dropped = 0;
459                 ct->counters->current_active_sessions++;
460                 ct->counters->sessions_activated++;
461
462                 if (packet_action == RTE_CT_SEND_CLIENT_SYNACK) {
463                         /* this is a synproxied connecton */
464                         /* must remember mss, window scaling etc. from client */
465
466                         rte_sp_parse_options(packet, new_hash_entry);
467
468                         /*
469                          * update packet to a SYN/ACK directed to the client,
470                          * including default header options
471                          */
472
473                         rte_sp_cvt_to_spoofed_client_synack(new_hash_entry,
474                                         packet);
475
476                         /*
477                          * run updated packet through connection tracking so
478                          * cnxn data updated appropriately and timer set for syn
479                          * received state, not syn sent.
480                          */
481                         packet_action = rte_ct_verify_tcp_packet(ct,
482                                         new_hash_entry, packet,
483                                         !key_is_client_order,
484                                         ip_hdr_size_bytes);
485
486                         if (unlikely(packet_action != RTE_CT_FORWARD_PACKET)) {
487                                 /* should never get here */
488                                 printf("Serious error in synproxy generating ");
489                                 printf("SYN/ACK\n");
490                                 return RTE_CT_DROP_PACKET;
491                         }
492                         ct->counters->pkts_forwarded++;
493                         /* spoofed packet good to go */
494                         return RTE_CT_SEND_CLIENT_SYNACK;
495                 }
496                 rte_ct_set_timer_for_new_cnxn(ct, new_hash_entry);
497
498         }
499
500         /* TODO: is it possible that earlier packet in this batch caused new
501          * entry to be added for the connection? Seems unlikely, since it
502          * would require multiple packets from the same side of the connection
503          * one after another immediately, and the TCP connection OPEN requires
504          * acknowledgement before further packets. What about simultaneous
505          * OPEN? Only if both sides are on same input port. Is that possible?
506          */
507         /* if made it here, packet will be forwarded */
508         ct->counters->pkts_forwarded++;
509         return RTE_CT_FORWARD_PACKET;
510 }
511
512 static uint64_t
513 rte_ct_cnxn_tracker_batch_lookup_basic(
514         struct rte_ct_cnxn_tracker *ct,
515         struct rte_mbuf **pkts,
516         uint64_t pkts_mask,
517         uint64_t no_new_cnxn_mask,
518         uint64_t *reply_pkt_mask,
519         uint64_t *hijack_mask)
520 {
521         /* bitmap of packets left to process */
522         uint64_t pkts_to_process = pkts_mask;
523         /* bitmap of valid packets to return */
524         uint64_t valid_packets = pkts_mask;
525         uint8_t compacting_map[RTE_HASH_LOOKUP_BULK_MAX];
526         /* for pkt, key in originators direction? */
527         uint8_t key_orig_dir[RTE_HASH_LOOKUP_BULK_MAX];
528         uint32_t packets_for_lookup = 0;
529         int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
530         uint32_t i;
531         struct rte_ct_cnxn_data new_cnxn_data;
532
533         if (CNXN_TRX_DEBUG > 1) {
534                 printf("Enter cnxn tracker %p", ct);
535                 printf(" synproxy batch lookup with packet mask %p\n",
536                                 (void *)pkts_mask);
537         }
538
539         rte_ct_forget_new_connections(ct);
540         *reply_pkt_mask = 0;
541         *hijack_mask = 0;
542
543         /*
544          * Use bulk lookup into hash table for performance reasons. Cannot have
545          * "empty slots" in the bulk lookup,so need to create a compacted table.
546          */
547
548         for (; pkts_to_process;) {
549                 uint8_t pos = (uint8_t) __builtin_ctzll(pkts_to_process);
550                 /* bitmask representing only this packet */
551                 uint64_t pkt_mask = 1LLU << pos;
552                 /* remove this packet from remaining list */
553                 pkts_to_process &= ~pkt_mask;
554
555                 struct rte_mbuf *pkt = pkts[pos];
556
557                 int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
558
559                 if (unlikely(ip_hdr_size_bytes < 0)) {
560                         /* Not IPv4, ignore. */
561                         continue;
562                 }
563
564                 void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
565
566                 /* TCP and UDP ports at same offset, just use TCP for
567                  * offset calculation
568                  */
569                 struct tcp_hdr *thdr =
570                         (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
571                                         (IP_START + ip_hdr_size_bytes));
572                 uint16_t src_port = rte_bswap16(thdr->src_port);
573                 uint16_t dst_port = rte_bswap16(thdr->dst_port);
574
575                 if (ip_hdr_size_bytes == IPv4_HEADER_SIZE) {
576                         struct ipv4_hdr *ihdr = (struct ipv4_hdr *)ip_hdr;
577                         uint8_t proto = ihdr->next_proto_id;
578
579                         if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
580                                 /* only tracking TCP and UDP at this time */
581                                 continue;
582                         }
583
584                         /*
585                          * Load the addresses and ports, and convert from Intel
586                          * to network byte order. Strictly speaking, it is not
587                          * necessary to do this conversion, as this data is only
588                          * used to create a hash key.
589                          */
590                         uint32_t src_addr = rte_bswap32(ihdr->src_addr);
591                         uint32_t dst_addr = rte_bswap32(ihdr->dst_addr);
592
593                         if (CNXN_TRX_DEBUG > 2) {
594                                 if (CNXN_TRX_DEBUG > 4)
595                                         rte_ct_cnxn_print_pkt(pkt,
596                                                         IP_VERSION_4);
597                         }
598                         /* need to create compacted table of pointers to pass
599                          * to bulk lookup
600                          */
601
602                         compacting_map[packets_for_lookup] = pos;
603                         key_orig_dir[packets_for_lookup] =
604                                 rte_ct_create_cnxn_hashkey(&src_addr, &dst_addr,
605                                                 src_port, dst_port,
606                                                 proto,
607                                                 &ct->hash_keys
608                                                 [packets_for_lookup][0],
609                                                 IP_VERSION_4);
610                         packets_for_lookup++;
611                 }
612
613                 if (ip_hdr_size_bytes == IPv6_HEADER_SIZE) {
614                         struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
615                         uint8_t proto = ihdr->proto;
616
617                         if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
618                                 /* only tracking TCP and UDP at this time */
619                                 continue;
620                         }
621
622                         if (CNXN_TRX_DEBUG > 2) {
623                                 if (CNXN_TRX_DEBUG > 4)
624                                         rte_ct_cnxn_print_pkt(pkt,
625                                                         IP_VERSION_6);
626                         }
627
628                         /* need to create compacted table of pointers to pass
629                          * to bulk lookup
630                          */
631
632                         compacting_map[packets_for_lookup] = pos;
633                         key_orig_dir[packets_for_lookup] =
634                                 rte_ct_create_cnxn_hashkey(
635                                                 (uint32_t *) ihdr->src_addr,
636                                                 (uint32_t *) ihdr->dst_addr,
637                                                 src_port, dst_port,
638                                                 proto,
639                                                 &ct->hash_keys
640                                                 [packets_for_lookup][0],
641                                                 IP_VERSION_6);
642                         packets_for_lookup++;
643                 }
644
645         }
646
647         if (unlikely(packets_for_lookup == 0))
648                 return valid_packets;   /* no suitable packet for lookup */
649
650         /* Clear all the data to make sure no stack garbage is in it */
651         memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
652
653         /* lookup all tcp & udp packets in the connection table */
654
655         int lookup_result =
656                         rte_hash_lookup_bulk(ct->rhash, (const void **)&ct->hash_key_ptrs,
657                                  packets_for_lookup, &positions[0]);
658
659         if (unlikely(lookup_result < 0)) {
660                 /* TODO: change a log */
661                 printf("Unexpected hash table problem, discarding all packets");
662                 return 0;       /* unknown error, just discard all packets */
663         }
664 #ifdef ALGDBG
665         for (i = 0; i < packets_for_lookup; i++) {
666                 if (positions[i] >= 0)
667                 printf("@CT positions[i]= %d, compacting_map[i]= %d\n",
668                         positions[i], compacting_map[i]);
669         }
670 #endif
671         for (i = 0; i < packets_for_lookup; i++) {
672                 /* index into hash table entries */
673                 int hash_table_entry = positions[i];
674                 /* index into packet table of this packet */
675                 uint8_t pkt_index = compacting_map[i];
676                 /* bitmask representing only this packet */
677                 uint64_t pkt_mask = 1LLU << pkt_index;
678                 uint8_t key_is_client_order = key_orig_dir[i];
679                 uint32_t *key = ct->hash_key_ptrs[pkt_index];
680                 uint8_t protocol = *(key + 9);
681                 struct rte_mbuf *packet = pkts[pkt_index];
682                 int no_new_cnxn = (pkt_mask & no_new_cnxn_mask) != 0;
683
684                  /* rte_ct_print_hashkey(key); */
685
686                 if (protocol == TCP_PROTOCOL) {
687                         enum rte_ct_packet_action tcp_pkt_action;
688
689                         int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(packet);
690                         tcp_pkt_action = rte_ct_handle_tcp_lookup(ct, packet,
691                                         pkt_index, key_is_client_order,
692                                         key, hash_table_entry, no_new_cnxn,
693                                         ip_hdr_size_bytes);
694
695                         switch (tcp_pkt_action) {
696
697                         case RTE_CT_SEND_CLIENT_SYNACK:
698                         case RTE_CT_SEND_SERVER_ACK:
699                                 /* altered packet or copy must be returned
700                                  * to originator
701                                  */
702                                 *reply_pkt_mask |= pkt_mask;
703                                 /* FALL-THROUGH */
704
705                         case RTE_CT_SEND_SERVER_SYN:
706                         case RTE_CT_FORWARD_PACKET:
707                                 break;
708
709                         case RTE_CT_HIJACK:
710                                 *hijack_mask |= pkt_mask;
711                                 break;
712
713                         default:
714                                 /* bad packet, clear mask to drop */
715                                 valid_packets ^= pkt_mask;
716                                 ct->counters->pkts_drop++;
717                                 break;
718                         }
719
720                         /* rte_ct_cnxn_print_pkt(pkts[pkt_index]); */
721                 } else {        /* UDP entry */
722
723                         if (hash_table_entry >= 0) {
724                                 /*
725                                  * connection found for this packet. Check that
726                                  * this is a valid packet for connection
727                                  */
728
729                                 struct rte_ct_cnxn_data *entry =
730                                                 &ct->hash_table_entries[hash_table_entry];
731
732                                 if (rte_ct_udp_packet
733                                                 (ct, entry, pkts[pkt_index],
734                                                  key_is_client_order)) {
735                                         entry->counters.packets_forwarded++;
736                                         ct->counters->pkts_forwarded++;
737                                 }
738                         } else {
739                                 /*
740                                  * connection not found in bulk hash lookup,
741                                  * but might have been added in this batch
742                                  */
743
744                                 struct rte_ct_cnxn_data *recent_entry =
745                                                 rte_ct_search_new_connections(ct, key);
746
747                                 if (recent_entry != NULL) {
748                                         if (rte_ct_udp_packet(ct, recent_entry,
749                                                         pkts[pkt_index],
750                                                         key_is_client_order)) {
751                                                 recent_entry->counters.
752                                                         packets_forwarded++;
753                                                 ct->counters->pkts_forwarded++;
754                                         }
755                                 } else {
756                                         /* no existing connection, try to add
757                                          * new one
758                                          */
759
760                                         if (no_new_cnxn) {
761                                                 /* new cnxn not allowed, clear
762                                                  * mask to drop
763                                                  */
764                                                 valid_packets ^= pkt_mask;
765                                                 ct->counters->pkts_drop++;
766                                                 ct->counters->
767                                                 pkts_drop_invalid_conn++;
768                                                 continue;
769                                         }
770
771                                         if (rte_ct_udp_new_connection(ct,
772                                                         &new_cnxn_data,
773                                                         pkts[pkt_index])) {
774                                                 /* This packet creates a
775                                                  * connection .
776                                                  */
777                                                 int32_t position =
778                                                         rte_hash_add_key(
779                                                                 ct->rhash, key);
780
781                                         if (position < 0)
782                                                 continue;
783
784                                                 struct rte_ct_cnxn_data
785                                                         *new_hash_entry = &ct->
786                                                 hash_table_entries[position];
787
788                                                 /*
789                                                  *update fields in new_cnxn_data
790                                                  * not set by "new_connection"
791                                                  */
792
793                                                 memcpy(new_cnxn_data.key, key,
794                                                 sizeof(new_cnxn_data.key));
795
796                                                 new_cnxn_data.
797                                                         key_is_client_order
798                                                         = key_is_client_order;
799                                                 new_cnxn_data.protocol =
800                                                         UDP_PROTOCOL;
801                                                 rte_cnxn_ip_type(
802                                                         &new_cnxn_data.type,
803                                                         packet);
804                                                 rte_memcpy(new_hash_entry,
805                                                         &new_cnxn_data,
806                                                         sizeof(struct
807                                                         rte_ct_cnxn_data));
808
809                                                 new_hash_entry->counters.
810                                                         packets_forwarded = 1;
811                                                 ct->counters->pkts_forwarded++;
812                                                 new_hash_entry->counters.
813                                                         packets_dropped = 0;
814                                                 ct->counters->pkts_drop = 0;
815                                                 ct->counters->
816                                                 current_active_sessions++;
817                                                 ct->counters->
818                                                         sessions_activated++;
819
820                                                 new_hash_entry->
821                                                         state_used_for_timer
822                                                         = RTE_CT_UDP_NONE;
823                                                 rte_ct_set_cnxn_timer_for_udp(
824                                                         ct,
825                                                         new_hash_entry,
826                                                         RTE_CT_UDP_UNREPLIED);
827
828                                                 rte_ct_remember_new_connection(
829                                                                 ct,
830                                                                 new_hash_entry);
831                                         }
832                                 }
833
834                         }
835
836                 }               /* UDP */
837         }                       /* packets_for_lookup */
838
839         if (CNXN_TRX_DEBUG > 1) {
840                 printf("Exit cnxn tracker synproxy batch lookup with");
841                 printf(" packet mask %p\n", (void *)valid_packets);
842         }
843
844         return valid_packets;
845 }
846
847 uint64_t
848 rte_ct_cnxn_tracker_batch_lookup_with_synproxy(
849         struct rte_ct_cnxn_tracker *ct,
850         struct rte_mbuf **pkts,
851         uint64_t pkts_mask,
852         struct rte_synproxy_helper *sp_helper)
853 {
854         return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask, 0,
855                         &sp_helper->reply_pkt_mask, &sp_helper->hijack_mask);
856 }
#ifdef CT_CGNAT
/*
 * CGNAT entry point (only built when CT_CGNAT is defined). Synproxy is
 * switched off on the tracker before each batch, then the batch goes
 * through the basic lookup using the masks held in ct_helper.
 */
uint64_t cgnapt_ct_process(
        struct rte_ct_cnxn_tracker *ct,
        struct rte_mbuf **pkts,
        uint64_t pkts_mask,
        struct rte_CT_helper *ct_helper)
{
        uint64_t lookup_result;

        /* to disable SynProxy for CGNAT */
        rte_ct_disable_synproxy(ct);

        lookup_result = rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts,
                        pkts_mask,
                        ct_helper->no_new_cnxn_mask,
                        &ct_helper->reply_pkt_mask,
                        &ct_helper->hijack_mask);

        return lookup_result;
}
#endif /* CT_CGNAT */
872 uint64_t
873 rte_ct_cnxn_tracker_batch_lookup(
874         struct rte_ct_cnxn_tracker *ct,
875         struct rte_mbuf **pkts,
876         uint64_t pkts_mask,
877         struct rte_CT_helper *ct_helper)
878 {
879
880         return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
881                         ct_helper->no_new_cnxn_mask,
882                         &ct_helper->reply_pkt_mask, &ct_helper->hijack_mask);
883 }
884
885
886 void rte_ct_cnxn_tracker_batch_lookup_type(
887         struct rte_ct_cnxn_tracker *ct,
888         struct rte_mbuf **pkts,
889         uint64_t *pkts_mask,
890         struct rte_CT_helper *ct_helper,
891         uint8_t ip_hdr_size_bytes)
892 {
893
894         rte_ct_cnxn_tracker_batch_lookup_basic_type(ct, pkts, pkts_mask,
895                         ct_helper->no_new_cnxn_mask,
896                         &ct_helper->reply_pkt_mask, &ct_helper->hijack_mask,
897                         ip_hdr_size_bytes);
898 }
899
900
901
/*
 * Batch lookup for callers that only supply a "no new connection" mask
 * and do not want the reply/hijack masks: both are written to a single
 * local scratch word that is thrown away on return.
 */
uint64_t
rte_ct_cnxn_tracker_batch_lookup_with_new_cnxn_control(
        struct rte_ct_cnxn_tracker *ct,
        struct rte_mbuf **pkts,
        uint64_t pkts_mask,
        uint64_t no_new_cnxn_mask)
{
        /* receives both output masks; never read */
        uint64_t discarded_mask;
        uint64_t *sink = &discarded_mask;

        return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
                        no_new_cnxn_mask, sink, sink);
}
915
916
917 int
918 rte_ct_initialize_default_timeouts(struct rte_ct_cnxn_tracker *new_cnxn_tracker)
919 {
920
921         /* timer system init */
922
923         uint64_t hertz = rte_get_tsc_hz();
924
925         new_cnxn_tracker->hertz = hertz;
926         new_cnxn_tracker->timing_cycles_per_timing_step = hertz / 10;
927         new_cnxn_tracker->timing_100ms_steps_previous = 0;
928         new_cnxn_tracker->timing_100ms_steps = 0;
929         new_cnxn_tracker->timing_last_time = rte_get_tsc_cycles();
930
931         /* timeouts in seconds */
932         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
933                 [RTE_CT_TCP_SYN_SENT] = 120 * hertz;
934         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
935                 [RTE_CT_TCP_SYN_RECV] = 60 * hertz;
936         /* 5 * DAYS */
937         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
938                 [RTE_CT_TCP_ESTABLISHED] = 60 * 60 * 24 * 5 * hertz;
939
940         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
941                 [RTE_CT_TCP_FIN_WAIT] = 120 * hertz;
942         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
943                 [RTE_CT_TCP_CLOSE_WAIT] = 60 * hertz;
944         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
945                 [RTE_CT_TCP_LAST_ACK] = 30 * hertz;
946         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
947                 [RTE_CT_TCP_TIME_WAIT] = 120 * hertz;
948         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
949                 [RTE_CT_TCP_CLOSE] = 10 * hertz;
950         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
951                 [RTE_CT_TCP_SYN_SENT_2] = 120 * hertz;
952         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
953                 [RTE_CT_TCP_RETRANS] = 300 * hertz;
954         new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
955                 [RTE_CT_TCP_UNACK] = 300 * hertz;
956
957         new_cnxn_tracker->ct_timeout.udptimeout.udp_timeouts
958                 [RTE_CT_UDP_UNREPLIED] = 30 * hertz;
959         new_cnxn_tracker->ct_timeout.udptimeout.udp_timeouts
960                 [RTE_CT_UDP_REPLIED] = 180 * hertz;
961         /* miscellaneous init */
962         new_cnxn_tracker->misc_options.tcp_max_retrans =
963                 RTE_CT_TCP_MAX_RETRANS;
964         new_cnxn_tracker->misc_options.tcp_loose = 0;
965         new_cnxn_tracker->misc_options.tcp_be_liberal = 0;
966 #ifdef CT_CGNAT
967         int i;
968         for (i=0; i < RTE_HASH_LOOKUP_BULK_MAX ;i ++ )
969                         new_cnxn_tracker->positions[i] = -1;
970 #endif
971
972         return 0;
973 }
974
975 struct rte_CT_counter_block rte_CT_counter_table[MAX_CT_INSTANCES]
976 __rte_cache_aligned;
977 int rte_CT_hi_counter_block_in_use = -1;
978
979 int
980 rte_ct_initialize_cnxn_tracker_with_synproxy(
981         struct rte_ct_cnxn_tracker *new_cnxn_tracker,
982         uint32_t max_connection_count,
983         char *name,
984         uint16_t pointer_offset)
985 {
986         uint32_t i;
987         uint32_t size;
988         struct rte_CT_counter_block *counter_ptr;
989         /*
990          * TODO: Should number of entries be something like
991          * max_connection_count * 1.1 to allow for unused space
992          * and thus increased performance of hash table, at a cost of memory???
993          */
994
995         new_cnxn_tracker->pointer_offset = pointer_offset;
996
997         memset(new_cnxn_tracker->name, '\0', sizeof(new_cnxn_tracker->name));
998         strncpy(new_cnxn_tracker->name, name, strlen(new_cnxn_tracker->name));
999         //strcpy(new_cnxn_tracker->name, name);
1000         /* + (max_connection_count >> 3); */
1001         uint32_t number_of_entries = max_connection_count;
1002
1003         size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_ct_cnxn_data) *
1004                         number_of_entries);
1005         new_cnxn_tracker->hash_table_entries =
1006                 rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
1007         if (new_cnxn_tracker->hash_table_entries == NULL) {
1008                 printf(" Not enough memory, or invalid arguments\n");
1009                 return -1;
1010         }
1011         new_cnxn_tracker->num_cnxn_entries = number_of_entries;
1012
1013         /* initialize all timers */
1014
1015         for (i = 0; i < number_of_entries; i++)
1016                 rte_timer_init(&new_cnxn_tracker->hash_table_entries[i].timer);
1017
1018         /* pointers for temp storage used during bulk hash */
1019         for (i = 0; i < RTE_HASH_LOOKUP_BULK_MAX; i++)
1020                 new_cnxn_tracker->hash_key_ptrs[i] =
1021                                 &new_cnxn_tracker->hash_keys[i][0];
1022
1023         /*
1024          * Now allocate a counter block entry.It appears that the initialization
1025          * of these threads is serialized on core 0 so no lock is necessary
1026          */
1027
1028         if (rte_CT_hi_counter_block_in_use == MAX_CT_INSTANCES)
1029                 return -1;
1030
1031         rte_CT_hi_counter_block_in_use++;
1032         counter_ptr = &rte_CT_counter_table[rte_CT_hi_counter_block_in_use];
1033
1034         new_cnxn_tracker->counters = counter_ptr;
1035
1036         /* set up hash table parameters, then create hash table */
1037         struct rte_hash_parameters rhash_parms = {
1038                 .name = name,
1039                 .entries = number_of_entries,
1040                 .hash_func = NULL,      /* use default hash */
1041                 .key_len = 40,
1042                 .hash_func_init_val = 0,
1043                 .socket_id = rte_socket_id(),
1044                 .extra_flag = 1 /*This is needed for TSX memory*/
1045         };
1046
1047         new_cnxn_tracker->rhash = rte_hash_create(&rhash_parms);
1048
1049         return 0;
1050 }
1051
/*
 * Initialize a connection tracker with a pointer offset of 0. The result
 * of rte_ct_initialize_cnxn_tracker_with_synproxy() is returned unchanged.
 */
int
rte_ct_initialize_cnxn_tracker(
        struct rte_ct_cnxn_tracker *new_cnxn_tracker,
        uint32_t max_connection_count,
        char *name)
{
        const uint16_t no_pointer_offset = 0;
        int status;

        status = rte_ct_initialize_cnxn_tracker_with_synproxy(
                        new_cnxn_tracker, max_connection_count, name,
                        no_pointer_offset);

        return status;
}
1061
1062 int
1063 rte_ct_free_cnxn_tracker_resources(struct rte_ct_cnxn_tracker *old_cnxn_tracker)
1064 {
1065         rte_free(old_cnxn_tracker->hash_table_entries);
1066         rte_hash_free(old_cnxn_tracker->rhash);
1067         return 0;
1068 }
1069
1070 int
1071 rte_ct_get_cnxn_tracker_size(void)
1072 {
1073         return sizeof(struct rte_ct_cnxn_tracker);
1074 }
1075
1076 void
1077 rte_ct_cnxn_timer_expired(struct rte_timer *rt, void *arg);
1078
1079 static void
1080 rte_ct_set_cnxn_timer(
1081         struct rte_ct_cnxn_tracker *ct,
1082         struct rte_ct_cnxn_data *cd,
1083         uint64_t ticks_until_timeout)
1084 {
1085         /*
1086          * pointer to cnxn_data will be stored in timer system as pointer to
1087          * rte_timer for later cast back to cnxn_data during timeout handling
1088          */
1089
1090         struct rte_timer *rt = (struct rte_timer *)cd;
1091         #ifdef CT_CGNAT
1092         /* execute timeout on timer core */
1093         uint32_t core_id = get_timer_core_id();
1094         #else
1095         /* execute timeout on current core */
1096         uint32_t core_id = rte_lcore_id();
1097         #endif
1098         /* safe to reset since timeouts handled synchronously
1099          * by rte_timer_manage
1100          */
1101         int success = rte_timer_reset(rt, ticks_until_timeout, SINGLE, core_id,
1102                         rte_ct_cnxn_timer_expired, ct);
1103
1104         if (success < 0) {
1105                 /* TODO: Change to log, perhaps something else?
1106                  * This should not happen
1107                  */
1108                 printf("CNXN_TRACKER: Failed to set connection timer.\n");
1109         }
1110 }
1111
1112 /*
1113  * For the given connection, set a timeout based on the given state. If the
1114 * timer is already set, this call will reset the timer with a new value.
1115  */
1116
1117 void
1118 rte_ct_set_cnxn_timer_for_tcp(
1119         struct rte_ct_cnxn_tracker *ct,
1120         struct rte_ct_cnxn_data *cd,
1121         uint8_t tcp_state)
1122 {
1123
1124         cd->expected_timeout =
1125                         (ct->timing_100ms_steps * ct->timing_cycles_per_timing_step) +
1126                         ct->ct_timeout.tcptimeout.tcp_timeouts[tcp_state];
1127
1128         if (tcp_state == cd->state_used_for_timer) {
1129                 /*
1130                  * Don't reset timer, too expensive. Instead, determine time
1131                  * elapsed since start of timer. When this timer expires, the
1132                  * timer will be reset to the elapsed timer. So if in a state
1133                  * with a 5 minute timer last sees a packet 4 minutes into the
1134                  * timer, the timer when expires will be reset to 4 minutes.
1135                  * This means the timer will then expire 5 minutes after
1136                  * the last packet.
1137                  */
1138                 return;
1139         }
1140
1141         if (TESTING_TIMERS)
1142                 printf("Set Timer for connection %p and state %s\n", cd,
1143                                          rte_ct_tcp_names[tcp_state]);
1144
1145         rte_ct_set_cnxn_timer(ct, cd,
1146                                                 ct->ct_timeout.
1147                                                 tcptimeout.tcp_timeouts[tcp_state]);
1148         cd->state_used_for_timer = tcp_state;
1149 }
1150
1151 /*
1152  * For the given connection, set a timeout based on the given state.
1153  * If the timer is already set,
1154  * this call will reset the timer with a new value.
1155  */
1156
1157 void
1158 rte_ct_set_cnxn_timer_for_udp(
1159         struct rte_ct_cnxn_tracker *ct,
1160         struct rte_ct_cnxn_data *cd,
1161         uint8_t udp_state)
1162 {
1163
1164         cd->expected_timeout = (ct->timing_cycles_per_timing_step) +
1165                         ct->ct_timeout.udptimeout.udp_timeouts[udp_state];
1166
1167         if (udp_state == cd->state_used_for_timer) {
1168                 /*
1169                  * Don't reset timer, too expensive. Instead, determine time
1170                  * elapsed since start of timer. When this timer expires, the
1171                  * timer will be reset to the elapsed timer. So if in a state
1172                  * with a 5 minute timer last sees a packet 4 minutes into the
1173                  * timer, the timer when expires will be reset to 4 minutes.
1174                  * This means the timer will then
1175                  * expire 5 minutes after the last packet.
1176                  */
1177                 return;
1178         }
1179
1180         if (TESTING_TIMERS)
1181                 printf("Set Timer for connection %p and state %s\n", cd,
1182                                          rte_ct_udp_names[udp_state]);
1183         rte_ct_set_cnxn_timer(ct, cd,
1184                                                 ct->ct_timeout.
1185                                                 udptimeout.udp_timeouts[udp_state]);
1186         cd->state_used_for_timer = udp_state;
1187 }
1188
1189 /* Cancel the timer associated with the connection.
1190  * Safe to call if no timer set.
1191  */
1192         void
1193 rte_ct_cancel_cnxn_timer(struct rte_ct_cnxn_data *cd)
1194 {
1195         if (TESTING_TIMERS)
1196                 printf("Cancel Timer\n");
1197
1198         rte_timer_stop(&cd->timer);
1199 }
1200
1201 void
1202 rte_ct_handle_expired_timers(struct rte_ct_cnxn_tracker *ct)
1203 {
1204         /*
1205          * If current time (in 100 ms increments) is different from the
1206          * time it was last viewed, then check for and process expired timers.
1207          */
1208
1209         uint64_t new_time = rte_get_tsc_cycles();
1210         uint64_t time_diff = new_time - ct->timing_last_time;
1211
1212         if (time_diff >= ct->timing_cycles_per_timing_step) {
1213                 ct->timing_last_time = new_time;
1214                 ct->timing_100ms_steps++;
1215         }
1216
1217         if (ct->timing_100ms_steps != ct->timing_100ms_steps_previous) {
1218                 rte_timer_manage();
1219                 ct->timing_100ms_steps_previous = ct->timing_100ms_steps;
1220         }
1221 }
1222
1223 /* timer has expired. Need to delete connection entry */
1224
1225 void
1226 rte_ct_cnxn_timer_expired(struct rte_timer *rt, void *arg)
1227 {
1228         /* the pointer to the rte_timer was actually a pointer
1229          * to the cnxn data
1230          */
1231         struct rte_ct_cnxn_data *cd = (struct rte_ct_cnxn_data *)rt;
1232         struct rte_ct_cnxn_tracker *ct = (struct rte_ct_cnxn_tracker *)arg;
1233         int success = 0;
1234
1235         /*
1236          * Check to see if the timer has "really" expired. If traffic occured
1237          * since the timer was set, the timer needs be extended, so that timer
1238          * expires the appropriate amount after that last packet.
1239          */
1240
1241         uint64_t current_time = ct->timing_100ms_steps *
1242                 ct->timing_cycles_per_timing_step;
1243
1244         if (cd->expected_timeout >= current_time) {
1245                 uint64_t time_diff = cd->expected_timeout - current_time;
1246
1247                 rte_ct_set_cnxn_timer(ct, cd, time_diff);
1248                 return;
1249         }
1250
1251         if (cd->protocol == TCP_PROTOCOL) {
1252                 if (cd->state_used_for_timer == RTE_CT_TCP_TIME_WAIT ||
1253                                 cd->state_used_for_timer == RTE_CT_TCP_CLOSE)
1254                         ct->counters->sessions_closed++;
1255                 else
1256                         ct->counters->sessions_timedout++;
1257                 /* if synproxied connection, free list of buffered
1258                  * packets if any
1259                  */
1260
1261                 if (cd->ct_protocol.synproxy_data.synproxied)
1262                         rte_ct_release_buffered_packets(ct, cd);
1263
1264         } else if (cd->protocol == UDP_PROTOCOL)
1265                 ct->counters->sessions_closed++;
1266         if (ct->counters->current_active_sessions > 0)
1267                 ct->counters->current_active_sessions--;
1268
1269         if (RTE_CT_TIMER_EXPIRED_DUMP) {
1270                 uint64_t percent = (cd->counters.packets_dropped * 10000) /
1271                                 (cd->counters.packets_forwarded +
1272                                  cd->counters.packets_dropped);
1273
1274                 if (cd->protocol == TCP_PROTOCOL) {
1275                         printf("CnxnTrkr %s, timed-out TCP Connection: %p,",
1276                                         ct->name, cd);
1277                         printf(" %s, pkts forwarded %"
1278                                 PRIu64 ", pkts dropped %" PRIu64
1279                                 ", drop%% %u.%u\n",
1280                                 rte_ct_tcp_names[cd->state_used_for_timer],
1281                                 cd->counters.packets_forwarded,
1282                                 cd->counters.packets_dropped,
1283                                 (uint32_t) (percent / 100),
1284                                 (uint32_t) (percent % 100));
1285                 } else if (cd->protocol == UDP_PROTOCOL) {
1286                         printf("CnxnTrkr %s, Timed-out UDP Connection: %p,",
1287                                         ct->name, cd);
1288                         printf(" %s, pkts forwarded %" PRIu64
1289                                 ", pkts dropped %" PRIu64 ", drop%% %u.%u\n",
1290                                 rte_ct_udp_names[cd->state_used_for_timer],
1291                                 cd->counters.packets_forwarded,
1292                                 cd->counters.packets_dropped,
1293                                 (uint32_t) (percent / 100),
1294                                 (uint32_t) (percent % 100));
1295                 }
1296         }
1297
1298         success = rte_hash_del_key(ct->rhash, &cd->key);
1299
1300         if (success < 0) {
1301                 /* TODO: change to a log */
1302                 rte_ct_print_hashkey(cd->key);
1303         }
1304
1305 }
1306
1307 struct rte_CT_counter_block *
1308 rte_ct_get_counter_address(struct rte_ct_cnxn_tracker *ct)
1309 {
1310         return ct->counters;
1311 }
1312
1313 int
1314 rte_ct_set_configuration_options(struct rte_ct_cnxn_tracker *ct,
1315                 char *name, char *value)
1316 {
1317         /* check non-time values first */
1318         int ival = atoi(value);
1319
1320         /* tcp_loose */
1321         if (strcmp(name, "tcp_loose") == 0) {
1322                 ct->misc_options.tcp_loose = ival;
1323                 return 0;
1324         }
1325
1326         /* tcp_be_liberal */
1327         if (strcmp(name, "tcp_be_liberal") == 0) {
1328                 ct->misc_options.tcp_be_liberal = ival;
1329                 return 0;
1330         }
1331
1332         /* tcp_max_retrans */
1333         if (strcmp(name, "tcp_max_retrans") == 0) {
1334                 ct->misc_options.tcp_max_retrans = ival;
1335                 return 0;
1336         }
1337
1338         uint64_t time_value = ival * ct->hertz;
1339
1340
1341         /* configuration of timer values */
1342
1343         /* tcp_syn_sent */
1344         if (strcmp(name, "tcp_syn_sent") == 0) {
1345                 if (time_value == 0)
1346                         return -1;
1347                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_SENT] =
1348                         time_value;
1349                 return 0;
1350         }
1351
1352         /* tcp_syn_recv */
1353         if (strcmp(name, "tcp_syn_recv") == 0) {
1354                 if (time_value == 0)
1355                         return -1;
1356                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_RECV] =
1357                         time_value;
1358                 return 0;
1359         }
1360
1361         /* tcp_established */
1362         if (strcmp(name, "tcp_established") == 0) {
1363                 if (time_value == 0)
1364                         return -1;
1365                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_ESTABLISHED] =
1366                         time_value;
1367                 return 0;
1368         }
1369
1370         /* tcp_fin_wait */
1371         if (strcmp(name, "tcp_fin_wait") == 0) {
1372                 if (time_value == 0)
1373                         return -1;
1374                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_FIN_WAIT] =
1375                         time_value;
1376                 return 0;
1377         }
1378
1379         /* tcp_close_wait */
1380         if (strcmp(name, "tcp_close_wait") == 0) {
1381                 if (time_value == 0)
1382                         return -1;
1383                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_CLOSE_WAIT] =
1384                         time_value;
1385                 return 0;
1386         }
1387
1388         /* tcp_last_ack */
1389         if (strcmp(name, "tcp_last_ack") == 0) {
1390                 if (time_value == 0)
1391                         return -1;
1392                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_LAST_ACK] =
1393                         time_value;
1394                 return 0;
1395         }
1396
1397         /* tcp_time_wait */
1398         if (strcmp(name, "tcp_time_wait") == 0) {
1399                 if (time_value == 0)
1400                         return -1;
1401                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_TIME_WAIT] =
1402                         time_value;
1403                 return 0;
1404         }
1405
1406         /* tcp_close */
1407         if (strcmp(name, "tcp_close") == 0) {
1408                 if (time_value == 0)
1409                         return -1;
1410                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_CLOSE] =
1411                         time_value;
1412                 return 0;
1413         }
1414
1415         /* tcp_syn_sent_2 */
1416         if (strcmp(name, "tcp_syn_sent_2") == 0) {
1417                 if (time_value == 0)
1418                         return -1;
1419                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_SENT_2] =
1420                         time_value;
1421                 return 0;
1422         }
1423
1424         /* tcp_retrans */
1425         if (strcmp(name, "tcp_retrans") == 0) {
1426                 if (time_value == 0)
1427                         return -1;
1428                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_RETRANS] =
1429                         time_value;
1430                 return 0;
1431         }
1432
1433         /* tcp_unack */
1434         if (strcmp(name, "tcp_unack") == 0) {
1435                 if (time_value == 0)
1436                         return -1;
1437                 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_UNACK] =
1438                         time_value;
1439                 return 0;
1440         }
1441
1442         /* udp_unreplied */
1443         if (strcmp(name, "udp_unreplied") == 0) {
1444                 if (time_value == 0)
1445                         return -1;
1446                 ct->ct_timeout.udptimeout.udp_timeouts[RTE_CT_UDP_UNREPLIED] =
1447                         time_value;
1448                 return 0;
1449         }
1450
1451         /* udp_replied */
1452         if (strcmp(name, "udp_replied") == 0) {
1453                 if (time_value == 0)
1454                         return -1;
1455                 ct->ct_timeout.udptimeout.udp_timeouts[RTE_CT_UDP_REPLIED] =
1456                         time_value;
1457                 return 0;
1458         }
1459         return 1;
1460 }
1461
1462 static void
1463 rte_ct_cnxn_tracker_batch_lookup_basic_type(
1464                 struct rte_ct_cnxn_tracker *ct,
1465                 struct rte_mbuf **pkts,
1466                 uint64_t *pkts_mask,
1467                 uint64_t no_new_cnxn_mask,
1468                 uint64_t *reply_pkt_mask,
1469                 uint64_t *hijack_mask,
1470                 uint8_t ip_hdr_size_bytes)
1471 {
1472         /* bitmap of packets left to process */
1473         uint64_t pkts_to_process = *pkts_mask;
1474         /* bitmap of valid packets to return */
1475         uint8_t compacting_map[RTE_HASH_LOOKUP_BULK_MAX];
1476         /* for pkt, key in originators direction? */
1477         uint8_t key_orig_dir[RTE_HASH_LOOKUP_BULK_MAX];
1478         uint32_t packets_for_lookup = 0;
1479         int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
1480         uint32_t i;
1481         struct rte_ct_cnxn_data new_cnxn_data;
1482
1483         if (CNXN_TRX_DEBUG > 1) {
1484                 printf("Enter cnxn tracker %p", ct);
1485                 printf(" synproxy batch lookup with packet mask %p\n",
1486                                 (void *)*pkts_mask);
1487         }
1488
1489         rte_ct_forget_new_connections(ct);
1490         *reply_pkt_mask = 0;
1491         *hijack_mask = 0;
1492
1493         /*
1494          * Use bulk lookup into hash table for performance reasons. Cannot have
1495          * "empty slots" in the bulk lookup,so need to create a compacted table.
1496          */
1497
1498         switch (ip_hdr_size_bytes) {
1499         case IPv4_HEADER_SIZE:
1500                 for (; pkts_to_process;) {
1501                         uint8_t pos = (uint8_t) __builtin_ctzll(
1502                                         pkts_to_process);
1503                         /* bitmask representing only this packet */
1504                         uint64_t pkt_mask = 1LLU << pos;
1505                         /* remove this packet from remaining list */
1506                         pkts_to_process &= ~pkt_mask;
1507
1508                         struct rte_mbuf *pkt = pkts[pos];
1509
1510
1511                         /* TCP and UDP ports at same offset, just use TCP for
1512                          * offset calculation
1513                          */
1514                         struct tcp_hdr *thdr = (struct tcp_hdr *)
1515                                 RTE_MBUF_METADATA_UINT32_PTR(pkt,
1516                                                 (IP_START + ip_hdr_size_bytes));
1517                         uint16_t src_port = rte_bswap16(thdr->src_port);
1518                         uint16_t dst_port = rte_bswap16(thdr->dst_port);
1519
1520                         struct ipv4_hdr *ihdr = (struct ipv4_hdr *)
1521                                 RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
1522                         uint8_t proto = ihdr->next_proto_id;
1523
1524                         if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
1525                                 /* only tracking TCP and UDP at this time */
1526                                 continue;
1527                         }
1528
1529                         /*
1530                          * Load the addresses and ports, and convert from Intel
1531                          * to network byte order. Strictly speaking, it is not
1532                          * necessary to do this conversion, as this data is only
1533                          * used to create a hash key.
1534                          */
1535                         uint32_t src_addr = rte_bswap32(ihdr->src_addr);
1536                         uint32_t dst_addr = rte_bswap32(ihdr->dst_addr);
1537
1538                         if (CNXN_TRX_DEBUG > 2) {
1539                                 if (CNXN_TRX_DEBUG > 4)
1540                                         rte_ct_cnxn_print_pkt(pkt,
1541                                                         IP_VERSION_4);
1542                         }
1543                         /* need to create compacted table of pointers to pass
1544                          * to bulk lookup
1545                          */
1546
1547                         compacting_map[packets_for_lookup] = pos;
1548                         key_orig_dir[packets_for_lookup] =
1549                                 rte_ct_create_cnxn_hashkey(&src_addr, &dst_addr,
1550                                                 src_port, dst_port,
1551                                                 proto,
1552                                                 &ct->hash_keys
1553                                                 [packets_for_lookup][0],
1554                                                 IP_VERSION_4);
1555                         packets_for_lookup++;
1556                 }
1557                 break;
1558         case IPv6_HEADER_SIZE:
1559                 for (; pkts_to_process;) {
1560                         uint8_t pos = (uint8_t) __builtin_ctzll(
1561                                         pkts_to_process);
1562                         /* bitmask representing only this packet */
1563                         uint64_t pkt_mask = 1LLU << pos;
1564                         /* remove this packet from remaining list */
1565                         pkts_to_process &= ~pkt_mask;
1566
1567                         struct rte_mbuf *pkt = pkts[pos];
1568
1569
1570                         void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt,
1571                                         IP_START);
1572
1573                         /* TCP and UDP ports at same offset, just use TCP for
1574                          * offset calculation
1575                          */
1576                         struct tcp_hdr *thdr = (struct tcp_hdr *)
1577                                 RTE_MBUF_METADATA_UINT32_PTR(pkt,
1578                                                 (IP_START + ip_hdr_size_bytes));
1579                         uint16_t src_port = rte_bswap16(thdr->src_port);
1580                         uint16_t dst_port = rte_bswap16(thdr->dst_port);
1581
1582                         struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
1583                         uint8_t proto = ihdr->proto;
1584
1585                         if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
1586                                 /* only tracking TCP and UDP at this time */
1587                                 continue;
1588                         }
1589
1590                         if (CNXN_TRX_DEBUG > 2) {
1591                                 if (CNXN_TRX_DEBUG > 4)
1592                                         rte_ct_cnxn_print_pkt(pkt,
1593                                                         IP_VERSION_6);
1594                         }
1595
1596                         /* need to create compacted table of pointers to pass
1597                          * to bulk lookup
1598                          */
1599
1600                         compacting_map[packets_for_lookup] = pos;
1601                         key_orig_dir[packets_for_lookup] =
1602                                 rte_ct_create_cnxn_hashkey(
1603                                                 (uint32_t *) ihdr->src_addr,
1604                                                 (uint32_t *) ihdr->dst_addr,
1605                                                 src_port, dst_port,
1606                                                 proto,
1607                                                 &ct->hash_keys
1608                                                 [packets_for_lookup][0],
1609                                                 IP_VERSION_6);
1610                         packets_for_lookup++;
1611                 }
1612                 break;
1613         default:
1614                 break;
1615         }
1616         if (unlikely(packets_for_lookup == 0))
1617                 return; /* no suitable packet for lookup */
1618
1619         /* Clear all the data to make sure no stack garbage is in it */
1620         memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
1621
1622         /* lookup all tcp & udp packets in the connection table */
1623
1624         int lookup_result = rte_hash_lookup_bulk(ct->rhash,
1625                         (const void **)&ct->hash_key_ptrs,
1626                         packets_for_lookup, &positions[0]);
1627
1628         if (unlikely(lookup_result < 0)) {
1629                 /* TODO: change a log */
1630                 printf("Unexpected hash table problem, discarding all packets");
1631                 *pkts_mask = 0;
1632                 return; /* unknown error, just discard all packets */
1633         }
1634         for (i = 0; i < packets_for_lookup; i++) {
1635                 /* index into hash table entries */
1636                 int hash_table_entry = positions[i];
1637                 /* index into packet table of this packet */
1638                 uint8_t pkt_index = compacting_map[i];
1639                 /* bitmask representing only this packet */
1640                 uint64_t pkt_mask = 1LLU << pkt_index;
1641                 uint8_t key_is_client_order = key_orig_dir[i];
1642                 uint32_t *key = ct->hash_key_ptrs[pkt_index];
1643                 uint8_t protocol = *(key + 9);
1644                 struct rte_mbuf *packet = pkts[pkt_index];
1645                 int no_new_cnxn = (pkt_mask & no_new_cnxn_mask) != 0;
1646
1647                 /* rte_ct_print_hashkey(key); */
1648
1649                 if (protocol == TCP_PROTOCOL) {
1650                         enum rte_ct_packet_action tcp_pkt_action;
1651
1652                         tcp_pkt_action = rte_ct_handle_tcp_lookup(ct, packet,
1653                                         pkt_index, key_is_client_order,
1654                                         key, hash_table_entry, no_new_cnxn,
1655                                         ip_hdr_size_bytes);
1656
1657                         switch (tcp_pkt_action) {
1658
1659                         case RTE_CT_SEND_CLIENT_SYNACK:
1660                         case RTE_CT_SEND_SERVER_ACK:
1661                                 /* altered packet or copy must be returned
1662                                  * to originator
1663                                  */
1664                                 *reply_pkt_mask |= pkt_mask;
1665                                 /* FALL-THROUGH */
1666
1667                         case RTE_CT_SEND_SERVER_SYN:
1668                         case RTE_CT_FORWARD_PACKET:
1669                                 break;
1670
1671                         case RTE_CT_HIJACK:
1672                                 *hijack_mask |= pkt_mask;
1673                                 break;
1674
1675                         default:
1676                                 /* bad packet, clear mask to drop */
1677                                 *pkts_mask ^= pkt_mask;
1678                                 ct->counters->pkts_drop++;
1679                                 break;
1680                 }
1681                         /* rte_ct_cnxn_print_pkt(pkts[pkt_index]); */
1682
1683                 } else {        /* UDP entry */
1684
1685                         if (hash_table_entry >= 0) {
1686                                 /*
1687                                  * connection found for this packet. Check that
1688                                  * this is a valid packet for connection
1689                                  */
1690
1691                                 struct rte_ct_cnxn_data *entry =
1692                                 &ct->hash_table_entries[hash_table_entry];
1693
1694                                 if (rte_ct_udp_packet
1695                                                 (ct, entry, pkts[pkt_index],
1696                                                  key_is_client_order)) {
1697                                         entry->counters.packets_forwarded++;
1698                                         ct->counters->pkts_forwarded++;
1699                                 }
1700                         } else {
1701                                 /*
1702                                  * connection not found in bulk hash lookup,
1703                                  * but might have been added in this batch
1704                                  */
1705
1706                                 struct rte_ct_cnxn_data *recent_entry =
1707                                         rte_ct_search_new_connections(ct, key);
1708
1709                                 if (recent_entry != NULL) {
1710                                         if (rte_ct_udp_packet(ct, recent_entry,
1711                                                         pkts[pkt_index],
1712                                                         key_is_client_order)) {
1713                                                 recent_entry->counters.
1714                                                         packets_forwarded++;
1715                                                 ct->counters->pkts_forwarded++;
1716                                         }
1717                                 } else {
1718                                         /* no existing connection, try to add
1719                                          * new one
1720                                          */
1721
1722                                         if (no_new_cnxn) {
1723                                                 /* new cnxn not allowed, clear
1724                                                  * mask to drop
1725                                                  */
1726                                                 *pkts_mask ^= pkt_mask;
1727                                                 ct->counters->pkts_drop++;
1728                                                 ct->counters->
1729                                                 pkts_drop_invalid_conn++;
1730                                                 continue;
1731                                         }
1732
1733                                         if (rte_ct_udp_new_connection(ct,
1734                                         &new_cnxn_data, pkts[pkt_index])) {
1735                                                 /* This packet creates a
1736                                                  * connection
1737                                                  */
1738                                                 int32_t position =
1739                                                         rte_hash_add_key(ct->
1740                                                                 rhash, key);
1741
1742                                         if (position < 0)
1743                                                 continue;
1744
1745                                                 struct rte_ct_cnxn_data
1746                                                         *new_hash_entry = &ct->
1747                                                 hash_table_entries[position];
1748
1749                                                 /*
1750                                                  *update fields in new_cnxn_data
1751                                                  * not set by "new_connection"
1752                                                  */
1753
1754                                                 memcpy(new_cnxn_data.key, key,
1755                                                 sizeof(new_cnxn_data.key));
1756
1757                                                 new_cnxn_data.
1758                                                         key_is_client_order
1759                                                         = key_is_client_order;
1760                                                 new_cnxn_data.protocol =
1761                                                         UDP_PROTOCOL;
1762                                                 rte_cnxn_ip_type(
1763                                                         &new_cnxn_data.type,
1764                                                         packet);
1765                                                 rte_memcpy(new_hash_entry,
1766                                                         &new_cnxn_data,
1767                                                         sizeof(struct
1768                                                         rte_ct_cnxn_data));
1769
1770                                                 new_hash_entry->counters.
1771                                                         packets_forwarded = 1;
1772                                                 ct->counters->pkts_forwarded++;
1773                                                 new_hash_entry->counters.
1774                                                         packets_dropped = 0;
1775                                                 ct->counters->pkts_drop = 0;
1776                                                 ct->counters->
1777                                                 current_active_sessions++;
1778                                                 ct->counters->
1779                                                         sessions_activated++;
1780
1781                                                 new_hash_entry->
1782                                                         state_used_for_timer
1783                                                         = RTE_CT_UDP_NONE;
1784                                                 rte_ct_set_cnxn_timer_for_udp(
1785                                                         ct,
1786                                                         new_hash_entry,
1787                                                         RTE_CT_UDP_UNREPLIED);
1788
1789                                                 rte_ct_remember_new_connection(
1790                                                                 ct,
1791                                                                 new_hash_entry);
1792                                         }
1793                                 }
1794
1795                         }
1796
1797                 }               /* UDP */
1798         }                       /* packets_for_lookup */
1799
1800         if (CNXN_TRX_DEBUG > 1) {
1801                 printf("Exit cnxn tracker synproxy batch lookup with");
1802                 printf(" packet mask %p\n", (void *)*pkts_mask);
1803         }
1804 }