2 // Copyright (c) 2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #include <rte_ether.h>
18 #include <rte_prefetch.h>
19 #include <rte_cycles.h>
20 #include <rte_malloc.h>
21 #include <rte_memcpy.h>
22 #include <rte_timer.h>
23 #include <rte_spinlock.h>
24 #include "rte_cnxn_tracking.h"
25 #include "rte_ct_tcp.h"
/* Debug / test switches (0 = disabled). */
27 #define CNXN_TRX_DEBUG 0
28 #define TESTING_TIMERS 0
29 #define RTE_CT_TIMER_EXPIRED_DUMP 0
/*
 * Byte offsets used to locate headers inside the mbuf buffer:
 * metadata area + headroom gives the start of the Ethernet header,
 * and the IP header follows the fixed 14-byte Ethernet header.
 */
31 #define META_DATA_OFFSET 128
32 #define ETHERNET_START (META_DATA_OFFSET + RTE_PKTMBUF_HEADROOM)
33 #define ETH_HDR_SIZE 14
34 #define IP_START (ETHERNET_START + ETH_HDR_SIZE)
/* IPv4 field offsets: protocol byte at +9, source address at +12,
 * L4 header after the fixed 20-byte IPv4 header (no IP options supported). */
35 #define PROTOCOL_START (IP_START + 9)
36 #define SRC_ADDR_START (IP_START + 12)
37 #define TCP_START (IP_START + 20)
/* IPv6 field offsets: next-header byte at +6, source address at +8,
 * L4 header after the fixed 40-byte IPv6 header. */
40 #define PROTOCOL_START_IPV6 (IP_START + 6)
41 #define SRC_ADDR_START_IPV6 (IP_START + 8)
42 #define TCP_START_IPV6 (IP_START + 40)
/* IANA IP protocol numbers. */
44 #define TCP_PROTOCOL 6
45 #define UDP_PROTOCOL 17
/* Hash-key sizes in bytes for the IPv4/IPv6 connection 5-tuples. */
46 #define TCP_FW_IPV4_KEY_SIZE 16
48 #define TCP_FW_IPV6_KEY_SIZE 40
/* Fixed (option-less) IP header sizes in bytes. */
50 #define IPv4_HEADER_SIZE 20
51 #define IPv6_HEADER_SIZE 40
/* IP version nibble values as read from the header. */
53 #define IP_VERSION_4 4
54 #define IP_VERSION_6 6
/* Forward declaration of the per-IP-version batch-lookup routine used by
 * rte_ct_cnxn_tracker_batch_lookup_type() below.  (Its return type is on
 * a source line not visible in this chunk.) */
56 rte_ct_cnxn_tracker_batch_lookup_basic_type(
57 struct rte_ct_cnxn_tracker *ct,
58 struct rte_mbuf **pkts,
60 uint64_t no_new_cnxn_mask,
61 uint64_t *reply_pkt_mask,
62 uint64_t *hijack_mask,
63 uint8_t ip_hdr_size_bytes);
66 * Check if the packet is valid for the given connection. "original_direction"
67 * is false if the address order need to be "flipped".See create_cnxn_hashkey().
68 * True otherwise. Return 0 if the packet is valid, or a negative otherwise.
71 /* IP/TCP header print for debugging */
/* Hex-dumps the IP + L4 headers of a packet starting at IP_START:
 * 40 bytes for IPv4 (20 IP + 20 TCP), 60 bytes for IPv6 (40 IP + 20 TCP). */
73 rte_ct_cnxn_print_pkt(struct rte_mbuf *pkt, uint8_t type)
76 uint8_t *rd = RTE_MBUF_METADATA_UINT8_PTR(pkt, IP_START);
79 printf("IP and TCP/UDP headers:\n");
81 if (type == IP_VERSION_4) {
82 for (i = 0; i < 40; i++) {
83 printf("%02x ", rd[i]);
90 if (type == IP_VERSION_6) {
91 for (i = 0; i < 60; i++) {
92 printf("%02x ", rd[i]);
/* Sets *type to IP_VERSION_4 or IP_VERSION_6 based on the packet's IP
 * header size.  *type is left unchanged if the header is neither. */
102 rte_cnxn_ip_type(uint8_t *type, struct rte_mbuf *pkt)
105 int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
107 if (ip_hdr_size_bytes == IPv4_HEADER_SIZE)
108 *type = IP_VERSION_4;
110 if (ip_hdr_size_bytes == IPv6_HEADER_SIZE)
111 *type = IP_VERSION_6;
/* Debug helper: prints all ten 32-bit words of a connection hash key. */
115 rte_ct_print_hashkey(uint32_t *key)
117 printf("Key: %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x \\\n",
118 key[0], key[1], key[2], key[3],
119 key[4], key[5], key[6], key[7], key[8], key[9]);
123 * Create a hash key consisting of the source address/port, the destination
124 * address/ports, and the tcp protocol number. The address/port combos are
125 * treated as two 48 bit numbers and sorted. Thus the key is always the
126 * same regardless of the direction of the packet. Remembering if the numbers
127 * were "flipped" from the order in the packet, and comparing that to whether
128 * the original hash key was flipped, tells if this packet is from the same
129 * direction as the original sender or the response direction. Returns 1 (true)
130 * if the key was left in the original direction.
/* Builds a direction-independent hash key from addresses and ports by
 * sorting the (addr, port) pairs; returns 1 if the key is stored in the
 * packet's original direction, 0 if the pairs were flipped.
 * (The parameter list and parts of each branch are elided in this chunk.) */
133 rte_ct_create_cnxn_hashkey(
142 uint8_t hash_order_original_direction = 1;
146 if (type == IP_VERSION_4) {
147 uint32_t source = *src_addr;
148 uint32_t dest = *dst_addr;
/* zero the words unused by an IPv4 key (IPv6 keys use all of them) */
150 key[3] = key[4] = key[5] = key[6] = key[7] = key[8] = 0;
/* sort by address, breaking ties with the ports */
153 || ((source == dest) && (src_port < dst_port))) {
156 key[2] = (src_port << 16) | dst_port;
160 key[2] = (dst_port << 16) | src_port;
161 hash_order_original_direction = 0;
165 if (type == IP_VERSION_6) {
/* compare full 128-bit addresses to pick a canonical order */
166 int ip_cmp = memcmp(src_addr, dst_addr, 16);
170 if ((ip_cmp < 0) || ((ip_cmp == 0) && (src_port < dst_port))) {
173 key[8] = (src_port << 16) | dst_port;
177 key[8] = (dst_port << 16) | src_port;
178 hash_order_original_direction = 0;
191 rte_ct_print_hashkey(key);
193 return hash_order_original_direction;
/* Returns the fixed IP header size (20 for IPv4, 40 for IPv6) based on
 * the version nibble, or a negative value for anything else.  IP options
 * are not supported, so the size is constant per version. */
198 rte_ct_get_IP_hdr_size(struct rte_mbuf *pkt)
200 /* NOTE: Only supporting IP headers with no options at this time, so
201 * header is fixed size
203 /* TODO: Need to find defined contstants for start of Ether and
/* read the first byte of the IP header; high nibble is the version */
206 uint8_t hdr_chk = RTE_MBUF_METADATA_UINT8(pkt, IP_START);
208 hdr_chk = hdr_chk >> 4;
210 if (hdr_chk == IP_VERSION_4)
211 return IPv4_HEADER_SIZE;
213 else if (hdr_chk == IP_VERSION_6)
214 return IPv6_HEADER_SIZE;
216 else /* Not IPv4 header with no options, return negative. */
219 * int ip_hdr_size_bytes = (ihdr->version_ihl & IPV4_HDR_IHL_MASK) *
220 * IPV4_IHL_MULTIPLIER;
221 * return ip_hdr_size_bytes;
/* Arms the initial timeout for a brand-new TCP connection entry.
 * state_used_for_timer is reset first so the SYN_SENT timer is always
 * installed (the set-timer call skips work when the state is unchanged). */
226 rte_ct_set_timer_for_new_cnxn(
227 struct rte_ct_cnxn_tracker *ct,
228 struct rte_ct_cnxn_data *cd)
230 cd->state_used_for_timer = RTE_CT_TCP_NONE;
231 rte_ct_set_cnxn_timer_for_tcp(ct, cd, RTE_CT_TCP_SYN_SENT);
235 * The connection data is stored in a hash table which makes use of the bulk
236 * lookup optimization provided in DPDK. All of the packets seen in one call
237 * to rte_ct_cnxn_tracker_batch_lookup are done in one hash table lookup. The
238 * number of packets is the number being processed by the pipeline (default
239 * max 32, absolute max 64). For any TCP or UDP packet that does not have
240 * an existing (pseudo-)connection in the table (i.e. was a miss on the hash
241 * lookup), a new connection must be added.
243 * It is possible, for UDP, that the first packet for a (pseudo-)connection and
244 * a subsequent packet are in the same batch. This means that when looking for
245 * new connections in a batch the first one must add the connection, the
246 * second and subsequent (in that batch) that are part of the same connection
247 * must use that newly created one, not create another table entry.
249 * Any newly created entries are "remembered" in linear table, which is search
250 * when processing hash tables misses. All the entries in that table are
251 * "forgotten" at the start of a new batch.
253 * A linear table may seem slow, but consider:
254 * - out of millions of packets/second, this involves at most 64.
255 * - this affects only UDP. TCP connections are set up using an acknowledgement
256 * protocl, so would not have multiple packets for new connection in
258 * - the number of new connections in a batch would usually be zero, or a low
260 * - all the data to search through should still be in cache
/* Records a connection entry created during the current batch so later
 * packets in the same batch (UDP case) can find it despite having missed
 * in the bulk hash lookup.  The list is reset each batch by
 * rte_ct_forget_new_connections().
 * NOTE(review): no bounds check on latest_connection — assumes the
 * new_connections array is sized >= batch size (max 64); verify. */
264 rte_ct_remember_new_connection(
265 struct rte_ct_cnxn_tracker *ct,
266 struct rte_ct_cnxn_data *entry)
268 ct->latest_connection++;
269 ct->new_connections[ct->latest_connection] = entry;
/* Linear search of the connections added during this batch for one whose
 * key matches.  Returns the entry, or (per the elided fall-through path)
 * no match.  Linear scan is acceptable: at most the batch size (<= 64)
 * entries, usually zero — see the block comment above. */
272 static struct rte_ct_cnxn_data *
273 rte_ct_search_new_connections(struct rte_ct_cnxn_tracker *ct, uint32_t *key)
277 for (i = 0; i <= ct->latest_connection; i++) {
278 uint32_t *cnxn_key = ct->new_connections[i]->key;
279 int key_cmp = memcmp(cnxn_key, key,
280 sizeof(ct->new_connections[i]->key));
283 return ct->new_connections[i];
/* Clears the per-batch list of newly created connections (-1 = empty);
 * called at the start of every batch lookup. */
288 static inline void rte_ct_forget_new_connections(struct rte_ct_cnxn_tracker *ct)
290 ct->latest_connection = -1;
/* Processes one TCP packet after the bulk hash lookup: validates it
 * against an existing connection entry (hash_table_entry >= 0) or tries
 * to create a new entry, handling synproxy spoofing where enabled.
 * Returns the rte_ct_packet_action the caller should apply to the packet.
 * (Several parameter and brace lines are elided in this chunk.) */
296 static enum rte_ct_packet_action
297 rte_ct_handle_tcp_lookup(
298 struct rte_ct_cnxn_tracker *ct,
299 struct rte_mbuf *packet,
301 uint8_t key_is_client_order,
303 int hash_table_entry,
305 uint8_t ip_hdr_size_bytes)
307 struct rte_ct_cnxn_data new_cnxn_data;
309 memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
310 enum rte_ct_packet_action packet_action;
/* remember this packet's hash-table slot for later fast access */
313 int32_t position = hash_table_entry;
314 ct->positions[pkt_num] = position;
317 /* rte_ct_cnxn_print_pkt(packet); */
318 if (hash_table_entry >= 0) {
320 * connection found for this packet.
321 * Check that this is a valid packet for connection
324 struct rte_ct_cnxn_data *entry =
325 &ct->hash_table_entries[hash_table_entry];
/* run the packet through the TCP state machine for this connection */
327 packet_action = rte_ct_verify_tcp_packet(ct, entry, packet,
328 key_is_client_order, ip_hdr_size_bytes);
330 switch (packet_action) {
332 case RTE_CT_FORWARD_PACKET:
333 entry->counters.packets_forwarded++;
336 case RTE_CT_DROP_PACKET:
337 entry->counters.packets_dropped++;
338 return RTE_CT_DROP_PACKET;
340 case RTE_CT_REOPEN_CNXN_AND_FORWARD_PACKET:
341 /* Entry already in hash table, just re-initialize */
343 /* Don't use syproxy on re-init, since it
344 * is a valid connection
347 if (rte_ct_tcp_new_connection(ct, &new_cnxn_data,
348 packet, 0, ip_hdr_size_bytes) !=
349 RTE_CT_DROP_PACKET) {
/* copy only the rebuilt TCP state into the existing entry */
350 rte_memcpy(&entry->ct_protocol.tcp_ct_data,
351 &new_cnxn_data.ct_protocol.tcp_ct_data,
352 sizeof(new_cnxn_data.ct_protocol.tcp_ct_data));
353 rte_ct_set_timer_for_new_cnxn(ct, entry);
/* NOTE(review): decrementing sessions_reactivated on a reopen
 * looks inverted (an increment would be expected) — confirm
 * against the counter's definition elsewhere in the project. */
354 if (ct->counters->sessions_reactivated > 0)
355 ct->counters->sessions_reactivated--;
360 case RTE_CT_SEND_SERVER_SYN:
361 ct->counters->pkts_forwarded++;
362 /* packet modified, send back to original source */
363 return RTE_CT_SEND_SERVER_SYN;
365 case RTE_CT_SEND_SERVER_ACK:
366 ct->counters->pkts_forwarded++;
367 /* packet modified, send back to original source */
368 return RTE_CT_SEND_SERVER_ACK;
371 ct->counters->pkts_forwarded++;
372 /* packet saved with connection, notify VNF
375 return RTE_CT_HIJACK;
377 case RTE_CT_DESTROY_CNXN_AND_FORWARD_PACKET:
380 * Forward the packet because it is "legal", but destroy
381 * the connection by removing it from the hash table and
382 * cancelling any timer. There is a remote possibility
383 * (perhaps impossible?) that a later packet in the same
384 * batch is for this connection. Due to the batch
385 * lookup, which has already happened, the later packet
386 * thinks that the connection is valid. This might cause
387 * a timer to be set. Eventually, it would time out so
388 * the only bug case occurs if the hash table also, in
389 * the same batch, allocates this entry for a new
390 * connection before the above packet is received. The
391 * chances of this happening seem impossibly small but
392 * this case should perhaps be investigated further.
395 if (rte_hash_del_key(ct->rhash, entry->key) >= 0) {
397 * if rte_hash_del_key >= 0, then the connection
398 * was found in the hash table and removed.
399 * Counters must be updated, and the timer
400 * cancelled. If the result was < 0, then the
401 * connection must have already been deleted,
402 * and it must have been deleted in this batch
403 * of packets processed. Do nothing.
406 ct->counters->sessions_closed++;
407 if (ct->counters->current_active_sessions > 0)
408 ct->counters->current_active_sessions--;
409 rte_ct_cancel_cnxn_timer(entry);
411 entry->counters.packets_forwarded++;
/* --- hash miss: no existing connection for this packet --- */
418 /* try to add new connection */
419 struct rte_ct_cnxn_data *new_hash_entry;
/* (elided guard) new connections disallowed for this packet: drop */
422 ct->counters->pkts_drop_invalid_conn++;
423 return RTE_CT_DROP_PACKET;
/* validate the packet as a connection opener (SYN etc.) */
426 packet_action = rte_ct_tcp_new_connection(ct, &new_cnxn_data,
427 packet, ct->misc_options.synproxy_enabled,
430 if (unlikely(packet_action == RTE_CT_DROP_PACKET)) {
431 ct->counters->pkts_drop_invalid_conn++;
432 return RTE_CT_DROP_PACKET;
435 /* This packet creates a connection . */
436 int32_t position = rte_hash_add_key(ct->rhash, key);
439 ("Failed to add new connection to hash table %d, pkt_num:%d\n",
441 return RTE_CT_DROP_PACKET;
444 ct->positions[pkt_num] = position;
446 new_hash_entry = &ct->hash_table_entries[position];
448 /* update fields in new_cnxn_data not set by new_connection */
450 memcpy(new_cnxn_data.key, key, sizeof(new_cnxn_data.key));
451 new_cnxn_data.key_is_client_order = key_is_client_order;
452 new_cnxn_data.protocol = TCP_PROTOCOL;
453 rte_cnxn_ip_type(&new_cnxn_data.type, packet);
454 rte_memcpy(new_hash_entry, &new_cnxn_data,
455 sizeof(struct rte_ct_cnxn_data));
456 new_hash_entry->counters.packets_forwarded = 1;
457 new_hash_entry->counters.packets_dropped = 0;
458 ct->counters->current_active_sessions++;
459 ct->counters->sessions_activated++;
461 if (packet_action == RTE_CT_SEND_CLIENT_SYNACK) {
462 /* this is a synproxied connecton */
463 /* must remember mss, window scaling etc. from client */
465 rte_sp_parse_options(packet, new_hash_entry);
468 * update packet to a SYN/ACK directed to the client,
469 * including default header options
472 rte_sp_cvt_to_spoofed_client_synack(new_hash_entry,
476 * run updated packet through connection tracking so
477 * cnxn data updated appropriately and timer set for syn
478 * received state, not syn sent.
480 packet_action = rte_ct_verify_tcp_packet(ct,
481 new_hash_entry, packet,
482 !key_is_client_order,
485 if (unlikely(packet_action != RTE_CT_FORWARD_PACKET)) {
486 /* should never get here */
487 printf("Serious error in synproxy generating ");
489 return RTE_CT_DROP_PACKET;
491 ct->counters->pkts_forwarded++;
492 /* spoofed packet good to go */
493 return RTE_CT_SEND_CLIENT_SYNACK;
/* non-synproxy path: arm the SYN_SENT timer for the new entry */
495 rte_ct_set_timer_for_new_cnxn(ct, new_hash_entry);
499 /* TODO: is it possible that earlier packet in this batch caused new
500 * entry to be added for the connection? Seems unlikely, since it
501 * would require multiple packets from the same side of the connection
502 * one after another immediately, and the TCP connection OPEN requires
503 * acknowledgement before further packets. What about simultaneous
504 * OPEN? Only if both sides are on same input port. Is that possible?
506 /* if made it here, packet will be forwarded */
507 ct->counters->pkts_forwarded++;
508 return RTE_CT_FORWARD_PACKET;
/* Core batch connection-tracking routine.  Builds hash keys for every
 * TCP/UDP packet in pkts_mask, performs one bulk hash lookup, then
 * dispatches each packet to the TCP state machine or the UDP handler.
 * Returns the bitmask of packets that remain valid (not dropped);
 * reply_pkt_mask / hijack_mask are set for synproxy-generated packets.
 * (Return type, braces and several statements are elided in this chunk.) */
512 rte_ct_cnxn_tracker_batch_lookup_basic(
513 struct rte_ct_cnxn_tracker *ct,
514 struct rte_mbuf **pkts,
516 uint64_t no_new_cnxn_mask,
517 uint64_t *reply_pkt_mask,
518 uint64_t *hijack_mask)
520 /* bitmap of packets left to process */
521 uint64_t pkts_to_process = pkts_mask;
522 /* bitmap of valid packets to return */
523 uint64_t valid_packets = pkts_mask;
524 uint8_t compacting_map[RTE_HASH_LOOKUP_BULK_MAX];
525 /* for pkt, key in originators direction? */
526 uint8_t key_orig_dir[RTE_HASH_LOOKUP_BULK_MAX];
527 uint32_t packets_for_lookup = 0;
528 int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
530 struct rte_ct_cnxn_data new_cnxn_data;
532 if (CNXN_TRX_DEBUG > 1) {
533 printf("Enter cnxn tracker %p", ct);
534 printf(" synproxy batch lookup with packet mask %p\n",
/* reset the per-batch "recently added" connection list */
538 rte_ct_forget_new_connections(ct);
543 * Use bulk lookup into hash table for performance reasons. Cannot have
544 * "empty slots" in the bulk lookup,so need to create a compacted table.
/* phase 1: walk set bits of pkts_to_process, build compacted key table */
547 for (; pkts_to_process;) {
548 uint8_t pos = (uint8_t) __builtin_ctzll(pkts_to_process);
549 /* bitmask representing only this packet */
550 uint64_t pkt_mask = 1LLU << pos;
551 /* remove this packet from remaining list */
552 pkts_to_process &= ~pkt_mask;
554 struct rte_mbuf *pkt = pkts[pos];
556 int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
558 if (unlikely(ip_hdr_size_bytes < 0)) {
559 /* Not IPv4, ignore. */
563 void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
565 /* TCP and UDP ports at same offset, just use TCP for
568 struct tcp_hdr *thdr =
569 (struct tcp_hdr *)RTE_MBUF_METADATA_UINT32_PTR(pkt,
570 (IP_START + ip_hdr_size_bytes));
571 uint16_t src_port = rte_bswap16(thdr->src_port);
572 uint16_t dst_port = rte_bswap16(thdr->dst_port);
574 if (ip_hdr_size_bytes == IPv4_HEADER_SIZE) {
575 struct ipv4_hdr *ihdr = (struct ipv4_hdr *)ip_hdr;
576 uint8_t proto = ihdr->next_proto_id;
578 if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
579 /* only tracking TCP and UDP at this time */
584 * Load the addresses and ports, and convert from Intel
585 * to network byte order. Strictly speaking, it is not
586 * necessary to do this conversion, as this data is only
587 * used to create a hash key.
589 uint32_t src_addr = rte_bswap32(ihdr->src_addr);
590 uint32_t dst_addr = rte_bswap32(ihdr->dst_addr);
592 if (CNXN_TRX_DEBUG > 2) {
593 if (CNXN_TRX_DEBUG > 4)
594 rte_ct_cnxn_print_pkt(pkt,
597 /* need to create compacted table of pointers to pass
601 compacting_map[packets_for_lookup] = pos;
602 key_orig_dir[packets_for_lookup] =
603 rte_ct_create_cnxn_hashkey(&src_addr, &dst_addr,
607 [packets_for_lookup][0],
609 packets_for_lookup++;
612 if (ip_hdr_size_bytes == IPv6_HEADER_SIZE) {
613 struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
614 uint8_t proto = ihdr->proto;
616 if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
617 /* only tracking TCP and UDP at this time */
621 if (CNXN_TRX_DEBUG > 2) {
622 if (CNXN_TRX_DEBUG > 4)
623 rte_ct_cnxn_print_pkt(pkt,
627 /* need to create compacted table of pointers to pass
631 compacting_map[packets_for_lookup] = pos;
632 key_orig_dir[packets_for_lookup] =
633 rte_ct_create_cnxn_hashkey(
634 (uint32_t *) ihdr->src_addr,
635 (uint32_t *) ihdr->dst_addr,
639 [packets_for_lookup][0],
641 packets_for_lookup++;
646 if (unlikely(packets_for_lookup == 0))
647 return valid_packets; /* no suitable packet for lookup */
649 /* Clear all the data to make sure no stack garbage is in it */
650 memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
652 /* lookup all tcp & udp packets in the connection table */
/* phase 2: one bulk lookup for every compacted key */
655 rte_hash_lookup_bulk(ct->rhash, (const void **)&ct->hash_key_ptrs,
656 packets_for_lookup, &positions[0]);
658 if (unlikely(lookup_result < 0)) {
659 /* TODO: change a log */
660 printf("Unexpected hash table problem, discarding all packets");
661 return 0; /* unknown error, just discard all packets */
664 for (i = 0; i < packets_for_lookup; i++) {
665 if (positions[i] >= 0)
666 printf("@CT positions[i]= %d, compacting_map[i]= %d\n",
667 positions[i], compacting_map[i]);
/* phase 3: act on each lookup result */
670 for (i = 0; i < packets_for_lookup; i++) {
671 /* index into hash table entries */
672 int hash_table_entry = positions[i];
673 /* index into packet table of this packet */
674 uint8_t pkt_index = compacting_map[i];
675 /* bitmask representing only this packet */
676 uint64_t pkt_mask = 1LLU << pkt_index;
677 uint8_t key_is_client_order = key_orig_dir[i];
678 uint32_t *key = ct->hash_key_ptrs[pkt_index];
/* protocol number is stored in the key's last 32-bit word */
679 uint8_t protocol = *(key + 9);
680 struct rte_mbuf *packet = pkts[pkt_index];
681 int no_new_cnxn = (pkt_mask & no_new_cnxn_mask) != 0;
683 /* rte_ct_print_hashkey(key); */
685 if (protocol == TCP_PROTOCOL) {
686 enum rte_ct_packet_action tcp_pkt_action;
688 int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(packet);
689 tcp_pkt_action = rte_ct_handle_tcp_lookup(ct, packet,
690 pkt_index, key_is_client_order,
691 key, hash_table_entry, no_new_cnxn,
694 switch (tcp_pkt_action) {
696 case RTE_CT_SEND_CLIENT_SYNACK:
697 case RTE_CT_SEND_SERVER_ACK:
698 /* altered packet or copy must be returned
701 *reply_pkt_mask |= pkt_mask;
704 case RTE_CT_SEND_SERVER_SYN:
705 case RTE_CT_FORWARD_PACKET:
709 *hijack_mask |= pkt_mask;
713 /* bad packet, clear mask to drop */
714 valid_packets ^= pkt_mask;
715 ct->counters->pkts_drop++;
719 /* rte_ct_cnxn_print_pkt(pkts[pkt_index]); */
720 } else { /* UDP entry */
722 if (hash_table_entry >= 0) {
724 * connection found for this packet. Check that
725 * this is a valid packet for connection
728 struct rte_ct_cnxn_data *entry =
729 &ct->hash_table_entries[hash_table_entry];
731 if (rte_ct_udp_packet
732 (ct, entry, pkts[pkt_index],
733 key_is_client_order)) {
734 entry->counters.packets_forwarded++;
735 ct->counters->pkts_forwarded++;
739 * connection not found in bulk hash lookup,
740 * but might have been added in this batch
743 struct rte_ct_cnxn_data *recent_entry =
744 rte_ct_search_new_connections(ct, key);
746 if (recent_entry != NULL) {
747 if (rte_ct_udp_packet(ct, recent_entry,
749 key_is_client_order)) {
750 recent_entry->counters.
752 ct->counters->pkts_forwarded++;
755 /* no existing connection, try to add
760 /* new cnxn not allowed, clear
763 valid_packets ^= pkt_mask;
764 ct->counters->pkts_drop++;
766 pkts_drop_invalid_conn++;
770 if (rte_ct_udp_new_connection(ct,
773 /* This packet creates a
783 struct rte_ct_cnxn_data
784 *new_hash_entry = &ct->
785 hash_table_entries[position];
788 *update fields in new_cnxn_data
789 * not set by "new_connection"
792 memcpy(new_cnxn_data.key, key,
793 sizeof(new_cnxn_data.key));
797 = key_is_client_order;
798 new_cnxn_data.protocol =
803 rte_memcpy(new_hash_entry,
808 new_hash_entry->counters.
809 packets_forwarded = 1;
810 ct->counters->pkts_forwarded++;
811 new_hash_entry->counters.
/* NOTE(review): resetting the tracker-wide pkts_drop counter to 0
 * here (line 813) looks unintended — the per-entry packets_dropped
 * was presumably meant; confirm against the full source. */
813 ct->counters->pkts_drop = 0;
815 current_active_sessions++;
817 sessions_activated++;
/* new UDP entry starts in UNREPLIED state with its timer armed */
822 rte_ct_set_cnxn_timer_for_udp(
825 RTE_CT_UDP_UNREPLIED);
827 rte_ct_remember_new_connection(
836 } /* packets_for_lookup */
838 if (CNXN_TRX_DEBUG > 1) {
839 printf("Exit cnxn tracker synproxy batch lookup with");
840 printf(" packet mask %p\n", (void *)valid_packets);
843 return valid_packets;
/* Synproxy-aware wrapper: runs the basic batch lookup with no
 * new-connection restrictions, routing the reply/hijack masks into the
 * caller-supplied synproxy helper struct. */
847 rte_ct_cnxn_tracker_batch_lookup_with_synproxy(
848 struct rte_ct_cnxn_tracker *ct,
849 struct rte_mbuf **pkts,
851 struct rte_synproxy_helper *sp_helper)
853 return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask, 0,
854 &sp_helper->reply_pkt_mask, &sp_helper->hijack_mask);
/* CGNAT entry point: disables synproxy on the tracker, then performs the
 * standard batch lookup using the masks in ct_helper.  Returns the
 * bitmask of packets still valid after connection tracking. */
857 uint64_t cgnapt_ct_process(
858 struct rte_ct_cnxn_tracker *ct,
859 struct rte_mbuf **pkts,
861 struct rte_CT_helper *ct_helper)
863 /* to disable SynProxy for CGNAT */
864 rte_ct_disable_synproxy(ct);
865 return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
866 ct_helper->no_new_cnxn_mask,
867 &ct_helper->reply_pkt_mask,
868 &ct_helper->hijack_mask);
/* General-purpose wrapper around the basic batch lookup, taking all
 * masks from the rte_CT_helper struct. */
872 rte_ct_cnxn_tracker_batch_lookup(
873 struct rte_ct_cnxn_tracker *ct,
874 struct rte_mbuf **pkts,
876 struct rte_CT_helper *ct_helper)
879 return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
880 ct_helper->no_new_cnxn_mask,
881 &ct_helper->reply_pkt_mask, &ct_helper->hijack_mask);
/* Wrapper for callers that already know the IP header size (i.e. the IP
 * version) of the whole batch; forwards to the *_basic_type variant
 * declared near the top of the file. */
885 void rte_ct_cnxn_tracker_batch_lookup_type(
886 struct rte_ct_cnxn_tracker *ct,
887 struct rte_mbuf **pkts,
889 struct rte_CT_helper *ct_helper,
890 uint8_t ip_hdr_size_bytes)
893 rte_ct_cnxn_tracker_batch_lookup_basic_type(ct, pkts, pkts_mask,
894 ct_helper->no_new_cnxn_mask,
895 &ct_helper->reply_pkt_mask, &ct_helper->hijack_mask,
/* Wrapper that honours a no-new-connection mask but discards the
 * reply/hijack masks (both routed to a throwaway variable, which is
 * declared on an elided line). */
902 rte_ct_cnxn_tracker_batch_lookup_with_new_cnxn_control(
903 struct rte_ct_cnxn_tracker *ct,
904 struct rte_mbuf **pkts,
906 uint64_t no_new_cnxn_mask)
910 return rte_ct_cnxn_tracker_batch_lookup_basic(ct, pkts, pkts_mask,
912 &dont_care, &dont_care);
/* Initializes the tracker's timing state and installs the default TCP
 * and UDP state timeouts (values are seconds converted to TSC cycles).
 * Also sets the miscellaneous tracking options to their defaults. */
917 rte_ct_initialize_default_timeouts(struct rte_ct_cnxn_tracker *new_cnxn_tracker)
920 /* timer system init */
922 uint64_t hertz = rte_get_tsc_hz();
924 new_cnxn_tracker->hertz = hertz;
/* the timing wheel advances in 100 ms steps (hertz / 10 cycles) */
925 new_cnxn_tracker->timing_cycles_per_timing_step = hertz / 10;
926 new_cnxn_tracker->timing_100ms_steps_previous = 0;
927 new_cnxn_tracker->timing_100ms_steps = 0;
928 new_cnxn_tracker->timing_last_time = rte_get_tsc_cycles();
930 /* timeouts in seconds */
931 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
932 [RTE_CT_TCP_SYN_SENT] = 120 * hertz;
933 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
934 [RTE_CT_TCP_SYN_RECV] = 60 * hertz;
/* established connections live 5 days by default */
936 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
937 [RTE_CT_TCP_ESTABLISHED] = 60 * 60 * 24 * 5 * hertz;
939 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
940 [RTE_CT_TCP_FIN_WAIT] = 120 * hertz;
941 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
942 [RTE_CT_TCP_CLOSE_WAIT] = 60 * hertz;
943 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
944 [RTE_CT_TCP_LAST_ACK] = 30 * hertz;
945 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
946 [RTE_CT_TCP_TIME_WAIT] = 120 * hertz;
947 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
948 [RTE_CT_TCP_CLOSE] = 10 * hertz;
949 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
950 [RTE_CT_TCP_SYN_SENT_2] = 120 * hertz;
951 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
952 [RTE_CT_TCP_RETRANS] = 300 * hertz;
953 new_cnxn_tracker->ct_timeout.tcptimeout.tcp_timeouts
954 [RTE_CT_TCP_UNACK] = 300 * hertz;
956 new_cnxn_tracker->ct_timeout.udptimeout.udp_timeouts
957 [RTE_CT_UDP_UNREPLIED] = 30 * hertz;
958 new_cnxn_tracker->ct_timeout.udptimeout.udp_timeouts
959 [RTE_CT_UDP_REPLIED] = 180 * hertz;
960 /* miscellaneous init */
961 new_cnxn_tracker->misc_options.tcp_max_retrans =
962 RTE_CT_TCP_MAX_RETRANS;
963 new_cnxn_tracker->misc_options.tcp_loose = 0;
964 new_cnxn_tracker->misc_options.tcp_be_liberal = 0;
/* mark every batch-position slot as unused */
967 for (i=0; i < RTE_HASH_LOOKUP_BULK_MAX ;i ++ )
968 new_cnxn_tracker->positions[i] = -1;
/* Global pool of per-instance counter blocks, handed out sequentially by
 * rte_ct_initialize_cnxn_tracker_with_synproxy(); -1 means none in use.
 * Initialization is serialized on core 0, so no lock is taken. */
974 struct rte_CT_counter_block rte_CT_counter_table[MAX_CT_INSTANCES]
976 int rte_CT_hi_counter_block_in_use = -1;
/* Initializes a connection tracker: allocates the connection-entry
 * array, initializes per-entry timers and bulk-lookup key pointers,
 * claims a global counter block, and creates the DPDK hash table.
 * (Error-return lines and some statements are elided in this chunk.) */
979 rte_ct_initialize_cnxn_tracker_with_synproxy(
980 struct rte_ct_cnxn_tracker *new_cnxn_tracker,
981 uint32_t max_connection_count,
983 uint16_t pointer_offset)
987 struct rte_CT_counter_block *counter_ptr;
989 * TODO: Should number of entries be something like
990 * max_connection_count * 1.1 to allow for unused space
991 * and thus increased performance of hash table, at a cost of memory???
994 new_cnxn_tracker->pointer_offset = pointer_offset;
996 memset(new_cnxn_tracker->name, '\0', sizeof(new_cnxn_tracker->name));
/* NOTE(review): BUG — strlen() of the destination, which was just
 * zeroed above, is always 0, so no bytes of "name" are ever copied.
 * The bound should be based on sizeof(new_cnxn_tracker->name) - 1
 * (and strncpy does not guarantee NUL-termination in general). */
997 strncpy(new_cnxn_tracker->name, name, strlen(new_cnxn_tracker->name));
998 //strcpy(new_cnxn_tracker->name, name);
999 /* + (max_connection_count >> 3); */
1000 uint32_t number_of_entries = max_connection_count;
1002 size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_ct_cnxn_data) *
1004 new_cnxn_tracker->hash_table_entries =
1005 rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
1006 if (new_cnxn_tracker->hash_table_entries == NULL) {
1007 printf(" Not enough memory, or invalid arguments\n");
1010 new_cnxn_tracker->num_cnxn_entries = number_of_entries;
1012 /* initialize all timers */
1014 for (i = 0; i < number_of_entries; i++)
1015 rte_timer_init(&new_cnxn_tracker->hash_table_entries[i].timer);
1017 /* pointers for temp storage used during bulk hash */
1018 for (i = 0; i < RTE_HASH_LOOKUP_BULK_MAX; i++)
1019 new_cnxn_tracker->hash_key_ptrs[i] =
1020 &new_cnxn_tracker->hash_keys[i][0];
1023 * Now allocate a counter block entry.It appears that the initialization
1024 * of these threads is serialized on core 0 so no lock is necessary
/* NOTE(review): off-by-one — after the increment below the index can
 * reach MAX_CT_INSTANCES (out of bounds); the guard should likely be
 * ">= MAX_CT_INSTANCES - 1". Confirm against the full source. */
1027 if (rte_CT_hi_counter_block_in_use == MAX_CT_INSTANCES)
1030 rte_CT_hi_counter_block_in_use++;
1031 counter_ptr = &rte_CT_counter_table[rte_CT_hi_counter_block_in_use];
1033 new_cnxn_tracker->counters = counter_ptr;
1035 /* set up hash table parameters, then create hash table */
1036 struct rte_hash_parameters rhash_parms = {
1038 .entries = number_of_entries,
1039 .hash_func = NULL, /* use default hash */
1041 .hash_func_init_val = 0,
1042 .socket_id = rte_socket_id(),
1043 .extra_flag = 1 /*This is needed for TSX memory*/
1046 new_cnxn_tracker->rhash = rte_hash_create(&rhash_parms);
/* Convenience initializer: same as the synproxy variant with a zero
 * pointer offset (synproxy buffering unused). */
1052 rte_ct_initialize_cnxn_tracker(
1053 struct rte_ct_cnxn_tracker *new_cnxn_tracker,
1054 uint32_t max_connection_count,
1057 return rte_ct_initialize_cnxn_tracker_with_synproxy(new_cnxn_tracker,
1058 max_connection_count, name, 0);
/* Releases the tracker's connection-entry array and its hash table.
 * The rte_ct_cnxn_tracker struct itself is owned by the caller. */
1062 rte_ct_free_cnxn_tracker_resources(struct rte_ct_cnxn_tracker *old_cnxn_tracker)
1064 rte_free(old_cnxn_tracker->hash_table_entries);
1065 rte_hash_free(old_cnxn_tracker->rhash);
/* Returns the size of the tracker struct so callers can allocate it
 * without knowing its layout. */
1070 rte_ct_get_cnxn_tracker_size(void)
1072 return sizeof(struct rte_ct_cnxn_tracker);
/* Forward declaration of the timer-expiry callback (defined below). */
1076 rte_ct_cnxn_timer_expired(struct rte_timer *rt, void *arg);
/* (Re)arms the single-shot expiry timer for a connection.  Relies on the
 * rte_timer being the first member of rte_ct_cnxn_data so the cnxn_data
 * pointer doubles as the rte_timer pointer (cast back in the callback). */
1079 rte_ct_set_cnxn_timer(
1080 struct rte_ct_cnxn_tracker *ct,
1081 struct rte_ct_cnxn_data *cd,
1082 uint64_t ticks_until_timeout)
1085 * pointer to cnxn_data will be stored in timer system as pointer to
1086 * rte_timer for later cast back to cnxn_data during timeout handling
1089 struct rte_timer *rt = (struct rte_timer *)cd;
1091 /* execute timeout on timer core */
1092 uint32_t core_id = get_timer_core_id();
1094 /* execute timeout on current core */
1095 uint32_t core_id = rte_lcore_id();
1097 /* safe to reset since timeouts handled synchronously
1098 * by rte_timer_manage
1100 int success = rte_timer_reset(rt, ticks_until_timeout, SINGLE, core_id,
1101 rte_ct_cnxn_timer_expired, ct);
1104 /* TODO: Change to log, perhaps something else?
1105 * This should not happen
1107 printf("CNXN_TRACKER: Failed to set connection timer.\n");
1112 * For the given connection, set a timeout based on the given state. If the
1113 * timer is already set, this call will reset the timer with a new value.
1117 rte_ct_set_cnxn_timer_for_tcp(
1118 struct rte_ct_cnxn_tracker *ct,
1119 struct rte_ct_cnxn_data *cd,
/* record when this connection should really expire: "now" (in cycles,
 * derived from the 100 ms step counter) plus the state's timeout */
1123 cd->expected_timeout =
1124 (ct->timing_100ms_steps * ct->timing_cycles_per_timing_step) +
1125 ct->ct_timeout.tcptimeout.tcp_timeouts[tcp_state];
1127 if (tcp_state == cd->state_used_for_timer) {
1129 * Don't reset timer, too expensive. Instead, determine time
1130 * elapsed since start of timer. When this timer expires, the
1131 * timer will be reset to the elapsed timer. So if in a state
1132 * with a 5 minute timer last sees a packet 4 minutes into the
1133 * timer, the timer when expires will be reset to 4 minutes.
1134 * This means the timer will then expire 5 minutes after
1141 printf("Set Timer for connection %p and state %s\n", cd,
1142 rte_ct_tcp_names[tcp_state]);
1144 rte_ct_set_cnxn_timer(ct, cd,
1146 tcptimeout.tcp_timeouts[tcp_state]);
1147 cd->state_used_for_timer = tcp_state;
1151 * For the given connection, set a timeout based on the given state.
1152 * If the timer is already set,
1153 * this call will reset the timer with a new value.
1157 rte_ct_set_cnxn_timer_for_udp(
1158 struct rte_ct_cnxn_tracker *ct,
1159 struct rte_ct_cnxn_data *cd,
/* NOTE(review): inconsistent with the TCP variant above — the
 * "ct->timing_100ms_steps *" factor is missing here, so
 * expected_timeout is not anchored to the current time. Looks like a
 * bug; confirm against the full source. */
1163 cd->expected_timeout = (ct->timing_cycles_per_timing_step) +
1164 ct->ct_timeout.udptimeout.udp_timeouts[udp_state];
1166 if (udp_state == cd->state_used_for_timer) {
1168 * Don't reset timer, too expensive. Instead, determine time
1169 * elapsed since start of timer. When this timer expires, the
1170 * timer will be reset to the elapsed timer. So if in a state
1171 * with a 5 minute timer last sees a packet 4 minutes into the
1172 * timer, the timer when expires will be reset to 4 minutes.
1173 * This means the timer will then
1174 * expire 5 minutes after the last packet.
1180 printf("Set Timer for connection %p and state %s\n", cd,
1181 rte_ct_udp_names[udp_state]);
1182 rte_ct_set_cnxn_timer(ct, cd,
1184 udptimeout.udp_timeouts[udp_state]);
1185 cd->state_used_for_timer = udp_state;
1188 /* Cancel the timer associated with the connection.
1189 * Safe to call if no timer set.
1192 rte_ct_cancel_cnxn_timer(struct rte_ct_cnxn_data *cd)
1195 printf("Cancel Timer\n");
1197 rte_timer_stop(&cd->timer);
/* Advances the tracker's 100 ms step clock and, when a new step has been
 * reached, lets the DPDK timer subsystem run any expired connection
 * timers (the rte_timer_manage() call is on an elided line). */
1201 rte_ct_handle_expired_timers(struct rte_ct_cnxn_tracker *ct)
1204 * If current time (in 100 ms increments) is different from the
1205 * time it was last viewed, then check for and process expired timers.
1208 uint64_t new_time = rte_get_tsc_cycles();
1209 uint64_t time_diff = new_time - ct->timing_last_time;
1211 if (time_diff >= ct->timing_cycles_per_timing_step) {
1212 ct->timing_last_time = new_time;
1213 ct->timing_100ms_steps++;
1216 if (ct->timing_100ms_steps != ct->timing_100ms_steps_previous) {
1218 ct->timing_100ms_steps_previous = ct->timing_100ms_steps;
1222 /* timer has expired. Need to delete connection entry */
/* Timer callback: if traffic arrived since the timer was armed
 * (expected_timeout still in the future), re-arm for the remainder;
 * otherwise close out the connection: update counters, free any
 * synproxy-buffered packets, and delete the hash-table entry. */
1225 rte_ct_cnxn_timer_expired(struct rte_timer *rt, void *arg)
1227 /* the pointer to the rte_timer was actually a pointer
1230 struct rte_ct_cnxn_data *cd = (struct rte_ct_cnxn_data *)rt;
1231 struct rte_ct_cnxn_tracker *ct = (struct rte_ct_cnxn_tracker *)arg;
1235 * Check to see if the timer has "really" expired. If traffic occured
1236 * since the timer was set, the timer needs be extended, so that timer
1237 * expires the appropriate amount after that last packet.
1240 uint64_t current_time = ct->timing_100ms_steps *
1241 ct->timing_cycles_per_timing_step;
1243 if (cd->expected_timeout >= current_time) {
1244 uint64_t time_diff = cd->expected_timeout - current_time;
1246 rte_ct_set_cnxn_timer(ct, cd, time_diff);
1250 if (cd->protocol == TCP_PROTOCOL) {
/* TIME_WAIT/CLOSE expiry is a normal close; anything else timed out */
1251 if (cd->state_used_for_timer == RTE_CT_TCP_TIME_WAIT ||
1252 cd->state_used_for_timer == RTE_CT_TCP_CLOSE)
1253 ct->counters->sessions_closed++;
1255 ct->counters->sessions_timedout++;
1256 /* if synproxied connection, free list of buffered
1260 if (cd->ct_protocol.synproxy_data.synproxied)
1261 rte_ct_release_buffered_packets(ct, cd);
1263 } else if (cd->protocol == UDP_PROTOCOL)
1264 ct->counters->sessions_closed++;
1265 if (ct->counters->current_active_sessions > 0)
1266 ct->counters->current_active_sessions--;
1268 if (RTE_CT_TIMER_EXPIRED_DUMP) {
/* NOTE(review): divides by forwarded+dropped — zero if both counters
 * are 0 (division by zero); only reachable when the dump flag is on. */
1269 uint64_t percent = (cd->counters.packets_dropped * 10000) /
1270 (cd->counters.packets_forwarded +
1271 cd->counters.packets_dropped);
1273 if (cd->protocol == TCP_PROTOCOL) {
1274 printf("CnxnTrkr %s, timed-out TCP Connection: %p,",
1276 printf(" %s, pkts forwarded %"
1277 PRIu64 ", pkts dropped %" PRIu64
1279 rte_ct_tcp_names[cd->state_used_for_timer],
1280 cd->counters.packets_forwarded,
1281 cd->counters.packets_dropped,
1282 (uint32_t) (percent / 100),
1283 (uint32_t) (percent % 100));
1284 } else if (cd->protocol == UDP_PROTOCOL) {
1285 printf("CnxnTrkr %s, Timed-out UDP Connection: %p,",
1287 printf(" %s, pkts forwarded %" PRIu64
1288 ", pkts dropped %" PRIu64 ", drop%% %u.%u\n",
1289 rte_ct_udp_names[cd->state_used_for_timer],
1290 cd->counters.packets_forwarded,
1291 cd->counters.packets_dropped,
1292 (uint32_t) (percent / 100),
1293 (uint32_t) (percent % 100));
/* finally remove the connection from the hash table */
1297 success = rte_hash_del_key(ct->rhash, &cd->key);
1300 /* TODO: change to a log */
1301 rte_ct_print_hashkey(cd->key);
1306 struct rte_CT_counter_block *
1307 rte_ct_get_counter_address(struct rte_ct_cnxn_tracker *ct)
1309 return ct->counters;
1313 rte_ct_set_configuration_options(struct rte_ct_cnxn_tracker *ct,
1314 char *name, char *value)
1316 /* check non-time values first */
1317 int ival = atoi(value);
1320 if (strcmp(name, "tcp_loose") == 0) {
1321 ct->misc_options.tcp_loose = ival;
1325 /* tcp_be_liberal */
1326 if (strcmp(name, "tcp_be_liberal") == 0) {
1327 ct->misc_options.tcp_be_liberal = ival;
1331 /* tcp_max_retrans */
1332 if (strcmp(name, "tcp_max_retrans") == 0) {
1333 ct->misc_options.tcp_max_retrans = ival;
1337 uint64_t time_value = ival * ct->hertz;
1340 /* configuration of timer values */
1343 if (strcmp(name, "tcp_syn_sent") == 0) {
1344 if (time_value == 0)
1346 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_SENT] =
1352 if (strcmp(name, "tcp_syn_recv") == 0) {
1353 if (time_value == 0)
1355 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_RECV] =
1360 /* tcp_established */
1361 if (strcmp(name, "tcp_established") == 0) {
1362 if (time_value == 0)
1364 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_ESTABLISHED] =
1370 if (strcmp(name, "tcp_fin_wait") == 0) {
1371 if (time_value == 0)
1373 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_FIN_WAIT] =
1378 /* tcp_close_wait */
1379 if (strcmp(name, "tcp_close_wait") == 0) {
1380 if (time_value == 0)
1382 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_CLOSE_WAIT] =
1388 if (strcmp(name, "tcp_last_ack") == 0) {
1389 if (time_value == 0)
1391 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_LAST_ACK] =
1397 if (strcmp(name, "tcp_time_wait") == 0) {
1398 if (time_value == 0)
1400 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_TIME_WAIT] =
1406 if (strcmp(name, "tcp_close") == 0) {
1407 if (time_value == 0)
1409 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_CLOSE] =
1414 /* tcp_syn_sent_2 */
1415 if (strcmp(name, "tcp_syn_sent_2") == 0) {
1416 if (time_value == 0)
1418 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_SYN_SENT_2] =
1424 if (strcmp(name, "tcp_retrans") == 0) {
1425 if (time_value == 0)
1427 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_RETRANS] =
1433 if (strcmp(name, "tcp_unack") == 0) {
1434 if (time_value == 0)
1436 ct->ct_timeout.tcptimeout.tcp_timeouts[RTE_CT_TCP_UNACK] =
1442 if (strcmp(name, "udp_unreplied") == 0) {
1443 if (time_value == 0)
1445 ct->ct_timeout.udptimeout.udp_timeouts[RTE_CT_UDP_UNREPLIED] =
1451 if (strcmp(name, "udp_replied") == 0) {
1452 if (time_value == 0)
1454 ct->ct_timeout.udptimeout.udp_timeouts[RTE_CT_UDP_REPLIED] =
1462 rte_ct_cnxn_tracker_batch_lookup_basic_type(
1463 struct rte_ct_cnxn_tracker *ct,
1464 struct rte_mbuf **pkts,
1465 uint64_t *pkts_mask,
1466 uint64_t no_new_cnxn_mask,
1467 uint64_t *reply_pkt_mask,
1468 uint64_t *hijack_mask,
1469 uint8_t ip_hdr_size_bytes)
1471 /* bitmap of packets left to process */
1472 uint64_t pkts_to_process = *pkts_mask;
1473 /* bitmap of valid packets to return */
1474 uint8_t compacting_map[RTE_HASH_LOOKUP_BULK_MAX];
1475 /* for pkt, key in originators direction? */
1476 uint8_t key_orig_dir[RTE_HASH_LOOKUP_BULK_MAX];
1477 uint32_t packets_for_lookup = 0;
1478 int32_t positions[RTE_HASH_LOOKUP_BULK_MAX];
1480 struct rte_ct_cnxn_data new_cnxn_data;
1481 struct rte_ct_cnxn_data *cnxn_data_entry[RTE_HASH_LOOKUP_BULK_MAX];
1483 rte_prefetch0(ct->hash_table_entries);
1485 if (CNXN_TRX_DEBUG > 1) {
1486 printf("Enter cnxn tracker %p", ct);
1487 printf(" synproxy batch lookup with packet mask %p\n",
1488 (void *)*pkts_mask);
1491 rte_ct_forget_new_connections(ct);
1492 *reply_pkt_mask = 0;
1496 * Use bulk lookup into hash table for performance reasons. Cannot have
1497 * "empty slots" in the bulk lookup,so need to create a compacted table.
1500 switch (ip_hdr_size_bytes) {
1501 case IPv4_HEADER_SIZE:
1502 for (; pkts_to_process;) {
1503 uint8_t pos = (uint8_t) __builtin_ctzll(
1505 /* bitmask representing only this packet */
1506 uint64_t pkt_mask = 1LLU << pos;
1507 /* remove this packet from remaining list */
1508 pkts_to_process &= ~pkt_mask;
1510 struct rte_mbuf *pkt = pkts[pos];
1513 /* TCP and UDP ports at same offset, just use TCP for
1514 * offset calculation
1516 struct tcp_hdr *thdr = (struct tcp_hdr *)
1517 RTE_MBUF_METADATA_UINT32_PTR(pkt,
1518 (IP_START + ip_hdr_size_bytes));
1519 uint16_t src_port = rte_bswap16(thdr->src_port);
1520 uint16_t dst_port = rte_bswap16(thdr->dst_port);
1522 struct ipv4_hdr *ihdr = (struct ipv4_hdr *)
1523 RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
1524 uint8_t proto = ihdr->next_proto_id;
1526 if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
1527 /* only tracking TCP and UDP at this time */
1532 * Load the addresses and ports, and convert from Intel
1533 * to network byte order. Strictly speaking, it is not
1534 * necessary to do this conversion, as this data is only
1535 * used to create a hash key.
1537 uint32_t src_addr = rte_bswap32(ihdr->src_addr);
1538 uint32_t dst_addr = rte_bswap32(ihdr->dst_addr);
1540 if (CNXN_TRX_DEBUG > 2) {
1541 if (CNXN_TRX_DEBUG > 4)
1542 rte_ct_cnxn_print_pkt(pkt,
1545 /* need to create compacted table of pointers to pass
1549 compacting_map[packets_for_lookup] = pos;
1550 key_orig_dir[packets_for_lookup] =
1551 rte_ct_create_cnxn_hashkey(&src_addr, &dst_addr,
1555 [packets_for_lookup][0],
1557 packets_for_lookup++;
1560 case IPv6_HEADER_SIZE:
1561 for (; pkts_to_process;) {
1562 uint8_t pos = (uint8_t) __builtin_ctzll(
1564 /* bitmask representing only this packet */
1565 uint64_t pkt_mask = 1LLU << pos;
1566 /* remove this packet from remaining list */
1567 pkts_to_process &= ~pkt_mask;
1569 struct rte_mbuf *pkt = pkts[pos];
1572 void *ip_hdr = RTE_MBUF_METADATA_UINT32_PTR(pkt,
1575 /* TCP and UDP ports at same offset, just use TCP for
1576 * offset calculation
1578 struct tcp_hdr *thdr = (struct tcp_hdr *)
1579 RTE_MBUF_METADATA_UINT32_PTR(pkt,
1580 (IP_START + ip_hdr_size_bytes));
1581 uint16_t src_port = rte_bswap16(thdr->src_port);
1582 uint16_t dst_port = rte_bswap16(thdr->dst_port);
1584 struct ipv6_hdr *ihdr = (struct ipv6_hdr *)ip_hdr;
1585 uint8_t proto = ihdr->proto;
1587 if (!(proto == TCP_PROTOCOL || proto == UDP_PROTOCOL)) {
1588 /* only tracking TCP and UDP at this time */
1592 if (CNXN_TRX_DEBUG > 2) {
1593 if (CNXN_TRX_DEBUG > 4)
1594 rte_ct_cnxn_print_pkt(pkt,
1598 /* need to create compacted table of pointers to pass
1602 compacting_map[packets_for_lookup] = pos;
1603 key_orig_dir[packets_for_lookup] =
1604 rte_ct_create_cnxn_hashkey(
1605 (uint32_t *) ihdr->src_addr,
1606 (uint32_t *) ihdr->dst_addr,
1610 [packets_for_lookup][0],
1612 packets_for_lookup++;
1618 if (unlikely(packets_for_lookup == 0))
1619 return; /* no suitable packet for lookup */
1621 /* Clear all the data to make sure no stack garbage is in it */
1622 memset(&new_cnxn_data, 0, sizeof(struct rte_ct_cnxn_data));
1624 /* lookup all tcp & udp packets in the connection table */
1626 int lookup_result = rte_hash_lookup_bulk(ct->rhash,
1627 (const void **)&ct->hash_key_ptrs,
1628 packets_for_lookup, &positions[0]);
1630 if (unlikely(lookup_result < 0)) {
1631 /* TODO: change a log */
1632 printf("Unexpected hash table problem, discarding all packets");
1634 return; /* unknown error, just discard all packets */
1637 /* Pre-fetch hash table entries and counters to avoid LLC miss */
1638 rte_prefetch0(ct->counters);
1639 for (i = 0; i < packets_for_lookup; i++) {
1640 struct rte_ct_cnxn_data *entry = NULL;
1641 int hash_table_entry = positions[i];
1643 if (hash_table_entry >= 0) {
1644 /* Entry found for existing UDP/TCP connection */
1645 entry = &ct->hash_table_entries[hash_table_entry];
1646 rte_prefetch0(&entry->counters.packets_forwarded);
1647 rte_prefetch0(entry);
1648 rte_prefetch0(&entry->key_is_client_order);
1651 uint8_t pkt_index = compacting_map[i];
1652 uint32_t *key = ct->hash_key_ptrs[pkt_index];
1653 uint8_t protocol = *(key + 9);
1654 if (protocol == UDP_PROTOCOL) {
1655 /* Search in new connections only for UDP */
1656 entry = rte_ct_search_new_connections(ct, key);
1657 rte_prefetch0(&entry->counters.packets_forwarded);
1658 rte_prefetch0(entry);
1659 rte_prefetch0(&entry->key_is_client_order);
1662 cnxn_data_entry[i] = entry;
1665 for (i = 0; i < packets_for_lookup; i++) {
1666 /* index into hash table entries */
1667 int hash_table_entry = positions[i];
1668 /* index into packet table of this packet */
1669 uint8_t pkt_index = compacting_map[i];
1670 /* bitmask representing only this packet */
1671 uint64_t pkt_mask = 1LLU << pkt_index;
1672 uint8_t key_is_client_order = key_orig_dir[i];
1673 uint32_t *key = ct->hash_key_ptrs[pkt_index];
1674 uint8_t protocol = *(key + 9);
1675 struct rte_mbuf *packet = pkts[pkt_index];
1676 int no_new_cnxn = (pkt_mask & no_new_cnxn_mask) != 0;
1678 /* rte_ct_print_hashkey(key); */
1680 if (protocol == TCP_PROTOCOL) {
1681 enum rte_ct_packet_action tcp_pkt_action;
1683 tcp_pkt_action = rte_ct_handle_tcp_lookup(ct, packet,
1684 pkt_index, key_is_client_order,
1685 key, hash_table_entry, no_new_cnxn,
1688 switch (tcp_pkt_action) {
1690 case RTE_CT_SEND_CLIENT_SYNACK:
1691 case RTE_CT_SEND_SERVER_ACK:
1692 /* altered packet or copy must be returned
1695 *reply_pkt_mask |= pkt_mask;
1698 case RTE_CT_SEND_SERVER_SYN:
1699 case RTE_CT_FORWARD_PACKET:
1703 *hijack_mask |= pkt_mask;
1707 /* bad packet, clear mask to drop */
1708 *pkts_mask ^= pkt_mask;
1709 ct->counters->pkts_drop++;
1712 /* rte_ct_cnxn_print_pkt(pkts[pkt_index]); */
1714 } else { /* UDP entry */
1716 if (hash_table_entry >= 0) {
1718 * connection found for this packet. Check that
1719 * this is a valid packet for connection
1722 struct rte_ct_cnxn_data *entry =
1725 if (rte_ct_udp_packet
1726 (ct, entry, pkts[pkt_index],
1727 key_is_client_order)) {
1728 entry->counters.packets_forwarded++;
1729 ct->counters->pkts_forwarded++;
1733 * connection not found in bulk hash lookup,
1734 * but might have been added in this batch
1737 struct rte_ct_cnxn_data *recent_entry =
1740 if (recent_entry != NULL) {
1741 if (rte_ct_udp_packet(ct, recent_entry,
1743 key_is_client_order)) {
1744 recent_entry->counters.
1745 packets_forwarded++;
1746 ct->counters->pkts_forwarded++;
1749 /* no existing connection, try to add
1754 /* new cnxn not allowed, clear
1757 *pkts_mask ^= pkt_mask;
1758 ct->counters->pkts_drop++;
1760 pkts_drop_invalid_conn++;
1764 if (rte_ct_udp_new_connection(ct,
1765 &new_cnxn_data, pkts[pkt_index])) {
1766 /* This packet creates a
1770 rte_hash_add_key(ct->
1776 struct rte_ct_cnxn_data
1777 *new_hash_entry = &ct->
1778 hash_table_entries[position];
1781 *update fields in new_cnxn_data
1782 * not set by "new_connection"
1785 memcpy(new_cnxn_data.key, key,
1786 sizeof(new_cnxn_data.key));
1790 = key_is_client_order;
1791 new_cnxn_data.protocol =
1794 &new_cnxn_data.type,
1796 rte_memcpy(new_hash_entry,
1801 new_hash_entry->counters.
1802 packets_forwarded = 1;
1803 ct->counters->pkts_forwarded++;
1804 new_hash_entry->counters.
1805 packets_dropped = 0;
1806 ct->counters->pkts_drop = 0;
1808 current_active_sessions++;
1810 sessions_activated++;
1813 state_used_for_timer
1815 rte_ct_set_cnxn_timer_for_udp(
1818 RTE_CT_UDP_UNREPLIED);
1820 rte_ct_remember_new_connection(
1829 } /* packets_for_lookup */
1831 if (CNXN_TRX_DEBUG > 1) {
1832 printf("Exit cnxn tracker synproxy batch lookup with");
1833 printf(" packet mask %p\n", (void *)*pkts_mask);