2 // Copyright (c) 2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 #ifndef __INCLUDE_RTE_CT_TCP_H__
18 #define __INCLUDE_RTE_CT_TCP_H__
25 #include <rte_timer.h>
30 #include <rte_byteorder.h>
31 #include "rte_cnxn_tracking.h"
33 /* AN INNER, PRIVATE INTERFACE FOR RTE_CNXN_TRACKING */
35 /* TCP option kind codes (values match the standard TCP option kind numbers) */
37 #define RTE_CT_TCPOPT_EOL 0 /* End of options */
38 #define RTE_CT_TCPOPT_NOP 1 /* Padding */
39 #define RTE_CT_TCPOPT_MSS 2 /* Maximum segment size negotiation */
40 #define RTE_CT_TCPOPT_WINDOW 3 /* Window scaling */
41 #define RTE_CT_TCPOPT_SACK_PERM 4 /* SACK Permitted */
42 #define RTE_CT_TCPOPT_SACK 5 /* SACK Block */
43 #define RTE_CT_TCPOPT_TIMESTAMP 8 /* Timestamps (RTT estimation) */
45 #define RTE_CT_TCPOLEN_MSS 4 /* MSS option length, in bytes */
46 #define RTE_CT_TCPOLEN_WINDOW 3 /* window scale option length, in bytes */
47 #define RTE_CT_TCPOLEN_SACK_PERM 2 /* SACK-permitted option length, in bytes */
48 #define RTE_CT_TCPOLEN_TIMESTAMP 10 /* timestamp option length, in bytes */
49 #define RTE_CT_TCPOLEN_PER_SACK_ENTRY 8 /* bytes per SACK block (two 32-bit edges) */
51 #define RTE_CT_TCPOLEN_MSS_ALIGNED 4 /* NOTE(review): the *_ALIGNED values look like option lengths padded to a 4-byte multiple -- confirm at use sites */
52 #define RTE_CT_TCPOLEN_WINDOW_ALIGNED 4 /* 3 bytes rounded up to 4 */
53 #define RTE_CT_TCPOLEN_SACK_PERM_ALIGNED 4 /* 2 bytes rounded up to 4 */
54 #define RTE_CT_TCPOLEN_TIMESTAMP_ALIGNED 12 /* 10 bytes rounded up to 12 */
56 #define RTE_CT_MAX_TCP_WINDOW_SCALE 14 /* largest window scale shift count allowed for TCP */
58 #define RTE_SP_OPTIONS_MSS 1 /* RTE_SP_OPTIONS_*: single-bit values (1, 2, 4, 8), so they can be OR-ed together */
59 #define RTE_SP_OPTIONS_WINDOW_SCALE 2 /* NOTE(review): presumably marks which options a synproxy peer offered -- consumer not visible here */
60 #define RTE_SP_OPTIONS_TIMESTAMP 4
61 #define RTE_SP_OPTIONS_SACK_PERM 8
64 enum rte_ct_packet_action {
65 RTE_CT_OPEN_CONNECTION,
67 RTE_CT_FORWARD_PACKET,
68 RTE_CT_DESTROY_CNXN_AND_FORWARD_PACKET,
69 RTE_CT_REOPEN_CNXN_AND_FORWARD_PACKET,
70 RTE_CT_SEND_CLIENT_SYNACK,
71 RTE_CT_SEND_SERVER_SYN,
72 RTE_CT_SEND_SERVER_ACK,
76 enum rte_ct_connstatus {
84 static const char *const rte_ct_tcp_names[] = {
100 static const char *const rte_ct_udp_names[] = {
106 /* FIXME: what about big packets? */
107 #define RTE_MAX_ACKWIN_CONST 66000 /* NOTE(review): presumably a bound used in ACK window checking -- confirm at use sites */
109 /* Window scaling is advertised by the sender */
110 #define RTE_CT_TCP_FLAG_WINDOW_SCALE 0x01
112 /* SACK is permitted by the sender */
113 #define RTE_CT_TCP_FLAG_SACK_PERM 0x02
115 /* This sender sent FIN first */
116 #define RTE_CT_TCP_FLAG_CLOSE_INIT 0x04
118 /* Be liberal in window checking */
119 #define RTE_CT_TCP_FLAG_BE_LIBERAL 0x08
121 /* Has unacknowledged data */
122 #define RTE_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10
124 /* The field td_maxack has been set (NOTE(review): presumably the maxack field of struct rte_ct_tcp_state -- confirm) */
125 #define RTE_CT_TCP_FLAG_MAXACK_SET 0x20
126 /* Marks the possibility of an expected RFC 5961 challenge ACK */
127 #define RTE_CT_EXP_CHALLENGE_ACK 0x40
131 /* TCP header flags of interest (single-bit masks) */
132 #define RTE_CT_TCPHDR_FIN 0x01
133 #define RTE_CT_TCPHDR_SYN 0x02
134 #define RTE_CT_TCPHDR_RST 0x04
135 #define RTE_CT_TCPHDR_ACK 0x10
137 #define RTE_CT_TCPHDR_RST_ACK (RTE_CT_TCPHDR_RST | RTE_CT_TCPHDR_ACK) /* RST and ACK bits combined */
141 /* State machine values. Note that order is important, as relative checks are made. */
142 enum rte_ct_tcp_states {
146 RTE_CT_TCP_ESTABLISHED,
148 RTE_CT_TCP_CLOSE_WAIT,
150 RTE_CT_TCP_TIME_WAIT,
152 RTE_CT_TCP_SYN_SENT_2,
158 enum rte_ct_udp_states {
160 RTE_CT_UDP_UNREPLIED,
167 #define RTE_CT_TCP_MAX RTE_CT_TCP_UNACK
169 enum rte_ct_pkt_direction {
174 struct rte_ct_tcp_state { /* TCP sequence/window tracking values; kept once per direction (see seen[2]) */
175 uint32_t end; /* max of seq + len */
176 uint32_t maxend; /* max of ack + max(win, 1) */
177 uint32_t maxwin; /* max(win) */
178 uint32_t maxack; /* max of ack */
179 uint8_t scale; /* window scale factor */
180 uint8_t flags; /* per-direction options; NOTE(review): presumably RTE_CT_TCP_FLAG_* bits -- confirm */
183 struct rte_synproxy_options {
185 uint8_t window_scale;
188 uint32_t ts_echo_reply;
189 uint16_t initial_window;
192 struct ct_sp_cnxn_data {
193 /* buffer client pkt while waiting on server setup,
194 * store in reverse order
196 struct rte_mbuf *buffered_pkt_list;
197 uint32_t original_spoofed_seq;
198 /* difference between spoofed and real seq from server */
200 struct rte_synproxy_options cnxn_options;
201 /* non-zero if this connection created using synproxy */
203 bool half_established;
204 /* non-zero after both half-connections established */
205 bool cnxn_established;
209 struct rte_ct_tcp_state seen[2]; /* connection parms per direction */
211 uint8_t last_dir; /* Direction of the last packet
212 * (TODO: enum ip_conntrack_dir)
214 uint8_t retrans; /* Number of retransmitted packets */
215 uint8_t last_index; /* Index of the last packet */
216 uint32_t last_seq; /* Last seq number seen in dir */
217 uint32_t last_ack; /* Last seq number seen opposite dir */
218 uint32_t last_end; /* Last seq + len */
219 uint16_t last_win; /* Last window seen in dir */
220 /* For SYN packets while we may be out-of-sync */
221 uint8_t last_wscale; /* Last window scaling factor seen */
222 uint8_t last_flags; /* Last flags set */
226 * rte_ct_cnxn_counters holds all the connection-specific counters.
227 * TODO: Make available in public interface
230 struct rte_ct_cnxn_counters {
231 uint64_t packets_received;//Added for CT-NAT
232 uint64_t packets_forwarded;
233 uint64_t packets_dropped;
236 struct rte_ct_proto {
237 struct rte_ct_tcp tcp_ct_data; /* TCP specific data fields*/
238 struct ct_sp_cnxn_data synproxy_data;
243 * rte_ct_cnxn_data contains all the data for a TCP connection. This includes
244 * state data as necessary for verifying the validity of TCP packets. In
245 * addition, it holds data necessary for implementing the TCP timers.
248 struct rte_ct_cnxn_data {
249 /* The timer will be kept as part of the cnxn_data. When it fires, the
250 * pointer to the timer can be cast as the pointer to the cnxn_data
252 struct rte_timer timer; /* !!!!! IMPORTANT: Keep as first field !!!!! */
254 struct rte_ct_cnxn_counters counters; /* per-connection packet counters (received / forwarded / dropped) */
256 /* full key stored here to allow the timer to remove the connection */
257 /* TODO: Consider storing key signature as well to speed up deletions. */
260 struct rte_ct_proto ct_protocol; /* TCP tracking data plus synproxy data */
262 /* the 100 ms timing step at which a packet was seen for this connection */
263 uint64_t expected_timeout;
265 /* Abstract states also used for timer values, e.g. RTE_CT_TCP_UNACK */
266 uint8_t state_used_for_timer;
268 /* used to compute the "direction" of the packet */
269 uint8_t key_is_client_order;
272 /* used to store the type of packet: ipv4 or ipv6 */
275 // Bypass flag indicating that ALG checking is no longer needed
276 uint8_t alg_bypass_flag;
277 // TODO: can key_is_client_order be used for direction checking instead?
278 uint8_t server_direction;
280 // FTP session type: PORT = 0, PASV = 1
281 uint8_t ftp_session_type;
282 uint32_t tcp_payload_size;
288 } __rte_cache_aligned;
291 #define RTE_CT_TCP_MAX_RETRANS 3
293 struct rte_ct_tcptimeout {
294 /* a table of timeouts for each state of TCP */
295 uint64_t tcp_timeouts[RTE_CT_TCP_MAX + 1];
299 struct rte_ct_misc_options {
300 uint8_t synproxy_enabled;
302 uint32_t tcp_be_liberal;
303 uint32_t tcp_max_retrans;
306 struct rte_ct_udptimeout {
307 uint64_t udp_timeouts[RTE_CT_UDP_MAX + 1];
310 struct rte_ct_timeout {
311 struct rte_ct_tcptimeout tcptimeout;
312 struct rte_ct_udptimeout udptimeout;
315 struct rte_ct_cnxn_tracker { /* state for one connection tracker instance */
316 struct rte_hash *rhash; /* hash handle; NOTE(review): presumably the table used for connection lookup -- confirm */
319 * Data for bulk hash lookup. Use this memory as temporary space.
320 * Too big for stack (64*16 bytes)
322 uint32_t hash_keys[RTE_HASH_LOOKUP_BULK_MAX][10]; /* 10 32-bit words (40 bytes) per key slot; NOTE(review): the "64*16 bytes" size quoted above looks stale -- confirm */
324 /* table of pointers to the hash_keys rows, for bulk hash lookup */
325 void *hash_key_ptrs[RTE_HASH_LOOKUP_BULK_MAX];
327 uint32_t positions[RTE_HASH_LOOKUP_BULK_MAX];/* added for ALG */
329 /* hash table and timer storage */
330 uint32_t num_cnxn_entries;
333 * pointer to data space used for hash table, "num_cnxn_entries" long.
334 * Memory allocated during initialization.
336 struct rte_ct_cnxn_data *hash_table_entries;
337 struct rte_CT_counter_block *counters;
340 uint64_t timing_cycles_per_timing_step;
341 uint64_t timing_100ms_steps;
342 uint64_t timing_100ms_steps_previous;
343 uint64_t timing_last_time;
344 struct rte_ct_timeout ct_timeout; /* TCP and UDP timeout tables */
345 struct rte_ct_misc_options misc_options; /* synproxy, be-liberal and max-retransmit settings */
348 struct rte_ct_cnxn_data *new_connections[64];
349 struct rte_mbuf *buffered_pkt_list;
350 int latest_connection;
351 /* offset into mbuf where synproxy can store a pointer */
352 uint16_t pointer_offset;
353 } __rte_cache_aligned;
356 * Returns a value stating if this is a valid TCP open connection attempt.
357 * If valid, updates cnxn with any data fields it needs to save.
360 enum rte_ct_packet_action
361 rte_ct_tcp_new_connection(
362 struct rte_ct_cnxn_tracker *inst,
363 struct rte_ct_cnxn_data *cnxn,
364 struct rte_mbuf *pkt,
366 uint8_t ip_hdr_size);
369 * Returns a value stating if this is a valid TCP packet for the given connection.
370 * If valid, updates cnxn with any data fields it needs to save.
373 enum rte_ct_packet_action /* action the caller should take for this packet */
374 rte_ct_verify_tcp_packet(
375 struct rte_ct_cnxn_tracker *inst, /* connection tracker instance */
376 struct rte_ct_cnxn_data *cnxn, /* connection the packet is checked against; updated when the packet is valid */
377 struct rte_mbuf *pkt, /* packet to verify */
378 uint8_t key_was_flipped, /* NOTE(review): presumably non-zero when the lookup key was reversed relative to the packet -- confirm */
379 uint8_t ip_hdr_size); /* NOTE(review): presumably the IP header length in bytes -- confirm */
382 * Returns a value stating if this is a valid UDP open connection attempt.
383 * If valid, updates cnxn with any data fields it needs to save.
387 rte_ct_udp_new_connection(
388 struct rte_ct_cnxn_tracker *ct,
389 struct rte_ct_cnxn_data *cd,
390 struct rte_mbuf *pkt);
393 * Returns a value stating if this is a valid UDP packet for the given connection.
394 * If valid, updates cnxn with any data fields it needs to save.
397 enum rte_ct_packet_action
399 struct rte_ct_cnxn_tracker *ct,
400 struct rte_ct_cnxn_data *cd,
401 struct rte_mbuf *pkt,
402 uint8_t key_was_flipped);
406 * For the given connection, set a timeout based on the given state. If the
407 * timer is already set, this call will reset the timer with a new value.
411 rte_ct_set_cnxn_timer_for_tcp(
412 struct rte_ct_cnxn_tracker *ct,
413 struct rte_ct_cnxn_data *cd,
417 rte_ct_set_cnxn_timer_for_udp(
418 struct rte_ct_cnxn_tracker *ct,
419 struct rte_ct_cnxn_data *cd,
422 /* Cancel the timer associated with connection cd. Safe to call when no timer is set. */
423 void rte_ct_cancel_cnxn_timer(struct rte_ct_cnxn_data *cd);
427 * SYNPROXY related routines. Detailed comments are available in
428 * rte_ct_synproxy.c where they are implemented.
432 /* these 3 routines convert a received packet to a different one */
435 rte_sp_cvt_to_spoofed_client_synack(struct rte_ct_cnxn_data *cd,
436 struct rte_mbuf *old_pkt);
439 rte_sp_cvt_to_spoofed_server_syn(struct rte_ct_cnxn_data *cd,
440 struct rte_mbuf *old_pkt);
443 rte_sp_cvt_to_spoofed_server_ack(struct rte_ct_cnxn_data *cd,
444 struct rte_mbuf *old_pkt);
446 /* These two routines adjust seq or ack numbers,
447 * as part of the proxy mechanism
451 rte_sp_adjust_client_ack_before_window_check(
452 struct rte_ct_cnxn_data *cd,
454 struct tcp_hdr *thdr,
455 enum rte_ct_pkt_direction dir);
458 rte_sp_adjust_server_seq_after_window_check(
459 struct rte_ct_cnxn_data *cd,
461 struct tcp_hdr *thdr,
462 enum rte_ct_pkt_direction dir);
466 /* parse tcp options and save in t_opts */
468 rte_sp_parse_options(struct rte_mbuf *pkt, struct rte_ct_cnxn_data *cd);
471 /* these two routines deal with packet buffering */
474 rte_ct_buffer_packet(
475 struct rte_ct_cnxn_tracker *ct,
476 struct rte_ct_cnxn_data *cd,
477 struct rte_mbuf *pkt);
480 rte_ct_release_buffered_packets(
481 struct rte_ct_cnxn_tracker *ct,
482 struct rte_ct_cnxn_data *cd);
484 #endif /* __INCLUDE_RTE_CT_TCP_H__ */