Merge "conntrack: timer reset in reply traffic causes performance impact"
[samplevnf.git] / common / VIL / conntrack / rte_ct_synproxy.c
1 /*
2 // Copyright (c) 2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <stdint.h>
20 #include <stddef.h>
21 #include <string.h>
22 #include <unistd.h>
23
24 #include <rte_common.h>
25 #include <rte_malloc.h>
26 #include <rte_ether.h>
27 #include <rte_ip.h>
28 #include <rte_udp.h>
29 #include <rte_icmp.h>
30 #include <rte_byteorder.h>
31 #include <rte_cycles.h>
32
33 #include "rte_ct_tcp.h"
34
35
36 /*
37  * OVERVIEW:
38  * This module will behave as a proxy between an initiator (external client)
39  * and listener (internal server).
40  * (1) Proxy receives SYN from initiator, replies with spoofed SYN-ACK message
41  *     No packet is sent to the listener at this time.
42  * (2) Proxy receives ACK from the initiator, so the connection request is
43  *     considered valid. Proxy sends a spoofed SYN message to the listener.
44  * (3) Proxy receives SYN-ACK message from listener. Proxy replies to listener
45  *     with a spoofed ACK message. The connection is considered established.
46  * (4) Traffic is exchanged between initiator and listener. Sequence and
47  *     ack numbers translated appropriately by proxy.
48  */
49
50 /*
51  * DETAILS, when SynProxy on:
52  * (1) receive initial SYN from client
53  *    call CT, all new connections assigned spoofed (random) SEQ number
54  *    packet re-purposed as SYN-ACK back to client with spoofed SEQ
55  *    -> change ethernet, IP, and TCP headers, put on appropriate output ring
56  * (2) receive ACK packet from client
57  *    connection request now considered valid
58  *    packet re-purposed as SYN to server, using SEQ from original SYN
59  *    -> change TCP header, put on output ring originally targeted
60  * (3) receive SYN-ACK packet from server
61  *    connection now ESTABLISHED
62  *    compute SEQ difference between spoofed SEQ and real server SEQ
63  *    packet re-purposed as ACK to server
64  *    -> change ethernet, IP, and TCP headers, put on appropriate output ring
65  * (4) all further packets flow normally, except SEQ and ACK numbers must be
66  *    modified by SEQ diff (SEQ in server->client direction, ACK and SACK in
67  *    client->server direction)
68  *
69  */
70
/* Byte offsets handed to the RTE_MBUF_METADATA_*_PTR() accessors */
#define META_DATA_OFFSET 128
#define ETHERNET_START (META_DATA_OFFSET + RTE_PKTMBUF_HEADROOM)
#define ETH_HDR_SIZE 14			/* Ethernet header, bytes */
#define IP_START (ETHERNET_START + ETH_HDR_SIZE)
#define PROTOCOL_START (IP_START + 9)
#define IP_V4_HEADER_SIZE 20		/* IPv4 header without options */
#define IP_V6_HEADER_SIZE 40		/* fixed IPv6 header */
#define TCP_START (IP_START + IP_V4_HEADER_SIZE)
#define TCP_MIN_HDR_SIZE 20		/* TCP header without options */

/* Values written into spoofed IP headers */
#define RTE_TCP_PROTO_ID 6
#define RTE_SP_DEFAULT_TTL 64

#define RTE_SYNPROXY_MAX_SPOOFED_PKTS 64

/* TCP flag bits used when building spoofed TCP headers */
#define RTE_TCP_SYN 0x02
#define RTE_TCP_ACK 0x10
#define RTE_TCP_SYN_ACK (RTE_TCP_SYN | RTE_TCP_ACK)

#define RTE_SP_DEFAULT_WINDOW 29200
/* set to 1 to use a fixed spoofed sequence number while debugging */
#define RTE_CT_DEBUG_SPOOFED_SEQ 0
#define RTE_DPDK_IS_16_4 0

/* IP version numbers, as found in the top nibble of the first header byte */
#define IP_VERSION_4 4
#define IP_VERSION_6 6
96
97
98 /* default TCP options */
99 /* TODO: need to set in config file */
100
101 struct rte_synproxy_options default_ipv4_synproxy_options = {
102         .options = RTE_SP_OPTIONS_MSS |
103                         RTE_SP_OPTIONS_SACK_PERM |
104                         RTE_SP_OPTIONS_WINDOW_SCALE,
105         .mss = 1460,
106         .window_scale = 7,
107         .initial_window = RTE_SP_DEFAULT_WINDOW
108 };
109
110
111 struct rte_synproxy_options default_ipv6_synproxy_options = {
112         .options = RTE_SP_OPTIONS_MSS |
113                         RTE_SP_OPTIONS_SACK_PERM |
114                         RTE_SP_OPTIONS_WINDOW_SCALE,
115         .mss = 1440,
116         .window_scale = 7,
117         .initial_window = RTE_SP_DEFAULT_WINDOW
118 };
119
120 /* IP/TCP header print for debugging */
121 static __rte_unused void
122 rte_ct_synproxy_print_pkt_info(struct rte_mbuf *pkt)
123 {
124         struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)
125                 RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
126         __rte_unused struct tcp_hdr *thdr = (struct tcp_hdr *)
127                 RTE_MBUF_METADATA_UINT32_PTR(pkt, TCP_START);
128         uint32_t packet_length = rte_pktmbuf_pkt_len(pkt);
129
130         printf("\npacket length %u, ip length %u\n", packet_length,
131                 rte_bswap16(ihdr4->total_length));
132         rte_pktmbuf_dump(stdout, pkt, 80);
133 }
134
/*
 * Incrementally patch a TCP checksum after a single 32-bit value covered by
 * it has been changed, without re-summing the whole segment.
 *
 * The one's complement of the old value and the new value are both added,
 * 16 bits at a time, to the complement of the stored checksum; carries are
 * folded back and the result is complemented and stored again.
 */
static inline void
rte_sp_incremental_tcp_chksum_update_32(
	uint32_t num_before,	/* old value, in Intel order, not network order */
	uint32_t num_after,	/* new value, in Intel order, not network order */
	uint16_t *chksum)	/* network order, e.g. pointer into header */
{
	const uint32_t removed = ~num_before;
	uint32_t acc = ~rte_bswap16(*chksum) & 0xffff;

	/* take out the old value, put in the new one */
	acc += (removed >> 16) + (removed & 0xffff);
	acc += (num_after >> 16) + (num_after & 0xffff);

	/* fold the carries back into the low 16 bits */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	*chksum = rte_bswap16(~acc & 0xffff);
}
152
153
154
/*
 * Produce the initial sequence number used for a spoofed SYN-ACK.
 * The value is simply the low 32 bits of the TSC cycle counter.
 */
static inline uint32_t
rte_sp_get_random_seq_number(void)
{
	uint64_t tsc = rte_get_tsc_cycles();

	return (uint32_t)tsc;	/* keep only the low 32 bits */
}
160
161
162 static int8_t rte_ct_ipversion(void *i_hdr)
163 {
164         uint8_t *ihdr = (uint8_t *)i_hdr;
165         int8_t hdr_chk = *ihdr;
166
167         hdr_chk = hdr_chk >> 4;
168         if (hdr_chk == IP_VERSION_4 || hdr_chk == IP_VERSION_6)
169                 return hdr_chk;
170         else
171                 return -1;
172 }
173
174 static inline void
175 rte_synproxy_adjust_pkt_length(struct rte_mbuf *pkt)
176 {
177         uint16_t pkt_length = 0;
178         int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(pkt);
179         void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START);
180
181         if (ip_hdr_size_bytes == IP_V4_HEADER_SIZE) {
182                 struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)iphdr;
183
184                 pkt_length = rte_bswap16(ihdr4->total_length) + ETH_HDR_SIZE;
185         } else if (ip_hdr_size_bytes == IP_V6_HEADER_SIZE) {
186                 struct ipv6_hdr *ihdr6 = (struct ipv6_hdr *)iphdr;
187
188                 pkt_length = rte_bswap16(ihdr6->payload_len) +
189                         IP_V6_HEADER_SIZE + ETH_HDR_SIZE;
190         }
191         uint16_t mbuf_pkt_length = rte_pktmbuf_pkt_len(pkt);
192
193         if (pkt_length == mbuf_pkt_length)
194                 return;
195
196         if (pkt_length < mbuf_pkt_length) {
197                 rte_pktmbuf_trim(pkt, mbuf_pkt_length - pkt_length);
198                 return;
199         }
200
201         /* pkt_length > mbuf_pkt_length */
202         rte_pktmbuf_append(pkt, pkt_length - mbuf_pkt_length);
203 }
204
205 static void
206 rte_synproxy_build_ipv4_header(
207         struct ipv4_hdr *hdr4,
208         uint32_t src_addr,
209         uint32_t dst_addr,
210         uint16_t tcp_length)
211 {
212         /* TODO: consider interface re-work, too many rte_bswapxx */
213         /* options are not supported, so header size is fixed */
214         hdr4->version_ihl = 0x45;
215         hdr4->type_of_service = 0;
216         hdr4->total_length = rte_bswap16(tcp_length + IP_V4_HEADER_SIZE);
217         hdr4->packet_id = 0;
218         /* set Don't fragment bit, Intel order */
219         hdr4->fragment_offset = 0x0040;
220         hdr4->time_to_live = RTE_SP_DEFAULT_TTL;
221         hdr4->next_proto_id = RTE_TCP_PROTO_ID;
222         /* checksum calculated later */
223         hdr4->src_addr = rte_bswap32(src_addr);
224         hdr4->dst_addr = rte_bswap32(dst_addr);
225 }
226
227
228 static void
229 rte_synproxy_build_ipv6_header(
230         struct ipv6_hdr *hdr6,
231         uint8_t *src_addr,
232         uint8_t *dst_addr,
233         uint16_t tcp_length)
234 {
235         /* TODO: consider interface re-work, too many rte_bswapxx */
236         /* options are not supported, so header size is fixed */
237         uint8_t temp_src[16];
238         uint8_t temp_dst[16];
239
240         hdr6->vtc_flow = 0x60;  /* Intel Order */
241         hdr6->payload_len = rte_bswap16(tcp_length);
242         hdr6->proto = RTE_TCP_PROTO_ID;
243         hdr6->hop_limits = RTE_SP_DEFAULT_TTL;
244         /* checksum calculated later */
245
246         /* must copy to temps to avoid overwriting */
247         rte_mov16(temp_src, src_addr);
248         rte_mov16(temp_dst, dst_addr);
249         rte_mov16(hdr6->src_addr, temp_src);
250         rte_mov16(hdr6->dst_addr, temp_dst);
251 }
252
253 /* add options specified in t_opts to TCP header in packet. */
254
255 static uint16_t
256 rte_sp_add_tcp_options(struct tcp_hdr *thdr,
257                 const struct rte_synproxy_options *t_opts)
258 {
259         uint32_t *options_ptr = (uint32_t *)(thdr + 1);
260         uint32_t *saved_ptr = options_ptr;
261         uint8_t options = t_opts->options;
262         uint32_t option_bytes;  /* options built in groups of 4 bytes */
263
264         if (options & RTE_SP_OPTIONS_MSS) {
265                 option_bytes = (RTE_CT_TCPOPT_MSS << 24) |
266                         (RTE_CT_TCPOLEN_MSS << 16) | t_opts->mss;
267                 *options_ptr++ = rte_bswap32(option_bytes);
268         }
269
270         if (options & RTE_SP_OPTIONS_TIMESTAMP) {
271                 /* if both timestamp and sack permitted options,
272                  * pack together
273                  */
274                 if (options & RTE_SP_OPTIONS_SACK_PERM)
275                         option_bytes = (RTE_CT_TCPOPT_SACK_PERM << 24) |
276                                         (RTE_CT_TCPOLEN_SACK_PERM << 16);
277                 else
278                         option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
279                                 (RTE_CT_TCPOPT_NOP << 16);
280
281                 option_bytes |= (RTE_CT_TCPOPT_TIMESTAMP << 8) |
282                         RTE_CT_TCPOLEN_TIMESTAMP;
283                 *options_ptr++ = rte_bswap32(option_bytes);
284                 *options_ptr++ = rte_bswap32(t_opts->ts_val);
285                 *options_ptr++ = rte_bswap32(t_opts->ts_echo_reply);
286         } else if (options & RTE_SP_OPTIONS_SACK_PERM) {
287                 option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
288                         (RTE_CT_TCPOPT_NOP << 16) |
289                         (RTE_CT_TCPOPT_SACK_PERM << 8) |
290                         RTE_CT_TCPOLEN_SACK_PERM;
291                 *options_ptr++ = rte_bswap32(option_bytes);
292         }
293
294         if (options & RTE_SP_OPTIONS_WINDOW_SCALE) {
295                 option_bytes = (RTE_CT_TCPOPT_NOP << 24) |
296                         (RTE_CT_TCPOPT_WINDOW << 16) |
297                         (RTE_CT_TCPOLEN_WINDOW << 8) |
298                         t_opts->window_scale;
299                 *options_ptr++ = rte_bswap32(option_bytes);
300         }
301
302         /* compute the data offset field, which is size of total
303          * TCP header in 32 bit words
304          */
305         /* TODO: diff from options ptr to thdr */
306         uint16_t data_offset_bytes = (uint16_t)RTE_PTR_DIFF(options_ptr,
307                         saved_ptr) + sizeof(struct tcp_hdr);
308         thdr->data_off = (data_offset_bytes >> 2) << 4;
309
310         return data_offset_bytes;
311 }
312
313 /* Build a TCP header.
314  * Note that the the tcp_hdr must be in the appropriate location
315  * in an mbuf
316  * TODO: consider interface re-work, too many rte_bswapxx
317  */
318 static inline uint16_t
319 rte_synproxy_build_tcp_header(
320         __rte_unused struct rte_mbuf *old_pkt,
321         struct tcp_hdr *t_hdr,
322         uint16_t src_port,
323         uint16_t dst_port,
324         uint32_t seq,
325         uint32_t ack,
326         uint8_t flags,
327         const struct rte_synproxy_options *t_opts,
328         uint8_t add_options)
329 {
330         t_hdr->src_port = rte_bswap16(src_port);
331         t_hdr->dst_port = rte_bswap16(dst_port);
332         t_hdr->sent_seq = rte_bswap32(seq);
333         t_hdr->recv_ack = rte_bswap32(ack);
334
335         t_hdr->tcp_flags = flags;
336         t_hdr->rx_win = t_opts->initial_window;
337         /* checksum calculated later */
338         t_hdr->tcp_urp = 0;
339
340         /* add tcp header options, if applicable */
341
342         uint16_t new_tcp_hdr_size = TCP_MIN_HDR_SIZE;
343
344         if (add_options)
345                 new_tcp_hdr_size = rte_sp_add_tcp_options(t_hdr, t_opts);
346         else
347                 t_hdr->data_off = (TCP_MIN_HDR_SIZE >> 2) << 4;
348
349         return new_tcp_hdr_size;
350 }
351
352 static void
353 rte_synproxy_compute_checksums(void *i_hdr, struct tcp_hdr *t_hdr)
354 {
355         /*
356          * calculate IP and TCP checksums. Note that both checksum
357          * routines requirehecksum fields to be set to zero,
358          * and the the checksum is in the correct
359          * byte order, so no rte_bswap16 is required.
360          */
361
362         /* TODO: look into h/w computation of checksums */
363
364         int8_t hdr_chk = rte_ct_ipversion(i_hdr);
365
366         t_hdr->cksum = 0;
367
368         if (hdr_chk == IP_VERSION_4) {
369                 struct ipv4_hdr *i4_hdr = (struct ipv4_hdr *)i_hdr;
370
371                 i4_hdr->hdr_checksum = 0;
372                 t_hdr->cksum = rte_ipv4_udptcp_cksum(i4_hdr, t_hdr);
373                 i4_hdr->hdr_checksum = rte_ipv4_cksum(i4_hdr);
374         } else if (hdr_chk == IP_VERSION_6) {
375                 struct ipv6_hdr *i6_hdr = (struct ipv6_hdr *)i_hdr;
376
377                 t_hdr->cksum = rte_ipv6_udptcp_cksum(i6_hdr, t_hdr);
378         }
379 }
380
381
382
383 /*
384  * Building new packet headers:
385  * For IPv4 and IPv6 headers, no options and no fragmentation are supported.
386  * Header size is fixed.
387  * TCP header will (likely) have options, so header size is not fixed.
388  * TCP header will be built first, and size used in IP packet size calculation.
389  */
390 void
391 rte_sp_cvt_to_spoofed_client_synack(struct rte_ct_cnxn_data *cd,
392                 struct rte_mbuf *old_pkt)
393 {
394         /* old packet is syn from client. Change to a (spoofed)
395          * SYN-ACK to send back
396          */
397
398         int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(old_pkt);
399         void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
400         struct tcp_hdr *thdr = (struct tcp_hdr *)
401                 RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START +
402                                 ip_hdr_size_bytes);
403         uint16_t tcp_header_size;
404
405         /* get a spoofed sequence number and save in the connection data */
406         uint32_t new_seq = rte_sp_get_random_seq_number();
407
408         if (RTE_CT_DEBUG_SPOOFED_SEQ)
409                 new_seq = 10; /* something simple to aid debugging */
410
411         cd->ct_protocol.synproxy_data.original_spoofed_seq = new_seq;
412
413         /* build the TCP header, including reversing the port numbers. */
414         tcp_header_size = rte_synproxy_build_tcp_header(old_pkt, thdr,
415                         rte_bswap16(thdr->dst_port),
416                         rte_bswap16(thdr->src_port),
417                         new_seq, rte_bswap32(thdr->sent_seq) + 1,
418                         RTE_TCP_SYN_ACK,
419                         ip_hdr_size_bytes == IP_V4_HEADER_SIZE ?
420                         &default_ipv4_synproxy_options :
421                         &default_ipv6_synproxy_options, 1);
422
423         /* reverse the source and destination addresses in the IP hdr */
424         if (ip_hdr_size_bytes == IP_V4_HEADER_SIZE) {
425                 struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)iphdr;
426
427                 rte_synproxy_build_ipv4_header(ihdr4,
428                                 rte_bswap32(ihdr4->dst_addr),
429                                 rte_bswap32(ihdr4->src_addr), tcp_header_size);
430
431         } else if (ip_hdr_size_bytes == IP_V6_HEADER_SIZE) {
432                 struct ipv6_hdr *ihdr6 = (struct ipv6_hdr *)iphdr;
433
434                 rte_synproxy_build_ipv6_header(ihdr6,
435                                 (uint8_t *)ihdr6->dst_addr,
436                                 (uint8_t *)ihdr6->src_addr, tcp_header_size);
437         }
438         rte_synproxy_adjust_pkt_length(old_pkt);
439         /* compute checksums */
440         rte_synproxy_compute_checksums(iphdr, thdr);
441
442 }
443
444
445 void
446 rte_sp_cvt_to_spoofed_server_syn(struct rte_ct_cnxn_data *cd,
447                 struct rte_mbuf *old_pkt)
448 {
449         /* old packet is ACK from client. Change to (spoofed)
450          * SYN to send to server
451          */
452
453         int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(old_pkt);
454         void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
455         struct tcp_hdr *thdr = (struct tcp_hdr *)
456                 RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START
457                                 + ip_hdr_size_bytes);
458         uint16_t tcp_header_size;
459
460         tcp_header_size = rte_synproxy_build_tcp_header(old_pkt, thdr,
461                         rte_bswap16(thdr->src_port),
462                         rte_bswap16(thdr->dst_port),
463                         rte_bswap32(thdr->sent_seq) - 1, 0,
464                         RTE_TCP_SYN,
465                         &cd->ct_protocol.synproxy_data.cnxn_options, 1);
466
467         if (ip_hdr_size_bytes == IP_V4_HEADER_SIZE) {
468                 struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)iphdr;
469
470                 rte_synproxy_build_ipv4_header(ihdr4,
471                                 rte_bswap32(ihdr4->src_addr),
472                                 rte_bswap32(ihdr4->dst_addr), tcp_header_size);
473         } else if (ip_hdr_size_bytes == IP_V6_HEADER_SIZE) {
474                 struct ipv6_hdr *ihdr6 = (struct ipv6_hdr *)iphdr;
475
476                 rte_synproxy_build_ipv6_header(ihdr6,
477                                 (uint8_t *)ihdr6->src_addr,
478                                 (uint8_t *)ihdr6->dst_addr, tcp_header_size);
479         }
480
481         rte_synproxy_adjust_pkt_length(old_pkt);
482         /* compute checksums */
483         rte_synproxy_compute_checksums(iphdr, thdr);
484
485 }
486
487 void
488 rte_sp_cvt_to_spoofed_server_ack(struct rte_ct_cnxn_data *cd,
489                 struct rte_mbuf *old_pkt)
490 {
491         /* old packet is SYN-ACK from server. Change to spoofed ACK and
492          * send back to server
493          */
494
495         int ip_hdr_size_bytes = rte_ct_get_IP_hdr_size(old_pkt);
496         void *iphdr = RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START);
497         struct tcp_hdr *thdr = (struct tcp_hdr *)
498                 RTE_MBUF_METADATA_UINT32_PTR(old_pkt, IP_START +
499                                 ip_hdr_size_bytes);
500
501         /* read real seq out of SYN-ACK from server, and save the delta from
502          * the spoofed one
503          */
504         uint32_t real_seq = rte_bswap32(thdr->sent_seq);
505         uint16_t tcp_header_size;
506
507         cd->ct_protocol.synproxy_data.seq_diff =
508                 real_seq - cd->ct_protocol.synproxy_data.original_spoofed_seq;
509
510         /* reverse the source and destination addresses */
511         tcp_header_size = rte_synproxy_build_tcp_header(old_pkt, thdr,
512                         rte_bswap16(thdr->dst_port),
513                         rte_bswap16(thdr->src_port),
514                         rte_bswap32(thdr->recv_ack),
515                         rte_bswap32(thdr->sent_seq) + 1, RTE_TCP_ACK,
516                         &cd->ct_protocol.synproxy_data.cnxn_options, 0);
517
518         /* reverse the source and destination addresses in the IP hdr */
519         if (ip_hdr_size_bytes == IP_V4_HEADER_SIZE) {
520                 struct ipv4_hdr *ihdr4 = (struct ipv4_hdr *)iphdr;
521
522                 rte_synproxy_build_ipv4_header(ihdr4,
523                                 rte_bswap32(ihdr4->dst_addr),
524                                 rte_bswap32(ihdr4->src_addr), tcp_header_size);
525
526         } else if (ip_hdr_size_bytes == IP_V6_HEADER_SIZE) {
527                 struct ipv6_hdr *ihdr6 = (struct ipv6_hdr *)iphdr;
528
529                 rte_synproxy_build_ipv6_header(ihdr6,
530                                 (uint8_t *)ihdr6->dst_addr,
531                                 (uint8_t *)ihdr6->src_addr, tcp_header_size);
532         }
533         rte_synproxy_adjust_pkt_length(old_pkt);
534         /* compute checksums */
535         rte_synproxy_compute_checksums(iphdr, thdr);
536 }
537
538 /*
539  * If running synproxy and both halves of the proxied connection have been
540  * established, the seq or ack value of the packet needs to be adjusted.
541  * The value is adjusted by the difference between the spoofed server
542  * initial sequence number and the real server sequence number.
543  * In the client -> server direction, the ack must be increased by the
544  * difference before the window check.
545  * In the server -> client direction, the seq must be decreased by the
546  * difference after the window check.
547  */
548
549
550 void
551 rte_sp_adjust_server_seq_after_window_check(
552         struct rte_ct_cnxn_data *cd,
553         __rte_unused void *i_hdr,
554         struct tcp_hdr *thdr,
555         enum rte_ct_pkt_direction dir)
556 {
557         uint32_t num_before, num_after;
558
559         if (!cd->ct_protocol.synproxy_data.cnxn_established)
560                 return;
561
562         if (dir == RTE_CT_DIR_ORIGINAL)
563                 return; /*wrong direction */
564
565
566         /* update appropriate number (seq or ack) in header */
567         num_before = rte_bswap32(thdr->sent_seq);
568         num_after = num_before - cd->ct_protocol.synproxy_data.seq_diff;
569         thdr->sent_seq = rte_bswap32(num_after);
570
571         rte_sp_incremental_tcp_chksum_update_32(num_before, num_after,
572                         &thdr->cksum);
573 }
574
575
576 static void
577 rte_sp_adjust_client_sack_entries(
578         struct tcp_hdr *thdr,
579         uint32_t diff)
580 {
581         uint32_t num_before, num_after;
582         uint32_t *sack_ptr;
583         uint8_t  sack_blk_size;
584         uint16_t dataoff_in_bytes = (thdr->data_off & 0xf0) >> 2;
585         uint16_t length = dataoff_in_bytes - sizeof(struct tcp_hdr);
586
587         if (!length)
588                 return;
589
590         uint8_t *options_ptr = (uint8_t *)(thdr + 1);
591
592         while (length > 0) {
593                 uint8_t opcode = *options_ptr;
594                 uint8_t opsize = options_ptr[1];
595                 int i;
596
597                 switch (opcode) {
598
599                 case RTE_CT_TCPOPT_EOL:
600                         return; /* end of options */
601
602                 case RTE_CT_TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
603                         length--;
604                         options_ptr++;
605                         continue;
606
607                 case RTE_CT_TCPOPT_SACK:
608                         /*
609                          * SACK (selective ACK) contains a block of 1 to 4
610                          * entries of 8 bytes each. Each entry is a pair of
611                          * 32 bit numbers. This block follows the usual 2
612                          * bytes for opcode and opsize. Thus, the entire SACK
613                          * option must be 10, 18, 26 or 34 bytes long.
614                          */
615
616                         sack_blk_size = opsize - 2;
617                         /* start of entries */
618                         sack_ptr = (uint32_t *)(options_ptr + 2);
619                         /* count of 32 bit elements */
620                         int num_acks = sack_blk_size >> 2;
621
622                         if (unlikely(sack_blk_size > 32 ||
623                                                 ((sack_blk_size & 0x3) != 0))) {
624                                 printf("Sack block parsing failure\n");
625                                 return;
626                         }
627
628                         for (i = 0; i < num_acks; i++) {
629                                 num_before = rte_bswap32(*sack_ptr);
630                                 num_after = num_before + diff;
631                                 *sack_ptr = rte_bswap32(num_after);
632                                 sack_ptr++;
633                                 rte_sp_incremental_tcp_chksum_update_32(
634                                                 num_before,
635                                                 num_after,
636                                                 &thdr->cksum);
637                         }
638
639                         return;
640                 default:
641                         break;
642                 }
643                 if ((opsize < 2) || (opsize > length)) {
644                         printf("ERROR!, opsize %i, length %i\n",
645                                 opsize, length);
646                         return;
647                 }
648
649                 options_ptr += opsize;
650                 length -= opsize;
651         }
652 }
653
654 void
655 rte_sp_adjust_client_ack_before_window_check(
656         struct rte_ct_cnxn_data *cd,
657          __rte_unused void *i_hdr,
658         struct tcp_hdr *thdr,
659         enum rte_ct_pkt_direction dir)
660 {
661         uint32_t num_before, num_after;
662
663         if (!cd->ct_protocol.synproxy_data.cnxn_established)
664                 return;
665
666         if (dir != RTE_CT_DIR_ORIGINAL)
667                 return; /*wrong direction */
668
669
670         /* first update appropriate number (seq or ack) in header */
671         num_before = rte_bswap32(thdr->recv_ack);
672         num_after = num_before + cd->ct_protocol.synproxy_data.seq_diff;
673         thdr->recv_ack = rte_bswap32(num_after);
674         rte_sp_incremental_tcp_chksum_update_32(num_before,
675                         num_after, &thdr->cksum);
676
677         /* update SACK entries in header if any */
678
679         if (1) { /* TODO: check if sack permitted before calling */
680                 rte_sp_adjust_client_sack_entries(thdr,
681                                 cd->ct_protocol.synproxy_data.seq_diff);
682                 /* note that tcp hdr checksum adjusted in above sack
683                  * entries routine call
684                  */
685         }
686 }
687
688
689
690
691 /* parse the tcp header options, if any, and save interesting ones */
692 static void
693 rte_sp_parse_tcp_options(
694         uint8_t *options_ptr,
695         uint16_t length,
696         struct rte_synproxy_options *t_opts)
697 {
698         int opsize;
699
700         t_opts->options = 0;
701
702         while (length > 0) {
703                 uint8_t opcode = *options_ptr++;
704
705                 if (opcode == RTE_CT_TCPOPT_EOL)
706                         return;
707
708                 if (opcode == RTE_CT_TCPOPT_NOP) {
709                         length--;
710                         continue; /* skip adjustments at loop bottom */
711                 }
712
713                 opsize = *options_ptr++;
714
715                 if (unlikely(opsize < 2 || opsize > length)) {
716                         /* TODO: Change printf to log */
717                         printf("parsing error, opsize: %i, length: %i\n",
718                                 opsize, length);
719                         return;
720                 }
721
722                 switch (opcode) {
723
724                 case RTE_CT_TCPOPT_MSS:
725                         if (opsize == RTE_CT_TCPOLEN_MSS) {
726                                 uint16_t *mss_ptr = (uint16_t *)options_ptr;
727
728                                 t_opts->mss = rte_bswap16(*mss_ptr);
729                                 t_opts->options |= RTE_SP_OPTIONS_MSS;
730                         }
731                         break;
732
733                 case RTE_CT_TCPOPT_WINDOW:
734                         if (opsize == RTE_CT_TCPOLEN_WINDOW) {
735                                 t_opts->window_scale = RTE_MIN(*options_ptr,
736                                                 RTE_CT_MAX_TCP_WINDOW_SCALE);
737                                 t_opts->options |= RTE_SP_OPTIONS_WINDOW_SCALE;
738                         }
739                         break;
740
741                 case RTE_CT_TCPOPT_TIMESTAMP:
742                         if (opsize == RTE_CT_TCPOLEN_TIMESTAMP) {
743                                 uint32_t *ts_val_ptr = (uint32_t *)options_ptr;
744                                 uint32_t *ts_ecr_ptr =
745                                         (uint32_t *)(options_ptr + 4);
746                                 t_opts->ts_val = rte_bswap32(*ts_val_ptr);
747                                 t_opts->ts_echo_reply =
748                                         rte_bswap32(*ts_ecr_ptr);
749                                 t_opts->options |= RTE_SP_OPTIONS_TIMESTAMP;
750                         }
751                         break;
752
753                 case RTE_CT_TCPOPT_SACK_PERM:
754                         if (opsize == RTE_CT_TCPOLEN_SACK_PERM)
755                                 t_opts->options |= RTE_SP_OPTIONS_SACK_PERM;
756                         break;
757
758                 default:
759                         break;
760                 }
761
762                 options_ptr += opsize - 2;
763                 length -= opsize;
764
765         }
766 }
767
768 /* parse the tcp header options, if any, and save interesting ones in t_opts */
769 void
770 rte_sp_parse_options(struct rte_mbuf *pkt, struct rte_ct_cnxn_data *cd)
771 {
772         /*uint16_t ip_hdr_length = rte_sp_get_ip_header_size(pkt);
773          * skip over IPv4 or IPv6 header
774          */
775         int ip_hdr_length = rte_ct_get_IP_hdr_size(pkt);
776         struct tcp_hdr *thdr = (struct tcp_hdr *)
777                 RTE_MBUF_METADATA_UINT32_PTR(pkt, IP_START + ip_hdr_length);
778         uint8_t *opt_ptr = RTE_MBUF_METADATA_UINT8_PTR(pkt,
779                         (IP_START + ip_hdr_length + sizeof(struct tcp_hdr)));
780
781         struct rte_synproxy_options *t_opts =
782                 &cd->ct_protocol.synproxy_data.cnxn_options;
783         int length_in_bytes =
784                 ((thdr->data_off & 0xf0) >> 2) - sizeof(struct tcp_hdr);
785
786         rte_sp_parse_tcp_options(opt_ptr, length_in_bytes, t_opts);
787         t_opts->initial_window = thdr->rx_win;
788 }
789
790
791
792
793 struct rte_mbuf *
794 rte_ct_get_buffered_synproxy_packets(
795         struct rte_ct_cnxn_tracker *ct)
796 {
797         struct rte_mbuf *trkr_list = ct->buffered_pkt_list;
798
799         ct->buffered_pkt_list = NULL;
800         return trkr_list;
801 }
802
803
804
805 void rte_ct_enable_synproxy(struct rte_ct_cnxn_tracker *ct)
806 {
807         ct->misc_options.synproxy_enabled = 1;
808         printf("rte_ct_enable_synproxy = %d\n",
809                         ct->misc_options.synproxy_enabled);
810 }
811
812 void rte_ct_disable_synproxy(struct rte_ct_cnxn_tracker *ct)
813 {
814         ct->misc_options.synproxy_enabled = 0;
815         //printf("rte_ct_disable_synproxy = %d\n",
816         //              ct->misc_options.synproxy_enabled);
817 }
818
819 void
820 rte_ct_buffer_packet(
821         struct rte_ct_cnxn_tracker *ct,
822         struct rte_ct_cnxn_data *cd,
823         struct rte_mbuf *pkt)
824 {
825         /*
826          * Add packet to list of buffered packets for the connection.
827          * List is built in reverse of order received by adding to front.
828          * List will later be reversed to maintain order of arrival.
829          */
830
831         struct rte_mbuf **next = (struct rte_mbuf **)
832                 RTE_MBUF_METADATA_UINT64_PTR(pkt,
833                                 ct->pointer_offset);
834         *next = cd->ct_protocol.synproxy_data.buffered_pkt_list;
835         cd->ct_protocol.synproxy_data.buffered_pkt_list = pkt;
836 }
837
838 void
839 rte_ct_release_buffered_packets(
840         struct rte_ct_cnxn_tracker *ct,
841         struct rte_ct_cnxn_data *cd)
842 {
843         struct rte_mbuf *cnxn_list =
844                 cd->ct_protocol.synproxy_data.buffered_pkt_list;
845
846         if (cnxn_list == NULL)
847                 return;
848
849         cd->ct_protocol.synproxy_data.buffered_pkt_list = NULL;
850
851         struct rte_mbuf *trkr_list = ct->buffered_pkt_list;
852
853         if (trkr_list == NULL)
854                 return;
855         /*
856          * walk the cnxn_list, and add to front of trkr_list, reversing order
857          * and thus restoring orginal order. Order between different
858          * connections is irrelevant.
859          */
860         while (cnxn_list != NULL) {
861                 struct rte_mbuf *old_next;
862
863                 struct rte_mbuf **next = (struct rte_mbuf **)
864                         RTE_MBUF_METADATA_UINT64_PTR(cnxn_list,
865                                         ct->pointer_offset);
866
867                 old_next = *next;       /* save next cd packet */
868                 *next = trkr_list;/* make this cd packet point to ct list */
869                 trkr_list = cnxn_list;/* make the cd packet head of ct list */
870                 cnxn_list = old_next;   /* advance along cd list */
871         }
872         ct->buffered_pkt_list = trkr_list;
873 }