/*
 * Provenance: kvmfornfv.git, kernel/net/ipv4/netfilter/ipt_SYNPROXY.c
 * (kernel sources from the raw update to linux-4.4.6-rt14).
 */
1 /*
2  * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8
9 #include <linux/module.h>
10 #include <linux/skbuff.h>
11 #include <net/tcp.h>
12
13 #include <linux/netfilter_ipv4/ip_tables.h>
14 #include <linux/netfilter/x_tables.h>
15 #include <linux/netfilter/xt_SYNPROXY.h>
16 #include <net/netfilter/nf_conntrack.h>
17 #include <net/netfilter/nf_conntrack_seqadj.h>
18 #include <net/netfilter/nf_conntrack_synproxy.h>
19
20 static struct iphdr *
21 synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
22 {
23         struct iphdr *iph;
24
25         skb_reset_network_header(skb);
26         iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
27         iph->version    = 4;
28         iph->ihl        = sizeof(*iph) / 4;
29         iph->tos        = 0;
30         iph->id         = 0;
31         iph->frag_off   = htons(IP_DF);
32         iph->ttl        = sysctl_ip_default_ttl;
33         iph->protocol   = IPPROTO_TCP;
34         iph->check      = 0;
35         iph->saddr      = saddr;
36         iph->daddr      = daddr;
37
38         return iph;
39 }
40
41 static void
42 synproxy_send_tcp(const struct synproxy_net *snet,
43                   const struct sk_buff *skb, struct sk_buff *nskb,
44                   struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
45                   struct iphdr *niph, struct tcphdr *nth,
46                   unsigned int tcp_hdr_size)
47 {
48         struct net *net = nf_ct_net(snet->tmpl);
49
50         nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
51         nskb->ip_summed   = CHECKSUM_PARTIAL;
52         nskb->csum_start  = (unsigned char *)nth - nskb->head;
53         nskb->csum_offset = offsetof(struct tcphdr, check);
54
55         skb_dst_set_noref(nskb, skb_dst(skb));
56         nskb->protocol = htons(ETH_P_IP);
57         if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
58                 goto free_nskb;
59
60         if (nfct) {
61                 nskb->nfct = nfct;
62                 nskb->nfctinfo = ctinfo;
63                 nf_conntrack_get(nfct);
64         }
65
66         ip_local_out(net, nskb->sk, nskb);
67         return;
68
69 free_nskb:
70         kfree_skb(nskb);
71 }
72
73 static void
74 synproxy_send_client_synack(const struct synproxy_net *snet,
75                             const struct sk_buff *skb, const struct tcphdr *th,
76                             const struct synproxy_options *opts)
77 {
78         struct sk_buff *nskb;
79         struct iphdr *iph, *niph;
80         struct tcphdr *nth;
81         unsigned int tcp_hdr_size;
82         u16 mss = opts->mss;
83
84         iph = ip_hdr(skb);
85
86         tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
87         nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
88                          GFP_ATOMIC);
89         if (nskb == NULL)
90                 return;
91         skb_reserve(nskb, MAX_TCP_HEADER);
92
93         niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
94
95         skb_reset_transport_header(nskb);
96         nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
97         nth->source     = th->dest;
98         nth->dest       = th->source;
99         nth->seq        = htonl(__cookie_v4_init_sequence(iph, th, &mss));
100         nth->ack_seq    = htonl(ntohl(th->seq) + 1);
101         tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
102         if (opts->options & XT_SYNPROXY_OPT_ECN)
103                 tcp_flag_word(nth) |= TCP_FLAG_ECE;
104         nth->doff       = tcp_hdr_size / 4;
105         nth->window     = 0;
106         nth->check      = 0;
107         nth->urg_ptr    = 0;
108
109         synproxy_build_options(nth, opts);
110
111         synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
112                           niph, nth, tcp_hdr_size);
113 }
114
115 static void
116 synproxy_send_server_syn(const struct synproxy_net *snet,
117                          const struct sk_buff *skb, const struct tcphdr *th,
118                          const struct synproxy_options *opts, u32 recv_seq)
119 {
120         struct sk_buff *nskb;
121         struct iphdr *iph, *niph;
122         struct tcphdr *nth;
123         unsigned int tcp_hdr_size;
124
125         iph = ip_hdr(skb);
126
127         tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
128         nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
129                          GFP_ATOMIC);
130         if (nskb == NULL)
131                 return;
132         skb_reserve(nskb, MAX_TCP_HEADER);
133
134         niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
135
136         skb_reset_transport_header(nskb);
137         nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
138         nth->source     = th->source;
139         nth->dest       = th->dest;
140         nth->seq        = htonl(recv_seq - 1);
141         /* ack_seq is used to relay our ISN to the synproxy hook to initialize
142          * sequence number translation once a connection tracking entry exists.
143          */
144         nth->ack_seq    = htonl(ntohl(th->ack_seq) - 1);
145         tcp_flag_word(nth) = TCP_FLAG_SYN;
146         if (opts->options & XT_SYNPROXY_OPT_ECN)
147                 tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
148         nth->doff       = tcp_hdr_size / 4;
149         nth->window     = th->window;
150         nth->check      = 0;
151         nth->urg_ptr    = 0;
152
153         synproxy_build_options(nth, opts);
154
155         synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
156                           niph, nth, tcp_hdr_size);
157 }
158
159 static void
160 synproxy_send_server_ack(const struct synproxy_net *snet,
161                          const struct ip_ct_tcp *state,
162                          const struct sk_buff *skb, const struct tcphdr *th,
163                          const struct synproxy_options *opts)
164 {
165         struct sk_buff *nskb;
166         struct iphdr *iph, *niph;
167         struct tcphdr *nth;
168         unsigned int tcp_hdr_size;
169
170         iph = ip_hdr(skb);
171
172         tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
173         nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
174                          GFP_ATOMIC);
175         if (nskb == NULL)
176                 return;
177         skb_reserve(nskb, MAX_TCP_HEADER);
178
179         niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
180
181         skb_reset_transport_header(nskb);
182         nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
183         nth->source     = th->dest;
184         nth->dest       = th->source;
185         nth->seq        = htonl(ntohl(th->ack_seq));
186         nth->ack_seq    = htonl(ntohl(th->seq) + 1);
187         tcp_flag_word(nth) = TCP_FLAG_ACK;
188         nth->doff       = tcp_hdr_size / 4;
189         nth->window     = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
190         nth->check      = 0;
191         nth->urg_ptr    = 0;
192
193         synproxy_build_options(nth, opts);
194
195         synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
196 }
197
198 static void
199 synproxy_send_client_ack(const struct synproxy_net *snet,
200                          const struct sk_buff *skb, const struct tcphdr *th,
201                          const struct synproxy_options *opts)
202 {
203         struct sk_buff *nskb;
204         struct iphdr *iph, *niph;
205         struct tcphdr *nth;
206         unsigned int tcp_hdr_size;
207
208         iph = ip_hdr(skb);
209
210         tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
211         nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
212                          GFP_ATOMIC);
213         if (nskb == NULL)
214                 return;
215         skb_reserve(nskb, MAX_TCP_HEADER);
216
217         niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
218
219         skb_reset_transport_header(nskb);
220         nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
221         nth->source     = th->source;
222         nth->dest       = th->dest;
223         nth->seq        = htonl(ntohl(th->seq) + 1);
224         nth->ack_seq    = th->ack_seq;
225         tcp_flag_word(nth) = TCP_FLAG_ACK;
226         nth->doff       = tcp_hdr_size / 4;
227         nth->window     = htons(ntohs(th->window) >> opts->wscale);
228         nth->check      = 0;
229         nth->urg_ptr    = 0;
230
231         synproxy_build_options(nth, opts);
232
233         synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
234                           niph, nth, tcp_hdr_size);
235 }
236
237 static bool
238 synproxy_recv_client_ack(const struct synproxy_net *snet,
239                          const struct sk_buff *skb, const struct tcphdr *th,
240                          struct synproxy_options *opts, u32 recv_seq)
241 {
242         int mss;
243
244         mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
245         if (mss == 0) {
246                 this_cpu_inc(snet->stats->cookie_invalid);
247                 return false;
248         }
249
250         this_cpu_inc(snet->stats->cookie_valid);
251         opts->mss = mss;
252         opts->options |= XT_SYNPROXY_OPT_MSS;
253
254         if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
255                 synproxy_check_timestamp_cookie(opts);
256
257         synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
258         return true;
259 }
260
261 static unsigned int
262 synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
263 {
264         const struct xt_synproxy_info *info = par->targinfo;
265         struct synproxy_net *snet = synproxy_pernet(par->net);
266         struct synproxy_options opts = {};
267         struct tcphdr *th, _th;
268
269         if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
270                 return NF_DROP;
271
272         th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
273         if (th == NULL)
274                 return NF_DROP;
275
276         if (!synproxy_parse_options(skb, par->thoff, th, &opts))
277                 return NF_DROP;
278
279         if (th->syn && !(th->ack || th->fin || th->rst)) {
280                 /* Initial SYN from client */
281                 this_cpu_inc(snet->stats->syn_received);
282
283                 if (th->ece && th->cwr)
284                         opts.options |= XT_SYNPROXY_OPT_ECN;
285
286                 opts.options &= info->options;
287                 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
288                         synproxy_init_timestamp_cookie(info, &opts);
289                 else
290                         opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
291                                           XT_SYNPROXY_OPT_SACK_PERM |
292                                           XT_SYNPROXY_OPT_ECN);
293
294                 synproxy_send_client_synack(snet, skb, th, &opts);
295                 return NF_DROP;
296
297         } else if (th->ack && !(th->fin || th->rst || th->syn)) {
298                 /* ACK from client */
299                 synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
300                 return NF_DROP;
301         }
302
303         return XT_CONTINUE;
304 }
305
306 static unsigned int ipv4_synproxy_hook(void *priv,
307                                        struct sk_buff *skb,
308                                        const struct nf_hook_state *nhs)
309 {
310         struct synproxy_net *snet = synproxy_pernet(nhs->net);
311         enum ip_conntrack_info ctinfo;
312         struct nf_conn *ct;
313         struct nf_conn_synproxy *synproxy;
314         struct synproxy_options opts = {};
315         const struct ip_ct_tcp *state;
316         struct tcphdr *th, _th;
317         unsigned int thoff;
318
319         ct = nf_ct_get(skb, &ctinfo);
320         if (ct == NULL)
321                 return NF_ACCEPT;
322
323         synproxy = nfct_synproxy(ct);
324         if (synproxy == NULL)
325                 return NF_ACCEPT;
326
327         if (nf_is_loopback_packet(skb))
328                 return NF_ACCEPT;
329
330         thoff = ip_hdrlen(skb);
331         th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
332         if (th == NULL)
333                 return NF_DROP;
334
335         state = &ct->proto.tcp;
336         switch (state->state) {
337         case TCP_CONNTRACK_CLOSE:
338                 if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
339                         nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
340                                                       ntohl(th->seq) + 1);
341                         break;
342                 }
343
344                 if (!th->syn || th->ack ||
345                     CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
346                         break;
347
348                 /* Reopened connection - reset the sequence number and timestamp
349                  * adjustments, they will get initialized once the connection is
350                  * reestablished.
351                  */
352                 nf_ct_seqadj_init(ct, ctinfo, 0);
353                 synproxy->tsoff = 0;
354                 this_cpu_inc(snet->stats->conn_reopened);
355
356                 /* fall through */
357         case TCP_CONNTRACK_SYN_SENT:
358                 if (!synproxy_parse_options(skb, thoff, th, &opts))
359                         return NF_DROP;
360
361                 if (!th->syn && th->ack &&
362                     CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
363                         /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
364                          * therefore we need to add 1 to make the SYN sequence
365                          * number match the one of first SYN.
366                          */
367                         if (synproxy_recv_client_ack(snet, skb, th, &opts,
368                                                      ntohl(th->seq) + 1))
369                                 this_cpu_inc(snet->stats->cookie_retrans);
370
371                         return NF_DROP;
372                 }
373
374                 synproxy->isn = ntohl(th->ack_seq);
375                 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
376                         synproxy->its = opts.tsecr;
377                 break;
378         case TCP_CONNTRACK_SYN_RECV:
379                 if (!th->syn || !th->ack)
380                         break;
381
382                 if (!synproxy_parse_options(skb, thoff, th, &opts))
383                         return NF_DROP;
384
385                 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
386                         synproxy->tsoff = opts.tsval - synproxy->its;
387
388                 opts.options &= ~(XT_SYNPROXY_OPT_MSS |
389                                   XT_SYNPROXY_OPT_WSCALE |
390                                   XT_SYNPROXY_OPT_SACK_PERM);
391
392                 swap(opts.tsval, opts.tsecr);
393                 synproxy_send_server_ack(snet, state, skb, th, &opts);
394
395                 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
396
397                 swap(opts.tsval, opts.tsecr);
398                 synproxy_send_client_ack(snet, skb, th, &opts);
399
400                 consume_skb(skb);
401                 return NF_STOLEN;
402         default:
403                 break;
404         }
405
406         synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
407         return NF_ACCEPT;
408 }
409
410 static int synproxy_tg4_check(const struct xt_tgchk_param *par)
411 {
412         const struct ipt_entry *e = par->entryinfo;
413
414         if (e->ip.proto != IPPROTO_TCP ||
415             e->ip.invflags & XT_INV_PROTO)
416                 return -EINVAL;
417
418         return nf_ct_l3proto_try_module_get(par->family);
419 }
420
421 static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
422 {
423         nf_ct_l3proto_module_put(par->family);
424 }
425
426 static struct xt_target synproxy_tg4_reg __read_mostly = {
427         .name           = "SYNPROXY",
428         .family         = NFPROTO_IPV4,
429         .hooks          = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
430         .target         = synproxy_tg4,
431         .targetsize     = sizeof(struct xt_synproxy_info),
432         .checkentry     = synproxy_tg4_check,
433         .destroy        = synproxy_tg4_destroy,
434         .me             = THIS_MODULE,
435 };
436
437 static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
438         {
439                 .hook           = ipv4_synproxy_hook,
440                 .pf             = NFPROTO_IPV4,
441                 .hooknum        = NF_INET_LOCAL_IN,
442                 .priority       = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
443         },
444         {
445                 .hook           = ipv4_synproxy_hook,
446                 .pf             = NFPROTO_IPV4,
447                 .hooknum        = NF_INET_POST_ROUTING,
448                 .priority       = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
449         },
450 };
451
452 static int __init synproxy_tg4_init(void)
453 {
454         int err;
455
456         err = nf_register_hooks(ipv4_synproxy_ops,
457                                 ARRAY_SIZE(ipv4_synproxy_ops));
458         if (err < 0)
459                 goto err1;
460
461         err = xt_register_target(&synproxy_tg4_reg);
462         if (err < 0)
463                 goto err2;
464
465         return 0;
466
467 err2:
468         nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
469 err1:
470         return err;
471 }
472
473 static void __exit synproxy_tg4_exit(void)
474 {
475         xt_unregister_target(&synproxy_tg4_reg);
476         nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
477 }
478
479 module_init(synproxy_tg4_init);
480 module_exit(synproxy_tg4_exit);
481
482 MODULE_LICENSE("GPL");
483 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");