Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / net / ipv6 / netfilter / ip6t_SYNPROXY.c
1 /*
2  * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  */
8
9 #include <linux/module.h>
10 #include <linux/skbuff.h>
11 #include <net/ip6_checksum.h>
12 #include <net/ip6_route.h>
13 #include <net/tcp.h>
14
15 #include <linux/netfilter_ipv6/ip6_tables.h>
16 #include <linux/netfilter/x_tables.h>
17 #include <linux/netfilter/xt_SYNPROXY.h>
18 #include <net/netfilter/nf_conntrack.h>
19 #include <net/netfilter/nf_conntrack_seqadj.h>
20 #include <net/netfilter/nf_conntrack_synproxy.h>
21
22 static struct ipv6hdr *
23 synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
24                                        const struct in6_addr *daddr)
25 {
26         struct ipv6hdr *iph;
27
28         skb_reset_network_header(skb);
29         iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
30         ip6_flow_hdr(iph, 0, 0);
31         iph->hop_limit  = 64;   //XXX
32         iph->nexthdr    = IPPROTO_TCP;
33         iph->saddr      = *saddr;
34         iph->daddr      = *daddr;
35
36         return iph;
37 }
38
39 static void
40 synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
41                   struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
42                   struct ipv6hdr *niph, struct tcphdr *nth,
43                   unsigned int tcp_hdr_size)
44 {
45         struct net *net = nf_ct_net((struct nf_conn *)nfct);
46         struct dst_entry *dst;
47         struct flowi6 fl6;
48
49         nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
50         nskb->ip_summed   = CHECKSUM_PARTIAL;
51         nskb->csum_start  = (unsigned char *)nth - nskb->head;
52         nskb->csum_offset = offsetof(struct tcphdr, check);
53
54         memset(&fl6, 0, sizeof(fl6));
55         fl6.flowi6_proto = IPPROTO_TCP;
56         fl6.saddr = niph->saddr;
57         fl6.daddr = niph->daddr;
58         fl6.fl6_sport = nth->source;
59         fl6.fl6_dport = nth->dest;
60         security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
61         dst = ip6_route_output(net, NULL, &fl6);
62         if (dst == NULL || dst->error) {
63                 dst_release(dst);
64                 goto free_nskb;
65         }
66         dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
67         if (IS_ERR(dst))
68                 goto free_nskb;
69
70         skb_dst_set(nskb, dst);
71
72         if (nfct) {
73                 nskb->nfct = nfct;
74                 nskb->nfctinfo = ctinfo;
75                 nf_conntrack_get(nfct);
76         }
77
78         ip6_local_out(nskb);
79         return;
80
81 free_nskb:
82         kfree_skb(nskb);
83 }
84
85 static void
86 synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
87                             const struct synproxy_options *opts)
88 {
89         struct sk_buff *nskb;
90         struct ipv6hdr *iph, *niph;
91         struct tcphdr *nth;
92         unsigned int tcp_hdr_size;
93         u16 mss = opts->mss;
94
95         iph = ipv6_hdr(skb);
96
97         tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
98         nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
99                          GFP_ATOMIC);
100         if (nskb == NULL)
101                 return;
102         skb_reserve(nskb, MAX_TCP_HEADER);
103
104         niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
105
106         skb_reset_transport_header(nskb);
107         nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
108         nth->source     = th->dest;
109         nth->dest       = th->source;
110         nth->seq        = htonl(__cookie_v6_init_sequence(iph, th, &mss));
111         nth->ack_seq    = htonl(ntohl(th->seq) + 1);
112         tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
113         if (opts->options & XT_SYNPROXY_OPT_ECN)
114                 tcp_flag_word(nth) |= TCP_FLAG_ECE;
115         nth->doff       = tcp_hdr_size / 4;
116         nth->window     = 0;
117         nth->check      = 0;
118         nth->urg_ptr    = 0;
119
120         synproxy_build_options(nth, opts);
121
122         synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
123                           niph, nth, tcp_hdr_size);
124 }
125
126 static void
127 synproxy_send_server_syn(const struct synproxy_net *snet,
128                          const struct sk_buff *skb, const struct tcphdr *th,
129                          const struct synproxy_options *opts, u32 recv_seq)
130 {
131         struct sk_buff *nskb;
132         struct ipv6hdr *iph, *niph;
133         struct tcphdr *nth;
134         unsigned int tcp_hdr_size;
135
136         iph = ipv6_hdr(skb);
137
138         tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
139         nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
140                          GFP_ATOMIC);
141         if (nskb == NULL)
142                 return;
143         skb_reserve(nskb, MAX_TCP_HEADER);
144
145         niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
146
147         skb_reset_transport_header(nskb);
148         nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
149         nth->source     = th->source;
150         nth->dest       = th->dest;
151         nth->seq        = htonl(recv_seq - 1);
152         /* ack_seq is used to relay our ISN to the synproxy hook to initialize
153          * sequence number translation once a connection tracking entry exists.
154          */
155         nth->ack_seq    = htonl(ntohl(th->ack_seq) - 1);
156         tcp_flag_word(nth) = TCP_FLAG_SYN;
157         if (opts->options & XT_SYNPROXY_OPT_ECN)
158                 tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
159         nth->doff       = tcp_hdr_size / 4;
160         nth->window     = th->window;
161         nth->check      = 0;
162         nth->urg_ptr    = 0;
163
164         synproxy_build_options(nth, opts);
165
166         synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
167                           niph, nth, tcp_hdr_size);
168 }
169
170 static void
171 synproxy_send_server_ack(const struct synproxy_net *snet,
172                          const struct ip_ct_tcp *state,
173                          const struct sk_buff *skb, const struct tcphdr *th,
174                          const struct synproxy_options *opts)
175 {
176         struct sk_buff *nskb;
177         struct ipv6hdr *iph, *niph;
178         struct tcphdr *nth;
179         unsigned int tcp_hdr_size;
180
181         iph = ipv6_hdr(skb);
182
183         tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
184         nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
185                          GFP_ATOMIC);
186         if (nskb == NULL)
187                 return;
188         skb_reserve(nskb, MAX_TCP_HEADER);
189
190         niph = synproxy_build_ip(nskb, &iph->daddr, &iph->saddr);
191
192         skb_reset_transport_header(nskb);
193         nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
194         nth->source     = th->dest;
195         nth->dest       = th->source;
196         nth->seq        = htonl(ntohl(th->ack_seq));
197         nth->ack_seq    = htonl(ntohl(th->seq) + 1);
198         tcp_flag_word(nth) = TCP_FLAG_ACK;
199         nth->doff       = tcp_hdr_size / 4;
200         nth->window     = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
201         nth->check      = 0;
202         nth->urg_ptr    = 0;
203
204         synproxy_build_options(nth, opts);
205
206         synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
207 }
208
209 static void
210 synproxy_send_client_ack(const struct synproxy_net *snet,
211                          const struct sk_buff *skb, const struct tcphdr *th,
212                          const struct synproxy_options *opts)
213 {
214         struct sk_buff *nskb;
215         struct ipv6hdr *iph, *niph;
216         struct tcphdr *nth;
217         unsigned int tcp_hdr_size;
218
219         iph = ipv6_hdr(skb);
220
221         tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
222         nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
223                          GFP_ATOMIC);
224         if (nskb == NULL)
225                 return;
226         skb_reserve(nskb, MAX_TCP_HEADER);
227
228         niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
229
230         skb_reset_transport_header(nskb);
231         nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
232         nth->source     = th->source;
233         nth->dest       = th->dest;
234         nth->seq        = htonl(ntohl(th->seq) + 1);
235         nth->ack_seq    = th->ack_seq;
236         tcp_flag_word(nth) = TCP_FLAG_ACK;
237         nth->doff       = tcp_hdr_size / 4;
238         nth->window     = ntohs(htons(th->window) >> opts->wscale);
239         nth->check      = 0;
240         nth->urg_ptr    = 0;
241
242         synproxy_build_options(nth, opts);
243
244         synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
245 }
246
247 static bool
248 synproxy_recv_client_ack(const struct synproxy_net *snet,
249                          const struct sk_buff *skb, const struct tcphdr *th,
250                          struct synproxy_options *opts, u32 recv_seq)
251 {
252         int mss;
253
254         mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
255         if (mss == 0) {
256                 this_cpu_inc(snet->stats->cookie_invalid);
257                 return false;
258         }
259
260         this_cpu_inc(snet->stats->cookie_valid);
261         opts->mss = mss;
262         opts->options |= XT_SYNPROXY_OPT_MSS;
263
264         if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
265                 synproxy_check_timestamp_cookie(opts);
266
267         synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
268         return true;
269 }
270
271 static unsigned int
272 synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
273 {
274         const struct xt_synproxy_info *info = par->targinfo;
275         struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
276         struct synproxy_options opts = {};
277         struct tcphdr *th, _th;
278
279         if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
280                 return NF_DROP;
281
282         th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
283         if (th == NULL)
284                 return NF_DROP;
285
286         if (!synproxy_parse_options(skb, par->thoff, th, &opts))
287                 return NF_DROP;
288
289         if (th->syn && !(th->ack || th->fin || th->rst)) {
290                 /* Initial SYN from client */
291                 this_cpu_inc(snet->stats->syn_received);
292
293                 if (th->ece && th->cwr)
294                         opts.options |= XT_SYNPROXY_OPT_ECN;
295
296                 opts.options &= info->options;
297                 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
298                         synproxy_init_timestamp_cookie(info, &opts);
299                 else
300                         opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
301                                           XT_SYNPROXY_OPT_SACK_PERM |
302                                           XT_SYNPROXY_OPT_ECN);
303
304                 synproxy_send_client_synack(skb, th, &opts);
305                 return NF_DROP;
306
307         } else if (th->ack && !(th->fin || th->rst || th->syn)) {
308                 /* ACK from client */
309                 synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
310                 return NF_DROP;
311         }
312
313         return XT_CONTINUE;
314 }
315
316 static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
317                                        struct sk_buff *skb,
318                                        const struct nf_hook_state *nhs)
319 {
320         struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out));
321         enum ip_conntrack_info ctinfo;
322         struct nf_conn *ct;
323         struct nf_conn_synproxy *synproxy;
324         struct synproxy_options opts = {};
325         const struct ip_ct_tcp *state;
326         struct tcphdr *th, _th;
327         __be16 frag_off;
328         u8 nexthdr;
329         int thoff;
330
331         ct = nf_ct_get(skb, &ctinfo);
332         if (ct == NULL)
333                 return NF_ACCEPT;
334
335         synproxy = nfct_synproxy(ct);
336         if (synproxy == NULL)
337                 return NF_ACCEPT;
338
339         if (nf_is_loopback_packet(skb))
340                 return NF_ACCEPT;
341
342         nexthdr = ipv6_hdr(skb)->nexthdr;
343         thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
344                                  &frag_off);
345         if (thoff < 0)
346                 return NF_ACCEPT;
347
348         th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
349         if (th == NULL)
350                 return NF_DROP;
351
352         state = &ct->proto.tcp;
353         switch (state->state) {
354         case TCP_CONNTRACK_CLOSE:
355                 if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
356                         nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
357                                                       ntohl(th->seq) + 1);
358                         break;
359                 }
360
361                 if (!th->syn || th->ack ||
362                     CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
363                         break;
364
365                 /* Reopened connection - reset the sequence number and timestamp
366                  * adjustments, they will get initialized once the connection is
367                  * reestablished.
368                  */
369                 nf_ct_seqadj_init(ct, ctinfo, 0);
370                 synproxy->tsoff = 0;
371                 this_cpu_inc(snet->stats->conn_reopened);
372
373                 /* fall through */
374         case TCP_CONNTRACK_SYN_SENT:
375                 if (!synproxy_parse_options(skb, thoff, th, &opts))
376                         return NF_DROP;
377
378                 if (!th->syn && th->ack &&
379                     CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
380                         /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
381                          * therefore we need to add 1 to make the SYN sequence
382                          * number match the one of first SYN.
383                          */
384                         if (synproxy_recv_client_ack(snet, skb, th, &opts,
385                                                      ntohl(th->seq) + 1))
386                                 this_cpu_inc(snet->stats->cookie_retrans);
387
388                         return NF_DROP;
389                 }
390
391                 synproxy->isn = ntohl(th->ack_seq);
392                 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
393                         synproxy->its = opts.tsecr;
394                 break;
395         case TCP_CONNTRACK_SYN_RECV:
396                 if (!th->syn || !th->ack)
397                         break;
398
399                 if (!synproxy_parse_options(skb, thoff, th, &opts))
400                         return NF_DROP;
401
402                 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
403                         synproxy->tsoff = opts.tsval - synproxy->its;
404
405                 opts.options &= ~(XT_SYNPROXY_OPT_MSS |
406                                   XT_SYNPROXY_OPT_WSCALE |
407                                   XT_SYNPROXY_OPT_SACK_PERM);
408
409                 swap(opts.tsval, opts.tsecr);
410                 synproxy_send_server_ack(snet, state, skb, th, &opts);
411
412                 nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
413
414                 swap(opts.tsval, opts.tsecr);
415                 synproxy_send_client_ack(snet, skb, th, &opts);
416
417                 consume_skb(skb);
418                 return NF_STOLEN;
419         default:
420                 break;
421         }
422
423         synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
424         return NF_ACCEPT;
425 }
426
427 static int synproxy_tg6_check(const struct xt_tgchk_param *par)
428 {
429         const struct ip6t_entry *e = par->entryinfo;
430
431         if (!(e->ipv6.flags & IP6T_F_PROTO) ||
432             e->ipv6.proto != IPPROTO_TCP ||
433             e->ipv6.invflags & XT_INV_PROTO)
434                 return -EINVAL;
435
436         return nf_ct_l3proto_try_module_get(par->family);
437 }
438
439 static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
440 {
441         nf_ct_l3proto_module_put(par->family);
442 }
443
444 static struct xt_target synproxy_tg6_reg __read_mostly = {
445         .name           = "SYNPROXY",
446         .family         = NFPROTO_IPV6,
447         .hooks          = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
448         .target         = synproxy_tg6,
449         .targetsize     = sizeof(struct xt_synproxy_info),
450         .checkentry     = synproxy_tg6_check,
451         .destroy        = synproxy_tg6_destroy,
452         .me             = THIS_MODULE,
453 };
454
455 static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
456         {
457                 .hook           = ipv6_synproxy_hook,
458                 .owner          = THIS_MODULE,
459                 .pf             = NFPROTO_IPV6,
460                 .hooknum        = NF_INET_LOCAL_IN,
461                 .priority       = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
462         },
463         {
464                 .hook           = ipv6_synproxy_hook,
465                 .owner          = THIS_MODULE,
466                 .pf             = NFPROTO_IPV6,
467                 .hooknum        = NF_INET_POST_ROUTING,
468                 .priority       = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
469         },
470 };
471
472 static int __init synproxy_tg6_init(void)
473 {
474         int err;
475
476         err = nf_register_hooks(ipv6_synproxy_ops,
477                                 ARRAY_SIZE(ipv6_synproxy_ops));
478         if (err < 0)
479                 goto err1;
480
481         err = xt_register_target(&synproxy_tg6_reg);
482         if (err < 0)
483                 goto err2;
484
485         return 0;
486
487 err2:
488         nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
489 err1:
490         return err;
491 }
492
493 static void __exit synproxy_tg6_exit(void)
494 {
495         xt_unregister_target(&synproxy_tg6_reg);
496         nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
497 }
498
499 module_init(synproxy_tg6_init);
500 module_exit(synproxy_tg6_exit);
501
502 MODULE_LICENSE("GPL");
503 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");