These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / net / netfilter / nf_conntrack_standalone.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.
8  */
9
10 #include <linux/types.h>
11 #include <linux/netfilter.h>
12 #include <linux/slab.h>
13 #include <linux/module.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/percpu.h>
18 #include <linux/netdevice.h>
19 #include <linux/security.h>
20 #include <net/net_namespace.h>
21 #ifdef CONFIG_SYSCTL
22 #include <linux/sysctl.h>
23 #endif
24
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_conntrack_l3proto.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_expect.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_acct.h>
32 #include <net/netfilter/nf_conntrack_zones.h>
33 #include <net/netfilter/nf_conntrack_timestamp.h>
34 #include <linux/rculist_nulls.h>
35
36 MODULE_LICENSE("GPL");
37
38 #ifdef CONFIG_NF_CONNTRACK_PROCFS
39 void
40 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
41             const struct nf_conntrack_l3proto *l3proto,
42             const struct nf_conntrack_l4proto *l4proto)
43 {
44         l3proto->print_tuple(s, tuple);
45         l4proto->print_tuple(s, tuple);
46 }
47 EXPORT_SYMBOL_GPL(print_tuple);
48
49 struct ct_iter_state {
50         struct seq_net_private p;
51         unsigned int bucket;
52         u_int64_t time_now;
53 };
54
55 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
56 {
57         struct net *net = seq_file_net(seq);
58         struct ct_iter_state *st = seq->private;
59         struct hlist_nulls_node *n;
60
61         for (st->bucket = 0;
62              st->bucket < net->ct.htable_size;
63              st->bucket++) {
64                 n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
65                 if (!is_a_nulls(n))
66                         return n;
67         }
68         return NULL;
69 }
70
71 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
72                                       struct hlist_nulls_node *head)
73 {
74         struct net *net = seq_file_net(seq);
75         struct ct_iter_state *st = seq->private;
76
77         head = rcu_dereference(hlist_nulls_next_rcu(head));
78         while (is_a_nulls(head)) {
79                 if (likely(get_nulls_value(head) == st->bucket)) {
80                         if (++st->bucket >= net->ct.htable_size)
81                                 return NULL;
82                 }
83                 head = rcu_dereference(
84                                 hlist_nulls_first_rcu(
85                                         &net->ct.hash[st->bucket]));
86         }
87         return head;
88 }
89
90 static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
91 {
92         struct hlist_nulls_node *head = ct_get_first(seq);
93
94         if (head)
95                 while (pos && (head = ct_get_next(seq, head)))
96                         pos--;
97         return pos ? NULL : head;
98 }
99
100 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
101         __acquires(RCU)
102 {
103         struct ct_iter_state *st = seq->private;
104
105         st->time_now = ktime_get_real_ns();
106         rcu_read_lock();
107         return ct_get_idx(seq, *pos);
108 }
109
110 static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
111 {
112         (*pos)++;
113         return ct_get_next(s, v);
114 }
115
116 static void ct_seq_stop(struct seq_file *s, void *v)
117         __releases(RCU)
118 {
119         rcu_read_unlock();
120 }
121
122 #ifdef CONFIG_NF_CONNTRACK_SECMARK
123 static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
124 {
125         int ret;
126         u32 len;
127         char *secctx;
128
129         ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
130         if (ret)
131                 return;
132
133         seq_printf(s, "secctx=%s ", secctx);
134
135         security_release_secctx(secctx, len);
136 }
137 #else
/* CONFIG_NF_CONNTRACK_SECMARK is off: there is no security context to print. */
static inline void ct_show_secctx(struct seq_file *s,
				  const struct nf_conn *ct)
{
}
141 #endif
142
143 #ifdef CONFIG_NF_CONNTRACK_ZONES
144 static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
145                          int dir)
146 {
147         const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
148
149         if (zone->dir != dir)
150                 return;
151         switch (zone->dir) {
152         case NF_CT_DEFAULT_ZONE_DIR:
153                 seq_printf(s, "zone=%u ", zone->id);
154                 break;
155         case NF_CT_ZONE_DIR_ORIG:
156                 seq_printf(s, "zone-orig=%u ", zone->id);
157                 break;
158         case NF_CT_ZONE_DIR_REPL:
159                 seq_printf(s, "zone-reply=%u ", zone->id);
160                 break;
161         default:
162                 break;
163         }
164 }
165 #else
/* CONFIG_NF_CONNTRACK_ZONES is off: there is no zone information to print. */
static inline void ct_show_zone(struct seq_file *s,
				const struct nf_conn *ct, int dir)
{
}
170 #endif
171
172 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
173 static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
174 {
175         struct ct_iter_state *st = s->private;
176         struct nf_conn_tstamp *tstamp;
177         s64 delta_time;
178
179         tstamp = nf_conn_tstamp_find(ct);
180         if (tstamp) {
181                 delta_time = st->time_now - tstamp->start;
182                 if (delta_time > 0)
183                         delta_time = div_s64(delta_time, NSEC_PER_SEC);
184                 else
185                         delta_time = 0;
186
187                 seq_printf(s, "delta-time=%llu ",
188                            (unsigned long long)delta_time);
189         }
190         return;
191 }
192 #else
/* CONFIG_NF_CONNTRACK_TIMESTAMP is off: there is no start time to report. */
static inline void ct_show_delta_time(struct seq_file *s,
				      const struct nf_conn *ct)
{
}
197 #endif
198
199 /* return 0 on success, 1 in case of error */
200 static int ct_seq_show(struct seq_file *s, void *v)
201 {
202         struct nf_conntrack_tuple_hash *hash = v;
203         struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
204         const struct nf_conntrack_l3proto *l3proto;
205         const struct nf_conntrack_l4proto *l4proto;
206         int ret = 0;
207
208         NF_CT_ASSERT(ct);
209         if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
210                 return 0;
211
212         /* we only want to print DIR_ORIGINAL */
213         if (NF_CT_DIRECTION(hash))
214                 goto release;
215
216         l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
217         NF_CT_ASSERT(l3proto);
218         l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
219         NF_CT_ASSERT(l4proto);
220
221         ret = -ENOSPC;
222         seq_printf(s, "%-8s %u %-8s %u %ld ",
223                    l3proto->name, nf_ct_l3num(ct),
224                    l4proto->name, nf_ct_protonum(ct),
225                    timer_pending(&ct->timeout)
226                    ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
227
228         if (l4proto->print_conntrack)
229                 l4proto->print_conntrack(s, ct);
230
231         print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
232                     l3proto, l4proto);
233
234         ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
235
236         if (seq_has_overflowed(s))
237                 goto release;
238
239         if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
240                 goto release;
241
242         if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
243                 seq_printf(s, "[UNREPLIED] ");
244
245         print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
246                     l3proto, l4proto);
247
248         ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
249
250         if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
251                 goto release;
252
253         if (test_bit(IPS_ASSURED_BIT, &ct->status))
254                 seq_printf(s, "[ASSURED] ");
255
256         if (seq_has_overflowed(s))
257                 goto release;
258
259 #if defined(CONFIG_NF_CONNTRACK_MARK)
260         seq_printf(s, "mark=%u ", ct->mark);
261 #endif
262
263         ct_show_secctx(s, ct);
264         ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
265         ct_show_delta_time(s, ct);
266
267         seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
268
269         if (seq_has_overflowed(s))
270                 goto release;
271
272         ret = 0;
273 release:
274         nf_ct_put(ct);
275         return ret;
276 }
277
278 static const struct seq_operations ct_seq_ops = {
279         .start = ct_seq_start,
280         .next  = ct_seq_next,
281         .stop  = ct_seq_stop,
282         .show  = ct_seq_show
283 };
284
285 static int ct_open(struct inode *inode, struct file *file)
286 {
287         return seq_open_net(inode, file, &ct_seq_ops,
288                         sizeof(struct ct_iter_state));
289 }
290
291 static const struct file_operations ct_file_ops = {
292         .owner   = THIS_MODULE,
293         .open    = ct_open,
294         .read    = seq_read,
295         .llseek  = seq_lseek,
296         .release = seq_release_net,
297 };
298
299 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
300 {
301         struct net *net = seq_file_net(seq);
302         int cpu;
303
304         if (*pos == 0)
305                 return SEQ_START_TOKEN;
306
307         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
308                 if (!cpu_possible(cpu))
309                         continue;
310                 *pos = cpu + 1;
311                 return per_cpu_ptr(net->ct.stat, cpu);
312         }
313
314         return NULL;
315 }
316
317 static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
318 {
319         struct net *net = seq_file_net(seq);
320         int cpu;
321
322         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
323                 if (!cpu_possible(cpu))
324                         continue;
325                 *pos = cpu + 1;
326                 return per_cpu_ptr(net->ct.stat, cpu);
327         }
328
329         return NULL;
330 }
331
/* seq_file ->stop() callback: the per-CPU walk has nothing to release. */
static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
335
336 static int ct_cpu_seq_show(struct seq_file *seq, void *v)
337 {
338         struct net *net = seq_file_net(seq);
339         unsigned int nr_conntracks = atomic_read(&net->ct.count);
340         const struct ip_conntrack_stat *st = v;
341
342         if (v == SEQ_START_TOKEN) {
343                 seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
344                 return 0;
345         }
346
347         seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
348                         "%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
349                    nr_conntracks,
350                    st->searched,
351                    st->found,
352                    st->new,
353                    st->invalid,
354                    st->ignore,
355                    st->delete,
356                    st->delete_list,
357                    st->insert,
358                    st->insert_failed,
359                    st->drop,
360                    st->early_drop,
361                    st->error,
362
363                    st->expect_new,
364                    st->expect_create,
365                    st->expect_delete,
366                    st->search_restart
367                 );
368         return 0;
369 }
370
371 static const struct seq_operations ct_cpu_seq_ops = {
372         .start  = ct_cpu_seq_start,
373         .next   = ct_cpu_seq_next,
374         .stop   = ct_cpu_seq_stop,
375         .show   = ct_cpu_seq_show,
376 };
377
378 static int ct_cpu_seq_open(struct inode *inode, struct file *file)
379 {
380         return seq_open_net(inode, file, &ct_cpu_seq_ops,
381                             sizeof(struct seq_net_private));
382 }
383
384 static const struct file_operations ct_cpu_seq_fops = {
385         .owner   = THIS_MODULE,
386         .open    = ct_cpu_seq_open,
387         .read    = seq_read,
388         .llseek  = seq_lseek,
389         .release = seq_release_net,
390 };
391
392 static int nf_conntrack_standalone_init_proc(struct net *net)
393 {
394         struct proc_dir_entry *pde;
395
396         pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
397         if (!pde)
398                 goto out_nf_conntrack;
399
400         pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
401                           &ct_cpu_seq_fops);
402         if (!pde)
403                 goto out_stat_nf_conntrack;
404         return 0;
405
406 out_stat_nf_conntrack:
407         remove_proc_entry("nf_conntrack", net->proc_net);
408 out_nf_conntrack:
409         return -ENOMEM;
410 }
411
412 static void nf_conntrack_standalone_fini_proc(struct net *net)
413 {
414         remove_proc_entry("nf_conntrack", net->proc_net_stat);
415         remove_proc_entry("nf_conntrack", net->proc_net);
416 }
417 #else
/* CONFIG_NF_CONNTRACK_PROCFS is off: no proc entries to create, report success. */
static int nf_conntrack_standalone_init_proc(struct net *net)
{
	const int ok = 0;

	return ok;
}
422
/* CONFIG_NF_CONNTRACK_PROCFS is off: no proc entries to remove. */
static void nf_conntrack_standalone_fini_proc(struct net *net)
{
	/* Intentionally empty. */
}
426 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
427
428 /* Sysctl support */
429
430 #ifdef CONFIG_SYSCTL
/*
 * Inclusive bounds for the nf_conntrack_log_invalid sysctl (which selects
 * the protocol whose invalid packets get logged); referenced through
 * .extra1/.extra2 of that table entry.
 */
static int log_invalid_proto_min;
static int log_invalid_proto_max = 255;

/* Handle for the registration of nf_ct_netfilter_table done at module init. */
static struct ctl_table_header *nf_ct_netfilter_header;
436
437 static struct ctl_table nf_ct_sysctl_table[] = {
438         {
439                 .procname       = "nf_conntrack_max",
440                 .data           = &nf_conntrack_max,
441                 .maxlen         = sizeof(int),
442                 .mode           = 0644,
443                 .proc_handler   = proc_dointvec,
444         },
445         {
446                 .procname       = "nf_conntrack_count",
447                 .data           = &init_net.ct.count,
448                 .maxlen         = sizeof(int),
449                 .mode           = 0444,
450                 .proc_handler   = proc_dointvec,
451         },
452         {
453                 .procname       = "nf_conntrack_buckets",
454                 .data           = &init_net.ct.htable_size,
455                 .maxlen         = sizeof(unsigned int),
456                 .mode           = 0444,
457                 .proc_handler   = proc_dointvec,
458         },
459         {
460                 .procname       = "nf_conntrack_checksum",
461                 .data           = &init_net.ct.sysctl_checksum,
462                 .maxlen         = sizeof(unsigned int),
463                 .mode           = 0644,
464                 .proc_handler   = proc_dointvec,
465         },
466         {
467                 .procname       = "nf_conntrack_log_invalid",
468                 .data           = &init_net.ct.sysctl_log_invalid,
469                 .maxlen         = sizeof(unsigned int),
470                 .mode           = 0644,
471                 .proc_handler   = proc_dointvec_minmax,
472                 .extra1         = &log_invalid_proto_min,
473                 .extra2         = &log_invalid_proto_max,
474         },
475         {
476                 .procname       = "nf_conntrack_expect_max",
477                 .data           = &nf_ct_expect_max,
478                 .maxlen         = sizeof(int),
479                 .mode           = 0644,
480                 .proc_handler   = proc_dointvec,
481         },
482         { }
483 };
484
/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed. */
#define NET_NF_CONNTRACK_MAX	2089
486
487 static struct ctl_table nf_ct_netfilter_table[] = {
488         {
489                 .procname       = "nf_conntrack_max",
490                 .data           = &nf_conntrack_max,
491                 .maxlen         = sizeof(int),
492                 .mode           = 0644,
493                 .proc_handler   = proc_dointvec,
494         },
495         { }
496 };
497
498 static int nf_conntrack_standalone_init_sysctl(struct net *net)
499 {
500         struct ctl_table *table;
501
502         table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
503                         GFP_KERNEL);
504         if (!table)
505                 goto out_kmemdup;
506
507         table[1].data = &net->ct.count;
508         table[2].data = &net->ct.htable_size;
509         table[3].data = &net->ct.sysctl_checksum;
510         table[4].data = &net->ct.sysctl_log_invalid;
511
512         /* Don't export sysctls to unprivileged users */
513         if (net->user_ns != &init_user_ns)
514                 table[0].procname = NULL;
515
516         net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
517         if (!net->ct.sysctl_header)
518                 goto out_unregister_netfilter;
519
520         return 0;
521
522 out_unregister_netfilter:
523         kfree(table);
524 out_kmemdup:
525         return -ENOMEM;
526 }
527
528 static void nf_conntrack_standalone_fini_sysctl(struct net *net)
529 {
530         struct ctl_table *table;
531
532         table = net->ct.sysctl_header->ctl_table_arg;
533         unregister_net_sysctl_table(net->ct.sysctl_header);
534         kfree(table);
535 }
536 #else
/* CONFIG_SYSCTL is off: no sysctls to register, report success. */
static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
	const int ok = 0;

	return ok;
}
541
/* CONFIG_SYSCTL is off: no sysctls to unregister. */
static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
	/* Intentionally empty. */
}
545 #endif /* CONFIG_SYSCTL */
546
547 static int nf_conntrack_pernet_init(struct net *net)
548 {
549         int ret;
550
551         ret = nf_conntrack_init_net(net);
552         if (ret < 0)
553                 goto out_init;
554
555         ret = nf_conntrack_standalone_init_proc(net);
556         if (ret < 0)
557                 goto out_proc;
558
559         net->ct.sysctl_checksum = 1;
560         net->ct.sysctl_log_invalid = 0;
561         ret = nf_conntrack_standalone_init_sysctl(net);
562         if (ret < 0)
563                 goto out_sysctl;
564
565         return 0;
566
567 out_sysctl:
568         nf_conntrack_standalone_fini_proc(net);
569 out_proc:
570         nf_conntrack_cleanup_net(net);
571 out_init:
572         return ret;
573 }
574
575 static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
576 {
577         struct net *net;
578
579         list_for_each_entry(net, net_exit_list, exit_list) {
580                 nf_conntrack_standalone_fini_sysctl(net);
581                 nf_conntrack_standalone_fini_proc(net);
582         }
583         nf_conntrack_cleanup_net_list(net_exit_list);
584 }
585
586 static struct pernet_operations nf_conntrack_net_ops = {
587         .init           = nf_conntrack_pernet_init,
588         .exit_batch     = nf_conntrack_pernet_exit,
589 };
590
591 static int __init nf_conntrack_standalone_init(void)
592 {
593         int ret = nf_conntrack_init_start();
594         if (ret < 0)
595                 goto out_start;
596
597 #ifdef CONFIG_SYSCTL
598         nf_ct_netfilter_header =
599                 register_net_sysctl(&init_net, "net", nf_ct_netfilter_table);
600         if (!nf_ct_netfilter_header) {
601                 pr_err("nf_conntrack: can't register to sysctl.\n");
602                 ret = -ENOMEM;
603                 goto out_sysctl;
604         }
605 #endif
606
607         ret = register_pernet_subsys(&nf_conntrack_net_ops);
608         if (ret < 0)
609                 goto out_pernet;
610
611         nf_conntrack_init_end();
612         return 0;
613
614 out_pernet:
615 #ifdef CONFIG_SYSCTL
616         unregister_net_sysctl_table(nf_ct_netfilter_header);
617 out_sysctl:
618 #endif
619         nf_conntrack_cleanup_end();
620 out_start:
621         return ret;
622 }
623
624 static void __exit nf_conntrack_standalone_fini(void)
625 {
626         nf_conntrack_cleanup_start();
627         unregister_pernet_subsys(&nf_conntrack_net_ops);
628 #ifdef CONFIG_SYSCTL
629         unregister_net_sysctl_table(nf_ct_netfilter_header);
630 #endif
631         nf_conntrack_cleanup_end();
632 }
633
634 module_init(nf_conntrack_standalone_init);
635 module_exit(nf_conntrack_standalone_fini);
636
/*
 * Deliberately empty.  Modules that require conntrack but do not
 * otherwise reference any of its symbols should call this function.
 */
void need_conntrack(void)
{
	/* Nothing to do. */
}
EXPORT_SYMBOL_GPL(need_conntrack);