Add RT Linux 4.1.3-rt3 as base
[kvmfornfv.git] kernel/net/netfilter/core.c
/* netfilter.c: look after the filters for various protocols.
 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
 *
 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
 * way.
 *
 * Rusty Russell (C)2000 -- This code is GPL.
 * Patrick McHardy (c) 2006-2012
 */
#include <linux/kernel.h>
#include <linux/netfilter.h>
#include <net/protocol.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/if.h>
#include <linux/netdevice.h>
#include <linux/netfilter_ipv6.h>
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/locallock.h>
#include <net/net_namespace.h>
#include <net/sock.h>

#include "nf_internals.h"

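/*
 * Background sketch (RT-specific): on PREEMPT_RT the per-CPU xt_write_recseq
 * sections used by the *_tables packet walkers can be preempted, so the RT
 * patch set serializes them with the local lock below instead of relying on
 * disabled bottom halves alone (see xt_write_recseq_begin() in
 * <linux/netfilter/x_tables.h> in this tree).
 */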
#ifdef CONFIG_PREEMPT_RT_BASE
DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
EXPORT_PER_CPU_SYMBOL(xt_write_lock);
#endif

static DEFINE_MUTEX(afinfo_mutex);

const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo);
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);

int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
        mutex_lock(&afinfo_mutex);
        RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
        mutex_unlock(&afinfo_mutex);
        return 0;
}
EXPORT_SYMBOL_GPL(nf_register_afinfo);

void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
{
        mutex_lock(&afinfo_mutex);
        RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
        mutex_unlock(&afinfo_mutex);
        synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
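
/*
 * Illustrative sketch (hypothetical reader, not part of this file): the
 * per-family ops published above are looked up under RCU, typically via the
 * nf_get_afinfo() helper from <linux/netfilter.h>; csum, hook and dataoff
 * stand in for caller-supplied state:
 *
 *        const struct nf_afinfo *afinfo;
 *
 *        rcu_read_lock();
 *        afinfo = nf_get_afinfo(NFPROTO_IPV4);
 *        if (afinfo)
 *                csum = afinfo->checksum(skb, hook, dataoff, IPPROTO_TCP);
 *        rcu_read_unlock();
 */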

struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);

#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif

static DEFINE_MUTEX(nf_hook_mutex);

int nf_register_hook(struct nf_hook_ops *reg)
{
        struct nf_hook_ops *elem;

        mutex_lock(&nf_hook_mutex);
        list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
                if (reg->priority < elem->priority)
                        break;
        }
        list_add_rcu(&reg->list, elem->list.prev);
        mutex_unlock(&nf_hook_mutex);
#ifdef HAVE_JUMP_LABEL
        static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
        return 0;
}
EXPORT_SYMBOL(nf_register_hook);

void nf_unregister_hook(struct nf_hook_ops *reg)
{
        mutex_lock(&nf_hook_mutex);
        list_del_rcu(&reg->list);
        mutex_unlock(&nf_hook_mutex);
#ifdef HAVE_JUMP_LABEL
        static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
        synchronize_net();
}
EXPORT_SYMBOL(nf_unregister_hook);
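
/*
 * Illustrative sketch (hypothetical module, not part of this file): a hook is
 * registered by filling in an nf_hook_ops and calling nf_register_hook();
 * the nf_hookfn signature assumed here is the 4.1-era one that nf_iterate()
 * invokes below.  The module's exit path passes the same ops to
 * nf_unregister_hook().
 *
 *        static unsigned int example_hook(const struct nf_hook_ops *ops,
 *                                         struct sk_buff *skb,
 *                                         const struct nf_hook_state *state)
 *        {
 *                return NF_ACCEPT;
 *        }
 *
 *        static struct nf_hook_ops example_ops = {
 *                .hook     = example_hook,
 *                .owner    = THIS_MODULE,
 *                .pf       = NFPROTO_IPV4,
 *                .hooknum  = NF_INET_PRE_ROUTING,
 *                .priority = NF_IP_PRI_FIRST,
 *        };
 *
 *        err = nf_register_hook(&example_ops);
 */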

int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
        unsigned int i;
        int err = 0;

        for (i = 0; i < n; i++) {
                err = nf_register_hook(&reg[i]);
                if (err)
                        goto err;
        }
        return err;

err:
        if (i > 0)
                nf_unregister_hooks(reg, i);
        return err;
}
EXPORT_SYMBOL(nf_register_hooks);

void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
{
        while (n-- > 0)
                nf_unregister_hook(&reg[n]);
}
EXPORT_SYMBOL(nf_unregister_hooks);
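
/*
 * Illustrative sketch (hypothetical caller): modules with several hooks keep
 * them in an array and register them in one call; as the code above shows,
 * nf_register_hooks() unwinds the entries it already registered when a later
 * registration fails, and the exit path simply unregisters the whole array.
 *
 *        static struct nf_hook_ops example_ops[] = {
 *                {
 *                        .hook     = example_hook,
 *                        .pf       = NFPROTO_IPV4,
 *                        .hooknum  = NF_INET_LOCAL_OUT,
 *                        .priority = NF_IP_PRI_FILTER,
 *                },
 *                {
 *                        .hook     = example_hook,
 *                        .pf       = NFPROTO_IPV6,
 *                        .hooknum  = NF_INET_LOCAL_OUT,
 *                        .priority = NF_IP6_PRI_FILTER,
 *                },
 *        };
 *
 *        err = nf_register_hooks(example_ops, ARRAY_SIZE(example_ops));
 *        nf_unregister_hooks(example_ops, ARRAY_SIZE(example_ops));
 */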

unsigned int nf_iterate(struct list_head *head,
                        struct sk_buff *skb,
                        struct nf_hook_state *state,
                        struct nf_hook_ops **elemp)
{
        unsigned int verdict;

        /*
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
        list_for_each_entry_continue_rcu((*elemp), head, list) {
                if (state->thresh > (*elemp)->priority)
                        continue;

                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
repeat:
                verdict = (*elemp)->hook(*elemp, skb, state);
                if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
                        if (unlikely((verdict & NF_VERDICT_MASK)
                                                        > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
                                        (*elemp)->hook, state->hook);
                                continue;
                        }
#endif
                        if (verdict != NF_REPEAT)
                                return verdict;
                        goto repeat;
                }
        }
        return NF_ACCEPT;
}
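
/*
 * Note on the loop above: NF_ACCEPT moves iteration on to the next hook on
 * the list, NF_REPEAT re-invokes the same hook, and any other verdict (DROP,
 * STOLEN, QUEUE, STOP) is returned to the caller (nf_hook_slow() below),
 * which decides what happens to the skb.
 */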


/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
{
        struct nf_hook_ops *elem;
        unsigned int verdict;
        int ret = 0;

        /* We may already have this, but read-locks nest anyway */
        rcu_read_lock();

        elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
                              struct nf_hook_ops, list);
next_hook:
        verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
                             &elem);
        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
                ret = 1;
        } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
                kfree_skb(skb);
                ret = NF_DROP_GETERR(verdict);
                if (ret == 0)
                        ret = -EPERM;
        } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
                int err = nf_queue(skb, elem, state,
                                   verdict >> NF_VERDICT_QBITS);
                if (err < 0) {
                        if (err == -ECANCELED)
                                goto next_hook;
                        if (err == -ESRCH &&
                           (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
                                goto next_hook;
                        kfree_skb(skb);
                }
        }
        rcu_read_unlock();
        return ret;
}
EXPORT_SYMBOL(nf_hook_slow);
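
/*
 * Illustrative sketch: protocol code does not normally call nf_hook_slow()
 * directly; it goes through the NF_HOOK()/nf_hook_thresh() wrappers in
 * <linux/netfilter.h>, which fall straight through to okfn() when no hooks
 * are registered.  A 4.1-era IPv4 receive path invocation looks roughly like:
 *
 *        return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb,
 *                       dev, NULL, ip_rcv_finish);
 */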


int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
{
        if (writable_len > skb->len)
                return 0;

        /* Not exclusive use of packet?  Must copy. */
        if (!skb_cloned(skb)) {
                if (writable_len <= skb_headlen(skb))
                        return 1;
        } else if (skb_clone_writable(skb, writable_len))
                return 1;

        if (writable_len <= skb_headlen(skb))
                writable_len = 0;
        else
                writable_len -= skb_headlen(skb);

        return !!__pskb_pull_tail(skb, writable_len);
}
EXPORT_SYMBOL(skb_make_writable);
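
/*
 * Illustrative sketch (hypothetical hook body): code that rewrites packet
 * data calls skb_make_writable() first so a cloned or shared skb gets its
 * own copy of the bytes about to change (new_ttl is a made-up value; a real
 * user would also fix up the affected checksums):
 *
 *        if (!skb_make_writable(skb, sizeof(struct iphdr)))
 *                return NF_DROP;
 *        iph = ip_hdr(skb);
 *        iph->ttl = new_ttl;
 */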

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
   tracking in use: without this, connection may not be in hash table, and hence
   manufactured ICMP or RST packets will not be associated with it. */
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
                __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);

void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
        void (*attach)(struct sk_buff *, const struct sk_buff *);

        if (skb->nfct) {
                rcu_read_lock();
                attach = rcu_dereference(ip_ct_attach);
                if (attach)
                        attach(new, skb);
                rcu_read_unlock();
        }
}
EXPORT_SYMBOL(nf_ct_attach);
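
/*
 * Illustrative sketch (hypothetical caller): code that manufactures a reply
 * from an existing packet, such as the RST/ICMP generation done by REJECT
 * targets, attaches the original's conntrack before sending the reply
 * (build_reply() is a made-up stand-in for that construction):
 *
 *        nskb = build_reply(oldskb);
 *        nf_ct_attach(nskb, oldskb);
 */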

void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
EXPORT_SYMBOL(nf_ct_destroy);

void nf_conntrack_destroy(struct nf_conntrack *nfct)
{
        void (*destroy)(struct nf_conntrack *);

        rcu_read_lock();
        destroy = rcu_dereference(nf_ct_destroy);
        BUG_ON(destroy == NULL);
        destroy(nfct);
        rcu_read_unlock();
}
EXPORT_SYMBOL(nf_conntrack_destroy);

struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfq_ct_hook);

struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfq_ct_nat_hook);

#endif /* CONFIG_NF_CONNTRACK */

#ifdef CONFIG_NF_NAT_NEEDED
void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif

static int __net_init netfilter_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
        net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
                                                net->proc_net);
        if (!net->nf.proc_netfilter) {
                if (!net_eq(net, &init_net))
                        pr_err("cannot create netfilter proc entry");

                return -ENOMEM;
        }
#endif
        return 0;
}

static void __net_exit netfilter_net_exit(struct net *net)
{
        remove_proc_entry("netfilter", net->proc_net);
}

static struct pernet_operations netfilter_net_ops = {
        .init = netfilter_net_init,
        .exit = netfilter_net_exit,
};

int __init netfilter_init(void)
{
        int i, h, ret;

        for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
                for (h = 0; h < NF_MAX_HOOKS; h++)
                        INIT_LIST_HEAD(&nf_hooks[i][h]);
        }

        ret = register_pernet_subsys(&netfilter_net_ops);
        if (ret < 0)
                goto err;

        ret = netfilter_log_init();
        if (ret < 0)
                goto err_pernet;

        return 0;
err_pernet:
        unregister_pernet_subsys(&netfilter_net_ops);
err:
        return ret;
}