Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Access devconf attribute @attr in the devconf_dflt table of namespace
 * @net.  @net is parenthesized so that any pointer-valued expression
 * (e.g. "p + 1" or "cond ? a : b"), not only a plain identifier, expands
 * correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127         ASSERT_RTNL();
128         hlist_del_init_rcu(&ifa->hash);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         u32 hash = inet_addr_hash(net, addr);
142         struct net_device *result = NULL;
143         struct in_ifaddr *ifa;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147                 if (ifa->ifa_local == addr) {
148                         struct net_device *dev = ifa->ifa_dev->dev;
149
150                         if (!net_eq(dev_net(dev), net))
151                                 continue;
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
#ifndef CONFIG_SYSCTL
/* Without CONFIG_SYSCTL, registration is a no-op that always succeeds. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}

/* Without CONFIG_SYSCTL, there is nothing to unregister. */
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#else
/* With CONFIG_SYSCTL, only the prototypes are declared here. */
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#endif
194
195 /* Locks all the inet devices. */
196
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205         if (ifa->ifa_dev)
206                 in_dev_put(ifa->ifa_dev);
207         kfree(ifa);
208 }
209
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217         struct net_device *dev = idev->dev;
218
219         WARN_ON(idev->ifa_list);
220         WARN_ON(idev->mc_list);
221         kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225         dev_put(dev);
226         if (!idev->dead)
227                 pr_err("Freeing alive in_device %p\n", idev);
228         else
229                 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235         struct in_device *in_dev;
236         int err = -ENOMEM;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         in_dev_hold(in_dev);
256
257         err = devinet_sysctl_register(in_dev);
258         if (err) {
259                 in_dev->dead = 1;
260                 in_dev_put(in_dev);
261                 in_dev = NULL;
262                 goto out;
263         }
264         ip_mc_init_dev(in_dev);
265         if (dev->flags & IFF_UP)
266                 ip_mc_up(in_dev);
267
268         /* we can receive as soon as ip_ptr is set -- do this last */
269         rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271         return in_dev ?: ERR_PTR(err);
272 out_kfree:
273         kfree(in_dev);
274         in_dev = NULL;
275         goto out;
276 }
277
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280         struct in_device *idev = container_of(head, struct in_device, rcu_head);
281         in_dev_put(idev);
282 }
283
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286         struct in_ifaddr *ifa;
287         struct net_device *dev;
288
289         ASSERT_RTNL();
290
291         dev = in_dev->dev;
292
293         in_dev->dead = 1;
294
295         ip_mc_destroy_dev(in_dev);
296
297         while ((ifa = in_dev->ifa_list) != NULL) {
298                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299                 inet_free_ifa(ifa);
300         }
301
302         RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304         devinet_sysctl_unregister(in_dev);
305         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306         arp_ifdown(dev);
307
308         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313         rcu_read_lock();
314         for_primary_ifa(in_dev) {
315                 if (inet_ifa_match(a, ifa)) {
316                         if (!b || inet_ifa_match(b, ifa)) {
317                                 rcu_read_unlock();
318                                 return 1;
319                         }
320                 }
321         } endfor_ifa(in_dev);
322         rcu_read_unlock();
323         return 0;
324 }
325
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327                          int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
329         struct in_ifaddr *promote = NULL;
330         struct in_ifaddr *ifa, *ifa1 = *ifap;
331         struct in_ifaddr *last_prim = in_dev->ifa_list;
332         struct in_ifaddr *prev_prom = NULL;
333         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334
335         ASSERT_RTNL();
336
337         /* 1. Deleting primary ifaddr forces deletion all secondaries
338          * unless alias promotion is set
339          **/
340
341         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
343
344                 while ((ifa = *ifap1) != NULL) {
345                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346                             ifa1->ifa_scope <= ifa->ifa_scope)
347                                 last_prim = ifa;
348
349                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350                             ifa1->ifa_mask != ifa->ifa_mask ||
351                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
352                                 ifap1 = &ifa->ifa_next;
353                                 prev_prom = ifa;
354                                 continue;
355                         }
356
357                         if (!do_promote) {
358                                 inet_hash_remove(ifa);
359                                 *ifap1 = ifa->ifa_next;
360
361                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362                                 blocking_notifier_call_chain(&inetaddr_chain,
363                                                 NETDEV_DOWN, ifa);
364                                 inet_free_ifa(ifa);
365                         } else {
366                                 promote = ifa;
367                                 break;
368                         }
369                 }
370         }
371
372         /* On promotion all secondaries from subnet are changing
373          * the primary IP, we must remove all their routes silently
374          * and later to add them back with new prefsrc. Do this
375          * while all addresses are on the device list.
376          */
377         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378                 if (ifa1->ifa_mask == ifa->ifa_mask &&
379                     inet_ifa_match(ifa1->ifa_address, ifa))
380                         fib_del_ifaddr(ifa, ifa1);
381         }
382
383         /* 2. Unlink it */
384
385         *ifap = ifa1->ifa_next;
386         inet_hash_remove(ifa1);
387
388         /* 3. Announce address deletion */
389
390         /* Send message first, then call notifier.
391            At first sight, FIB update triggered by notifier
392            will refer to already deleted ifaddr, that could confuse
393            netlink listeners. It is not true: look, gated sees
394            that route deleted and if it still thinks that ifaddr
395            is valid, it will try to restore deleted routes... Grr.
396            So that, this order is correct.
397          */
398         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
400
401         if (promote) {
402                 struct in_ifaddr *next_sec = promote->ifa_next;
403
404                 if (prev_prom) {
405                         prev_prom->ifa_next = promote->ifa_next;
406                         promote->ifa_next = last_prim->ifa_next;
407                         last_prim->ifa_next = promote;
408                 }
409
410                 promote->ifa_flags &= ~IFA_F_SECONDARY;
411                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412                 blocking_notifier_call_chain(&inetaddr_chain,
413                                 NETDEV_UP, promote);
414                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415                         if (ifa1->ifa_mask != ifa->ifa_mask ||
416                             !inet_ifa_match(ifa1->ifa_address, ifa))
417                                         continue;
418                         fib_add_ifaddr(ifa);
419                 }
420
421         }
422         if (destroy)
423                 inet_free_ifa(ifa1);
424 }
425
426 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
427                          int destroy)
428 {
429         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
430 }
431
/* Work handler, defined further down in this file. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437                              u32 portid)
438 {
439         struct in_device *in_dev = ifa->ifa_dev;
440         struct in_ifaddr *ifa1, **ifap, **last_primary;
441
442         ASSERT_RTNL();
443
444         if (!ifa->ifa_local) {
445                 inet_free_ifa(ifa);
446                 return 0;
447         }
448
449         ifa->ifa_flags &= ~IFA_F_SECONDARY;
450         last_primary = &in_dev->ifa_list;
451
452         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453              ifap = &ifa1->ifa_next) {
454                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455                     ifa->ifa_scope <= ifa1->ifa_scope)
456                         last_primary = &ifa1->ifa_next;
457                 if (ifa1->ifa_mask == ifa->ifa_mask &&
458                     inet_ifa_match(ifa1->ifa_address, ifa)) {
459                         if (ifa1->ifa_local == ifa->ifa_local) {
460                                 inet_free_ifa(ifa);
461                                 return -EEXIST;
462                         }
463                         if (ifa1->ifa_scope != ifa->ifa_scope) {
464                                 inet_free_ifa(ifa);
465                                 return -EINVAL;
466                         }
467                         ifa->ifa_flags |= IFA_F_SECONDARY;
468                 }
469         }
470
471         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472                 prandom_seed((__force u32) ifa->ifa_local);
473                 ifap = last_primary;
474         }
475
476         ifa->ifa_next = *ifap;
477         *ifap = ifa;
478
479         inet_hash_insert(dev_net(in_dev->dev), ifa);
480
481         cancel_delayed_work(&check_lifetime_work);
482         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483
484         /* Send message first, then call notifier.
485            Notifier will trigger FIB update, so that
486            listeners of netlink will know about new ifaddr */
487         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489
490         return 0;
491 }
492
493 static int inet_insert_ifa(struct in_ifaddr *ifa)
494 {
495         return __inet_insert_ifa(ifa, NULL, 0);
496 }
497
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499 {
500         struct in_device *in_dev = __in_dev_get_rtnl(dev);
501
502         ASSERT_RTNL();
503
504         if (!in_dev) {
505                 inet_free_ifa(ifa);
506                 return -ENOBUFS;
507         }
508         ipv4_devconf_setall(in_dev);
509         neigh_parms_data_state_setall(in_dev->arp_parms);
510         if (ifa->ifa_dev != in_dev) {
511                 WARN_ON(ifa->ifa_dev);
512                 in_dev_hold(in_dev);
513                 ifa->ifa_dev = in_dev;
514         }
515         if (ipv4_is_loopback(ifa->ifa_local))
516                 ifa->ifa_scope = RT_SCOPE_HOST;
517         return inet_insert_ifa(ifa);
518 }
519
520 /* Caller must hold RCU or RTNL :
521  * We dont take a reference on found in_device
522  */
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
524 {
525         struct net_device *dev;
526         struct in_device *in_dev = NULL;
527
528         rcu_read_lock();
529         dev = dev_get_by_index_rcu(net, ifindex);
530         if (dev)
531                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532         rcu_read_unlock();
533         return in_dev;
534 }
535 EXPORT_SYMBOL(inetdev_by_index);
536
537 /* Called only from RTNL semaphored context. No locks. */
538
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540                                     __be32 mask)
541 {
542         ASSERT_RTNL();
543
544         for_primary_ifa(in_dev) {
545                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546                         return ifa;
547         } endfor_ifa(in_dev);
548         return NULL;
549 }
550
551 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
552 {
553         struct ip_mreqn mreq = {
554                 .imr_multiaddr.s_addr = ifa->ifa_address,
555                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
556         };
557         int ret;
558
559         ASSERT_RTNL();
560
561         lock_sock(sk);
562         if (join)
563                 ret = ip_mc_join_group(sk, &mreq);
564         else
565                 ret = ip_mc_leave_group(sk, &mreq);
566         release_sock(sk);
567
568         return ret;
569 }
570
571 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
572 {
573         struct net *net = sock_net(skb->sk);
574         struct nlattr *tb[IFA_MAX+1];
575         struct in_device *in_dev;
576         struct ifaddrmsg *ifm;
577         struct in_ifaddr *ifa, **ifap;
578         int err = -EINVAL;
579
580         ASSERT_RTNL();
581
582         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
583         if (err < 0)
584                 goto errout;
585
586         ifm = nlmsg_data(nlh);
587         in_dev = inetdev_by_index(net, ifm->ifa_index);
588         if (!in_dev) {
589                 err = -ENODEV;
590                 goto errout;
591         }
592
593         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
594              ifap = &ifa->ifa_next) {
595                 if (tb[IFA_LOCAL] &&
596                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
597                         continue;
598
599                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
600                         continue;
601
602                 if (tb[IFA_ADDRESS] &&
603                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
604                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
605                         continue;
606
607                 if (ipv4_is_multicast(ifa->ifa_address))
608                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
609                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
610                 return 0;
611         }
612
613         err = -EADDRNOTAVAIL;
614 errout:
615         return err;
616 }
617
618 #define INFINITY_LIFE_TIME      0xFFFFFFFF
619
620 static void check_lifetime(struct work_struct *work)
621 {
622         unsigned long now, next, next_sec, next_sched;
623         struct in_ifaddr *ifa;
624         struct hlist_node *n;
625         int i;
626
627         now = jiffies;
628         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
629
630         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
631                 bool change_needed = false;
632
633                 rcu_read_lock();
634                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
635                         unsigned long age;
636
637                         if (ifa->ifa_flags & IFA_F_PERMANENT)
638                                 continue;
639
640                         /* We try to batch several events at once. */
641                         age = (now - ifa->ifa_tstamp +
642                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
643
644                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
645                             age >= ifa->ifa_valid_lft) {
646                                 change_needed = true;
647                         } else if (ifa->ifa_preferred_lft ==
648                                    INFINITY_LIFE_TIME) {
649                                 continue;
650                         } else if (age >= ifa->ifa_preferred_lft) {
651                                 if (time_before(ifa->ifa_tstamp +
652                                                 ifa->ifa_valid_lft * HZ, next))
653                                         next = ifa->ifa_tstamp +
654                                                ifa->ifa_valid_lft * HZ;
655
656                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
657                                         change_needed = true;
658                         } else if (time_before(ifa->ifa_tstamp +
659                                                ifa->ifa_preferred_lft * HZ,
660                                                next)) {
661                                 next = ifa->ifa_tstamp +
662                                        ifa->ifa_preferred_lft * HZ;
663                         }
664                 }
665                 rcu_read_unlock();
666                 if (!change_needed)
667                         continue;
668                 rtnl_lock();
669                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
670                         unsigned long age;
671
672                         if (ifa->ifa_flags & IFA_F_PERMANENT)
673                                 continue;
674
675                         /* We try to batch several events at once. */
676                         age = (now - ifa->ifa_tstamp +
677                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
678
679                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
680                             age >= ifa->ifa_valid_lft) {
681                                 struct in_ifaddr **ifap;
682
683                                 for (ifap = &ifa->ifa_dev->ifa_list;
684                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
685                                         if (*ifap == ifa) {
686                                                 inet_del_ifa(ifa->ifa_dev,
687                                                              ifap, 1);
688                                                 break;
689                                         }
690                                 }
691                         } else if (ifa->ifa_preferred_lft !=
692                                    INFINITY_LIFE_TIME &&
693                                    age >= ifa->ifa_preferred_lft &&
694                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
695                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
696                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
697                         }
698                 }
699                 rtnl_unlock();
700         }
701
702         next_sec = round_jiffies_up(next);
703         next_sched = next;
704
705         /* If rounded timeout is accurate enough, accept it. */
706         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
707                 next_sched = next_sec;
708
709         now = jiffies;
710         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
711         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
712                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
713
714         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
715                         next_sched - now);
716 }
717
718 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
719                              __u32 prefered_lft)
720 {
721         unsigned long timeout;
722
723         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
724
725         timeout = addrconf_timeout_fixup(valid_lft, HZ);
726         if (addrconf_finite_timeout(timeout))
727                 ifa->ifa_valid_lft = timeout;
728         else
729                 ifa->ifa_flags |= IFA_F_PERMANENT;
730
731         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
732         if (addrconf_finite_timeout(timeout)) {
733                 if (timeout == 0)
734                         ifa->ifa_flags |= IFA_F_DEPRECATED;
735                 ifa->ifa_preferred_lft = timeout;
736         }
737         ifa->ifa_tstamp = jiffies;
738         if (!ifa->ifa_cstamp)
739                 ifa->ifa_cstamp = ifa->ifa_tstamp;
740 }
741
742 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
743                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
744 {
745         struct nlattr *tb[IFA_MAX+1];
746         struct in_ifaddr *ifa;
747         struct ifaddrmsg *ifm;
748         struct net_device *dev;
749         struct in_device *in_dev;
750         int err;
751
752         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
753         if (err < 0)
754                 goto errout;
755
756         ifm = nlmsg_data(nlh);
757         err = -EINVAL;
758         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
759                 goto errout;
760
761         dev = __dev_get_by_index(net, ifm->ifa_index);
762         err = -ENODEV;
763         if (!dev)
764                 goto errout;
765
766         in_dev = __in_dev_get_rtnl(dev);
767         err = -ENOBUFS;
768         if (!in_dev)
769                 goto errout;
770
771         ifa = inet_alloc_ifa();
772         if (!ifa)
773                 /*
774                  * A potential indev allocation can be left alive, it stays
775                  * assigned to its device and is destroy with it.
776                  */
777                 goto errout;
778
779         ipv4_devconf_setall(in_dev);
780         neigh_parms_data_state_setall(in_dev->arp_parms);
781         in_dev_hold(in_dev);
782
783         if (!tb[IFA_ADDRESS])
784                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
785
786         INIT_HLIST_NODE(&ifa->hash);
787         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
788         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
789         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
790                                          ifm->ifa_flags;
791         ifa->ifa_scope = ifm->ifa_scope;
792         ifa->ifa_dev = in_dev;
793
794         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
795         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
796
797         if (tb[IFA_BROADCAST])
798                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
799
800         if (tb[IFA_LABEL])
801                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
802         else
803                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
804
805         if (tb[IFA_CACHEINFO]) {
806                 struct ifa_cacheinfo *ci;
807
808                 ci = nla_data(tb[IFA_CACHEINFO]);
809                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
810                         err = -EINVAL;
811                         goto errout_free;
812                 }
813                 *pvalid_lft = ci->ifa_valid;
814                 *pprefered_lft = ci->ifa_prefered;
815         }
816
817         return ifa;
818
819 errout_free:
820         inet_free_ifa(ifa);
821 errout:
822         return ERR_PTR(err);
823 }
824
825 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
826 {
827         struct in_device *in_dev = ifa->ifa_dev;
828         struct in_ifaddr *ifa1, **ifap;
829
830         if (!ifa->ifa_local)
831                 return NULL;
832
833         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
834              ifap = &ifa1->ifa_next) {
835                 if (ifa1->ifa_mask == ifa->ifa_mask &&
836                     inet_ifa_match(ifa1->ifa_address, ifa) &&
837                     ifa1->ifa_local == ifa->ifa_local)
838                         return ifa1;
839         }
840         return NULL;
841 }
842
843 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
844 {
845         struct net *net = sock_net(skb->sk);
846         struct in_ifaddr *ifa;
847         struct in_ifaddr *ifa_existing;
848         __u32 valid_lft = INFINITY_LIFE_TIME;
849         __u32 prefered_lft = INFINITY_LIFE_TIME;
850
851         ASSERT_RTNL();
852
853         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
854         if (IS_ERR(ifa))
855                 return PTR_ERR(ifa);
856
857         ifa_existing = find_matching_ifa(ifa);
858         if (!ifa_existing) {
859                 /* It would be best to check for !NLM_F_CREATE here but
860                  * userspace already relies on not having to provide this.
861                  */
862                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
863                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
864                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
865                                                true, ifa);
866
867                         if (ret < 0) {
868                                 inet_free_ifa(ifa);
869                                 return ret;
870                         }
871                 }
872                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
873         } else {
874                 inet_free_ifa(ifa);
875
876                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
877                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
878                         return -EEXIST;
879                 ifa = ifa_existing;
880                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
881                 cancel_delayed_work(&check_lifetime_work);
882                 queue_delayed_work(system_power_efficient_wq,
883                                 &check_lifetime_work, 0);
884                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
885                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
886         }
887         return 0;
888 }
889
890 /*
891  *      Determine a default network mask, based on the IP address.
892  */
893
894 static int inet_abc_len(__be32 addr)
895 {
896         int rc = -1;    /* Something else, probably a multicast. */
897
898         if (ipv4_is_zeronet(addr))
899                 rc = 0;
900         else {
901                 __u32 haddr = ntohl(addr);
902
903                 if (IN_CLASSA(haddr))
904                         rc = 8;
905                 else if (IN_CLASSB(haddr))
906                         rc = 16;
907                 else if (IN_CLASSC(haddr))
908                         rc = 24;
909         }
910
911         return rc;
912 }
913
914
/*
 * devinet_ioctl - serve the IPv4 address ioctls of one interface
 * @net: network namespace the request applies to
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; any other value fails with -EINVAL
 * @arg: user pointer to a struct ifreq; read on entry and, for the four
 *       "get" commands, written back on success
 *
 * The "set" commands need CAP_NET_ADMIN in @net's user namespace, and the
 * address-setting ones additionally need an AF_INET address in the request.
 * The address to operate on is looked up under RTNL by label (and, for the
 * "get" commands with an AF_INET address in the request, by label plus
 * local address first).
 *
 * Returns 0 on success or a negative errno: -EFAULT (user copy failed),
 * -EPERM, -EINVAL, -ENODEV (no such device), -EADDRNOTAVAIL (no matching
 * address), -ENOBUFS (allocation failed), or whatever dev_change_flags()
 * or inet_set_ifa() return.
 */
915 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
916 {
917         struct ifreq ifr;
918         struct sockaddr_in sin_orig;
919         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
920         struct in_device *in_dev;
921         struct in_ifaddr **ifap = NULL;
922         struct in_ifaddr *ifa = NULL;
923         struct net_device *dev;
924         char *colon;
925         int ret = -EFAULT;
926         int tryaddrmatch = 0;
927
928         /*
929          *      Fetch the caller's info block into kernel space
930          */
931
932         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
933                 goto out;
            /* Never trust user space to have terminated the name. */
934         ifr.ifr_name[IFNAMSIZ - 1] = 0;
935
936         /* save original address for comparison */
937         memcpy(&sin_orig, sin, sizeof(*sin));
938
            /* Temporarily cut the name at its first ':' so that dev_load()
             * and the device lookup below see only the part before it; the
             * ':' is put back once the device has been found, because the
             * label comparisons use the full name.
             */
939         colon = strchr(ifr.ifr_name, ':');
940         if (colon)
941                 *colon = 0;
942
943         dev_load(net, ifr.ifr_name);
944
            /* First pass over cmd: permission and argument checks only. */
945         switch (cmd) {
946         case SIOCGIFADDR:       /* Get interface address */
947         case SIOCGIFBRDADDR:    /* Get the broadcast address */
948         case SIOCGIFDSTADDR:    /* Get the destination address */
949         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
950                 /* Note that these ioctls will not sleep,
951                    so that we do not impose a lock.
952                    One day we will be forced to put shlock here (I mean SMP)
953                  */
954                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
                    /* The reply is built in place; start from a clean sockaddr. */
955                 memset(sin, 0, sizeof(*sin));
956                 sin->sin_family = AF_INET;
957                 break;
958
959         case SIOCSIFFLAGS:
960                 ret = -EPERM;
961                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
962                         goto out;
963                 break;
964         case SIOCSIFADDR:       /* Set interface address (and family) */
965         case SIOCSIFBRDADDR:    /* Set the broadcast address */
966         case SIOCSIFDSTADDR:    /* Set the destination address */
967         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
968                 ret = -EPERM;
969                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
970                         goto out;
971                 ret = -EINVAL;
972                 if (sin->sin_family != AF_INET)
973                         goto out;
974                 break;
975         default:
976                 ret = -EINVAL;
977                 goto out;
978         }
979
980         rtnl_lock();
981
982         ret = -ENODEV;
983         dev = __dev_get_by_name(net, ifr.ifr_name);
984         if (!dev)
985                 goto done;
986
987         if (colon)
988                 *colon = ':';
989
990         in_dev = __in_dev_get_rtnl(dev);
991         if (in_dev) {
992                 if (tryaddrmatch) {
993                         /* Matthias Andree */
994                         /* compare label and address (4.4BSD style) */
995                         /* note: we only do this for a limited set of ioctls
996                            and only if the original address family was AF_INET.
997                            This is checked above. */
998                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
999                              ifap = &ifa->ifa_next) {
1000                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1001                                     sin_orig.sin_addr.s_addr ==
1002                                                         ifa->ifa_local) {
1003                                         break; /* found */
1004                                 }
1005                         }
1006                 }
1007                 /* we didn't get a match, maybe the application is
1008                    4.3BSD-style and passed in junk so we fall back to
1009                    comparing just the label */
1010                 if (!ifa) {
1011                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1012                              ifap = &ifa->ifa_next)
1013                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1014                                         break;
1015                 }
1016         }
1017
             /* Only SIOCSIFADDR (which can create the address) and
              * SIOCSIFFLAGS may proceed without an existing address; every
              * other command dereferences ifa below.
              */
1018         ret = -EADDRNOTAVAIL;
1019         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1020                 goto done;
1021
             /* Second pass over cmd: do the actual work. */
1022         switch (cmd) {
1023         case SIOCGIFADDR:       /* Get interface address */
1024                 sin->sin_addr.s_addr = ifa->ifa_local;
1025                 goto rarok;
1026
1027         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1028                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1029                 goto rarok;
1030
1031         case SIOCGIFDSTADDR:    /* Get the destination address */
1032                 sin->sin_addr.s_addr = ifa->ifa_address;
1033                 goto rarok;
1034
1035         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1036                 sin->sin_addr.s_addr = ifa->ifa_mask;
1037                 goto rarok;
1038
1039         case SIOCSIFFLAGS:
                     /* A name containing ':' addresses one labelled address
                      * rather than the device: clearing IFF_UP hands that
                      * address to inet_del_ifa() and the device flags are
                      * not touched.
                      */
1040                 if (colon) {
1041                         ret = -EADDRNOTAVAIL;
1042                         if (!ifa)
1043                                 break;
1044                         ret = 0;
1045                         if (!(ifr.ifr_flags & IFF_UP))
1046                                 inet_del_ifa(in_dev, ifap, 1);
1047                         break;
1048                 }
1049                 ret = dev_change_flags(dev, ifr.ifr_flags);
1050                 break;
1051
1052         case SIOCSIFADDR:       /* Set interface address (and family) */
1053                 ret = -EINVAL;
1054                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1055                         break;
1056
1057                 if (!ifa) {
                             /* No address under this label yet: create one. */
1058                         ret = -ENOBUFS;
1059                         ifa = inet_alloc_ifa();
1060                         if (!ifa)
1061                                 break;
1062                         INIT_HLIST_NODE(&ifa->hash);
1063                         if (colon)
1064                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1065                         else
1066                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1067                 } else {
                             /* Existing address: nothing to do when it is
                              * unchanged, otherwise take it off the list and
                              * reuse the structure for the new address.
                              */
1068                         ret = 0;
1069                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1070                                 break;
1071                         inet_del_ifa(in_dev, ifap, 0);
1072                         ifa->ifa_broadcast = 0;
1073                         ifa->ifa_scope = 0;
1074                 }
1075
1076                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1077
                     /* Derive prefix length, mask and broadcast address from
                      * the address class, or use a /32 on point-to-point
                      * devices.
                      */
1078                 if (!(dev->flags & IFF_POINTOPOINT)) {
1079                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1080                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1081                         if ((dev->flags & IFF_BROADCAST) &&
1082                             ifa->ifa_prefixlen < 31)
1083                                 ifa->ifa_broadcast = ifa->ifa_address |
1084                                                      ~ifa->ifa_mask;
1085                 } else {
1086                         ifa->ifa_prefixlen = 32;
1087                         ifa->ifa_mask = inet_make_mask(32);
1088                 }
1089                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1090                 ret = inet_set_ifa(dev, ifa);
1091                 break;
1092
1093         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1094                 ret = 0;
                     /* On a change: unlink, modify, insert again. */
1095                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1096                         inet_del_ifa(in_dev, ifap, 0);
1097                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1098                         inet_insert_ifa(ifa);
1099                 }
1100                 break;
1101
1102         case SIOCSIFDSTADDR:    /* Set the destination address */
1103                 ret = 0;
1104                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1105                         break;
1106                 ret = -EINVAL;
1107                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1108                         break;
1109                 ret = 0;
1110                 inet_del_ifa(in_dev, ifap, 0);
1111                 ifa->ifa_address = sin->sin_addr.s_addr;
1112                 inet_insert_ifa(ifa);
1113                 break;
1114
1115         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1116
1117                 /*
1118                  *      The mask we set must be legal.
1119                  */
1120                 ret = -EINVAL;
1121                 if (bad_mask(sin->sin_addr.s_addr, 0))
1122                         break;
1123                 ret = 0;
1124                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1125                         __be32 old_mask = ifa->ifa_mask;
1126                         inet_del_ifa(in_dev, ifap, 0);
1127                         ifa->ifa_mask = sin->sin_addr.s_addr;
1128                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1129
1130                         /* See if current broadcast address matches
1131                          * with current netmask, then recalculate
1132                          * the broadcast address. Otherwise it's a
1133                          * funny address, so don't touch it since
1134                          * the user seems to know what (s)he's doing...
1135                          */
1136                         if ((dev->flags & IFF_BROADCAST) &&
1137                             (ifa->ifa_prefixlen < 31) &&
1138                             (ifa->ifa_broadcast ==
1139                              (ifa->ifa_local|~old_mask))) {
1140                                 ifa->ifa_broadcast = (ifa->ifa_local |
1141                                                       ~sin->sin_addr.s_addr);
1142                         }
1143                         inet_insert_ifa(ifa);
1144                 }
1145                 break;
1146         }
1147 done:
1148         rtnl_unlock();
1149 out:
1150         return ret;
     /* Exit path of the "get" commands: RTNL is dropped first, then the
      * ifreq with the filled-in address is copied back to user space.
      */
1151 rarok:
1152         rtnl_unlock();
1153         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1154         goto out;
1155 }
1156
1157 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1158 {
1159         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1160         struct in_ifaddr *ifa;
1161         struct ifreq ifr;
1162         int done = 0;
1163
1164         if (!in_dev)
1165                 goto out;
1166
1167         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1168                 if (!buf) {
1169                         done += sizeof(ifr);
1170                         continue;
1171                 }
1172                 if (len < (int) sizeof(ifr))
1173                         break;
1174                 memset(&ifr, 0, sizeof(struct ifreq));
1175                 strcpy(ifr.ifr_name, ifa->ifa_label);
1176
1177                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1178                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1179                                                                 ifa->ifa_local;
1180
1181                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1182                         done = -EFAULT;
1183                         break;
1184                 }
1185                 buf  += sizeof(struct ifreq);
1186                 len  -= sizeof(struct ifreq);
1187                 done += sizeof(struct ifreq);
1188         }
1189 out:
1190         return done;
1191 }
1192
1193 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1194 {
1195         __be32 addr = 0;
1196         struct in_device *in_dev;
1197         struct net *net = dev_net(dev);
1198
1199         rcu_read_lock();
1200         in_dev = __in_dev_get_rcu(dev);
1201         if (!in_dev)
1202                 goto no_in_dev;
1203
1204         for_primary_ifa(in_dev) {
1205                 if (ifa->ifa_scope > scope)
1206                         continue;
1207                 if (!dst || inet_ifa_match(dst, ifa)) {
1208                         addr = ifa->ifa_local;
1209                         break;
1210                 }
1211                 if (!addr)
1212                         addr = ifa->ifa_local;
1213         } endfor_ifa(in_dev);
1214
1215         if (addr)
1216                 goto out_unlock;
1217 no_in_dev:
1218
1219         /* Not loopback addresses on loopback should be preferred
1220            in this case. It is important that lo is the first interface
1221            in dev_base list.
1222          */
1223         for_each_netdev_rcu(net, dev) {
1224                 in_dev = __in_dev_get_rcu(dev);
1225                 if (!in_dev)
1226                         continue;
1227
1228                 for_primary_ifa(in_dev) {
1229                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1230                             ifa->ifa_scope <= scope) {
1231                                 addr = ifa->ifa_local;
1232                                 goto out_unlock;
1233                         }
1234                 } endfor_ifa(in_dev);
1235         }
1236 out_unlock:
1237         rcu_read_unlock();
1238         return addr;
1239 }
1240 EXPORT_SYMBOL(inet_select_addr);
1241
/*
 * confirm_addr_indev - look for a usable local address on one in_device
 * @in_dev: device whose address list is walked with for_ifa()
 * @dst: if non-zero, an entry must match it per inet_ifa_match()
 * @local: if non-zero, the local address that is being confirmed
 * @scope: largest ifa_scope value accepted for the returned address
 *
 * Two pieces of state are built up during the walk:
 *   addr - the candidate result: the first entry whose ifa_local equals
 *          @local (any entry when @local is 0) and whose scope is
 *          within @scope;
 *   same - set once an entry has been seen that matches both @local and
 *          @dst per inet_ifa_match() (a zero value always matches).
 *
 * Returns the candidate address if 'same' ended up set, otherwise 0.
 */
1242 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1243                               __be32 local, int scope)
1244 {
1245         int same = 0;
1246         __be32 addr = 0;
1247
1248         for_ifa(in_dev) {
                     /* Take the first acceptable entry as the candidate. */
1249                 if (!addr &&
1250                     (local == ifa->ifa_local || !local) &&
1251                     ifa->ifa_scope <= scope) {
1252                         addr = ifa->ifa_local;
                             /* Both conditions are now met: done. */
1253                         if (same)
1254                                 break;
1255                 }
1256                 if (!same) {
1257                         same = (!local || inet_ifa_match(local, ifa)) &&
1258                                 (!dst || inet_ifa_match(dst, ifa));
1259                         if (same && addr) {
                                     /* The extra checks below only apply
                                      * when the address is auto-selected
                                      * (@local == 0) for a given @dst.
                                      */
1260                                 if (local || !dst)
1261                                         break;
1262                                 /* Is the selected addr into dst subnet? */
1263                                 if (inet_ifa_match(addr, ifa))
1264                                         break;
1265                                 /* No, then can we use new local src? */
1266                                 if (ifa->ifa_scope <= scope) {
1267                                         addr = ifa->ifa_local;
1268                                         break;
1269                                 }
1270                                 /* search for large dst subnet for addr */
1271                                 same = 0;
1272                         }
1273                 }
1274         } endfor_ifa(in_dev);
1275
1276         return same ? addr : 0;
1277 }
1278
1279 /*
1280  * Confirm that local IP address exists using wildcards:
1281  * - net: netns to check, cannot be NULL
1282  * - in_dev: only on this interface, NULL=any interface
1283  * - dst: only in the same subnet as dst, 0=any dst
1284  * - local: address, 0=autoselect the local address
1285  * - scope: maximum allowed scope value for the local address
1286  */
1287 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1288                          __be32 dst, __be32 local, int scope)
1289 {
1290         __be32 addr = 0;
1291         struct net_device *dev;
1292
1293         if (in_dev)
1294                 return confirm_addr_indev(in_dev, dst, local, scope);
1295
1296         rcu_read_lock();
1297         for_each_netdev_rcu(net, dev) {
1298                 in_dev = __in_dev_get_rcu(dev);
1299                 if (in_dev) {
1300                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1301                         if (addr)
1302                                 break;
1303                 }
1304         }
1305         rcu_read_unlock();
1306
1307         return addr;
1308 }
1309 EXPORT_SYMBOL(inet_confirm_addr);
1310
1311 /*
1312  *      Device notifier
1313  */
1314
1315 int register_inetaddr_notifier(struct notifier_block *nb)
1316 {
1317         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1318 }
1319 EXPORT_SYMBOL(register_inetaddr_notifier);
1320
1321 int unregister_inetaddr_notifier(struct notifier_block *nb)
1322 {
1323         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1324 }
1325 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1326
1327 /* Rename ifa_labels for a device name change. Make some effort to preserve
1328  * existing alias numbering and to create unique labels if possible.
1329 */
1330 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1331 {
1332         struct in_ifaddr *ifa;
1333         int named = 0;
1334
1335         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1336                 char old[IFNAMSIZ], *dot;
1337
1338                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1339                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1340                 if (named++ == 0)
1341                         goto skip;
1342                 dot = strchr(old, ':');
1343                 if (!dot) {
1344                         sprintf(old, ":%d", named);
1345                         dot = old;
1346                 }
1347                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1348                         strcat(ifa->ifa_label, dot);
1349                 else
1350                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1351 skip:
1352                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1353         }
1354 }
1355
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * The limit is 68 bytes, the smallest datagram size every IPv4 link has to
 * be able to carry (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	enum { min_ipv4_mtu = 68 };

	return !(mtu < min_ipv4_mtu);
}
1360
1361 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1362                                         struct in_device *in_dev)
1363
1364 {
1365         struct in_ifaddr *ifa;
1366
1367         for (ifa = in_dev->ifa_list; ifa;
1368              ifa = ifa->ifa_next) {
1369                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1370                          ifa->ifa_local, dev,
1371                          ifa->ifa_local, NULL,
1372                          dev->dev_addr, NULL);
1373         }
1374 }
1375
1376 /* Called only under RTNL semaphore */
1377
/*
 * inetdev_event - netdevice notifier callback maintaining per-device IPv4
 * state
 * @this: the notifier block (not used in the body)
 * @event: NETDEV_* event code
 * @ptr: notifier info; the device is extracted from it with
 *       netdev_notifier_info_to_dev()
 *
 * While the device has no in_device, one is created on NETDEV_REGISTER and
 * on a NETDEV_CHANGEMTU to a valid MTU; every other event is ignored in
 * that state.  Once an in_device exists the event is dispatched by the
 * switch below.
 *
 * Returns NOTIFY_DONE, except that a failing inetdev_init() on
 * NETDEV_REGISTER is reported via notifier_from_errno().
 */
1378 static int inetdev_event(struct notifier_block *this, unsigned long event,
1379                          void *ptr)
1380 {
1381         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1382         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1383
1384         ASSERT_RTNL();
1385
1386         if (!in_dev) {
1387                 if (event == NETDEV_REGISTER) {
1388                         in_dev = inetdev_init(dev);
1389                         if (IS_ERR(in_dev))
1390                                 return notifier_from_errno(PTR_ERR(in_dev));
1391                         if (dev->flags & IFF_LOOPBACK) {
1392                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1393                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1394                         }
1395                 } else if (event == NETDEV_CHANGEMTU) {
1396                         /* Re-enabling IP */
1397                         if (inetdev_valid_mtu(dev->mtu))
1398                                 in_dev = inetdev_init(dev);
1399                 }
1400                 goto out;
1401         }
1402
1403         switch (event) {
1404         case NETDEV_REGISTER:
                     /* A registering device should not have an in_device
                      * yet; log it and clear the pointer.
                      */
1405                 pr_debug("%s: bug\n", __func__);
1406                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1407                 break;
1408         case NETDEV_UP:
1409                 if (!inetdev_valid_mtu(dev->mtu))
1410                         break;
                     /* A loopback device gets INADDR_LOOPBACK/8 with host
                      * scope and infinite lifetimes as soon as it comes up;
                      * an allocation failure is silently tolerated.
                      */
1411                 if (dev->flags & IFF_LOOPBACK) {
1412                         struct in_ifaddr *ifa = inet_alloc_ifa();
1413
1414                         if (ifa) {
1415                                 INIT_HLIST_NODE(&ifa->hash);
1416                                 ifa->ifa_local =
1417                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1418                                 ifa->ifa_prefixlen = 8;
1419                                 ifa->ifa_mask = inet_make_mask(8);
1420                                 in_dev_hold(in_dev);
1421                                 ifa->ifa_dev = in_dev;
1422                                 ifa->ifa_scope = RT_SCOPE_HOST;
1423                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1424                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1425                                                  INFINITY_LIFE_TIME);
1426                                 ipv4_devconf_setall(in_dev);
1427                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1428                                 inet_insert_ifa(ifa);
1429                         }
1430                 }
1431                 ip_mc_up(in_dev);
1432                 /* fall through */
1433         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when the
                      * device has ARP_NOTIFY enabled.
                      */
1434                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1435                         break;
1436                 /* fall through */
1437         case NETDEV_NOTIFY_PEERS:
1438                 /* Send gratuitous ARP to notify of link change */
1439                 inetdev_send_gratuitous_arp(dev, in_dev);
1440                 break;
1441         case NETDEV_DOWN:
1442                 ip_mc_down(in_dev);
1443                 break;
1444         case NETDEV_PRE_TYPE_CHANGE:
1445                 ip_mc_unmap(in_dev);
1446                 break;
1447         case NETDEV_POST_TYPE_CHANGE:
1448                 ip_mc_remap(in_dev);
1449                 break;
1450         case NETDEV_CHANGEMTU:
1451                 if (inetdev_valid_mtu(dev->mtu))
1452                         break;
1453                 /* disable IP when MTU is not enough */
                     /* fall through */
1454         case NETDEV_UNREGISTER:
1455                 inetdev_destroy(in_dev);
1456                 break;
1457         case NETDEV_CHANGENAME:
1458                 /* Do not notify about label change, this event is
1459                  * not interesting to applications using netlink.
1460                  */
                     /* NOTE(review): inetdev_changename() does call
                      * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so
                      * the remark above looks stale - confirm.
                      */
1461                 inetdev_changename(dev, in_dev);
1462
                     /* Re-register the sysctl entries after the rename. */
1463                 devinet_sysctl_unregister(in_dev);
1464                 devinet_sysctl_register(in_dev);
1465                 break;
1466         }
1467 out:
1468         return NOTIFY_DONE;
1469 }
1470
/*
 * Notifier block that routes events to inetdev_event().  Where it gets
 * registered is not visible in this part of the file.
 */
1471 static struct notifier_block ip_netdev_notifier = {
1472         .notifier_call = inetdev_event,
1473 };
1474
1475 static size_t inet_nlmsg_size(void)
1476 {
1477         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1478                + nla_total_size(4) /* IFA_ADDRESS */
1479                + nla_total_size(4) /* IFA_LOCAL */
1480                + nla_total_size(4) /* IFA_BROADCAST */
1481                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1482                + nla_total_size(4)  /* IFA_FLAGS */
1483                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1484 }
1485
1486 static inline u32 cstamp_delta(unsigned long cstamp)
1487 {
1488         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1489 }
1490
1491 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1492                          unsigned long tstamp, u32 preferred, u32 valid)
1493 {
1494         struct ifa_cacheinfo ci;
1495
1496         ci.cstamp = cstamp_delta(cstamp);
1497         ci.tstamp = cstamp_delta(tstamp);
1498         ci.ifa_prefered = preferred;
1499         ci.ifa_valid = valid;
1500
1501         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1502 }
1503
1504 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1505                             u32 portid, u32 seq, int event, unsigned int flags)
1506 {
1507         struct ifaddrmsg *ifm;
1508         struct nlmsghdr  *nlh;
1509         u32 preferred, valid;
1510
1511         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1512         if (!nlh)
1513                 return -EMSGSIZE;
1514
1515         ifm = nlmsg_data(nlh);
1516         ifm->ifa_family = AF_INET;
1517         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1518         ifm->ifa_flags = ifa->ifa_flags;
1519         ifm->ifa_scope = ifa->ifa_scope;
1520         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1521
1522         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1523                 preferred = ifa->ifa_preferred_lft;
1524                 valid = ifa->ifa_valid_lft;
1525                 if (preferred != INFINITY_LIFE_TIME) {
1526                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1527
1528                         if (preferred > tval)
1529                                 preferred -= tval;
1530                         else
1531                                 preferred = 0;
1532                         if (valid != INFINITY_LIFE_TIME) {
1533                                 if (valid > tval)
1534                                         valid -= tval;
1535                                 else
1536                                         valid = 0;
1537                         }
1538                 }
1539         } else {
1540                 preferred = INFINITY_LIFE_TIME;
1541                 valid = INFINITY_LIFE_TIME;
1542         }
1543         if ((ifa->ifa_address &&
1544              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1545             (ifa->ifa_local &&
1546              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1547             (ifa->ifa_broadcast &&
1548              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1549             (ifa->ifa_label[0] &&
1550              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1551             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1552             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1553                           preferred, valid))
1554                 goto nla_put_failure;
1555
1556         nlmsg_end(skb, nlh);
1557         return 0;
1558
1559 nla_put_failure:
1560         nlmsg_cancel(skb, nlh);
1561         return -EMSGSIZE;
1562 }
1563
/*
 * inet_dump_ifaddr - netlink dump callback: emit one RTM_NEWADDR message
 * (flagged NLM_F_MULTI) per IPv4 address in the namespace of @skb's socket
 * @skb: buffer the messages are appended to
 * @cb: dump state; cb->args[] holds the resume position between calls:
 *      [0] bucket index into net->dev_index_head,
 *      [1] index of the device within that bucket,
 *      [2] index of the address within that device's list
 *
 * The walk stops when inet_fill_ifaddr() fails for an address; the
 * position reached is stored so that a later call resumes there.
 *
 * Returns skb->len.
 */
1564 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1565 {
1566         struct net *net = sock_net(skb->sk);
1567         int h, s_h;
1568         int idx, s_idx;
1569         int ip_idx, s_ip_idx;
1570         struct net_device *dev;
1571         struct in_device *in_dev;
1572         struct in_ifaddr *ifa;
1573         struct hlist_head *head;
1574
             /* s_* are the saved start positions, the others run. */
1575         s_h = cb->args[0];
1576         s_idx = idx = cb->args[1];
1577         s_ip_idx = ip_idx = cb->args[2];
1578
             /* s_idx only applies to the first bucket visited. */
1579         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1580                 idx = 0;
1581                 head = &net->dev_index_head[h];
1582                 rcu_read_lock();
                     /* Sequence value consumed by nl_dump_check_consistent()
                      * below.
                      */
1583                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1584                           net->dev_base_seq;
1585                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1586                         if (idx < s_idx)
1587                                 goto cont;
                             /* Beyond the device the previous call stopped
                              * in: start from that device's first address.
                              */
1588                         if (h > s_h || idx > s_idx)
1589                                 s_ip_idx = 0;
1590                         in_dev = __in_dev_get_rcu(dev);
1591                         if (!in_dev)
1592                                 goto cont;
1593
1594                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1595                              ifa = ifa->ifa_next, ip_idx++) {
1596                                 if (ip_idx < s_ip_idx)
1597                                         continue;
1598                                 if (inet_fill_ifaddr(skb, ifa,
1599                                              NETLINK_CB(cb->skb).portid,
1600                                              cb->nlh->nlmsg_seq,
1601                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
                                             /* Stop here; h, idx and ip_idx
                                              * are saved at 'done'.
                                              */
1602                                         rcu_read_unlock();
1603                                         goto done;
1604                                 }
1605                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1606                         }
1607 cont:
1608                         idx++;
1609                 }
1610                 rcu_read_unlock();
1611         }
1612
1613 done:
1614         cb->args[0] = h;
1615         cb->args[1] = idx;
1616         cb->args[2] = ip_idx;
1617
1618         return skb->len;
1619 }
1620
1621 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1622                       u32 portid)
1623 {
1624         struct sk_buff *skb;
1625         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1626         int err = -ENOBUFS;
1627         struct net *net;
1628
1629         net = dev_net(ifa->ifa_dev->dev);
1630         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1631         if (!skb)
1632                 goto errout;
1633
1634         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1635         if (err < 0) {
1636                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1637                 WARN_ON(err == -EMSGSIZE);
1638                 kfree_skb(skb);
1639                 goto errout;
1640         }
1641         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1642         return;
1643 errout:
1644         if (err < 0)
1645                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1646 }
1647
1648 static size_t inet_get_link_af_size(const struct net_device *dev)
1649 {
1650         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1651
1652         if (!in_dev)
1653                 return 0;
1654
1655         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1656 }
1657
1658 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1659 {
1660         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1661         struct nlattr *nla;
1662         int i;
1663
1664         if (!in_dev)
1665                 return -ENODATA;
1666
1667         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1668         if (!nla)
1669                 return -EMSGSIZE;
1670
1671         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1672                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1673
1674         return 0;
1675 }
1676
1677 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1678         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1679 };
1680
1681 static int inet_validate_link_af(const struct net_device *dev,
1682                                  const struct nlattr *nla)
1683 {
1684         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1685         int err, rem;
1686
1687         if (dev && !__in_dev_get_rtnl(dev))
1688                 return -EAFNOSUPPORT;
1689
1690         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1691         if (err < 0)
1692                 return err;
1693
1694         if (tb[IFLA_INET_CONF]) {
1695                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1696                         int cfgid = nla_type(a);
1697
1698                         if (nla_len(a) < 4)
1699                                 return -EINVAL;
1700
1701                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1702                                 return -EINVAL;
1703                 }
1704         }
1705
1706         return 0;
1707 }
1708
1709 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1710 {
1711         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1712         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1713         int rem;
1714
1715         if (!in_dev)
1716                 return -EAFNOSUPPORT;
1717
1718         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1719                 BUG();
1720
1721         if (tb[IFLA_INET_CONF]) {
1722                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1723                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1724         }
1725
1726         return 0;
1727 }
1728
1729 static int inet_netconf_msgsize_devconf(int type)
1730 {
1731         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1732                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1733
1734         /* type -1 is used for ALL */
1735         if (type == -1 || type == NETCONFA_FORWARDING)
1736                 size += nla_total_size(4);
1737         if (type == -1 || type == NETCONFA_RP_FILTER)
1738                 size += nla_total_size(4);
1739         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1740                 size += nla_total_size(4);
1741         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1742                 size += nla_total_size(4);
1743
1744         return size;
1745 }
1746
1747 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1748                                      struct ipv4_devconf *devconf, u32 portid,
1749                                      u32 seq, int event, unsigned int flags,
1750                                      int type)
1751 {
1752         struct nlmsghdr  *nlh;
1753         struct netconfmsg *ncm;
1754
1755         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1756                         flags);
1757         if (!nlh)
1758                 return -EMSGSIZE;
1759
1760         ncm = nlmsg_data(nlh);
1761         ncm->ncm_family = AF_INET;
1762
1763         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1764                 goto nla_put_failure;
1765
1766         /* type -1 is used for ALL */
1767         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1768             nla_put_s32(skb, NETCONFA_FORWARDING,
1769                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1770                 goto nla_put_failure;
1771         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1772             nla_put_s32(skb, NETCONFA_RP_FILTER,
1773                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1774                 goto nla_put_failure;
1775         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1776             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1777                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1778                 goto nla_put_failure;
1779         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1780             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1781                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1782                 goto nla_put_failure;
1783
1784         nlmsg_end(skb, nlh);
1785         return 0;
1786
1787 nla_put_failure:
1788         nlmsg_cancel(skb, nlh);
1789         return -EMSGSIZE;
1790 }
1791
1792 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1793                                  struct ipv4_devconf *devconf)
1794 {
1795         struct sk_buff *skb;
1796         int err = -ENOBUFS;
1797
1798         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1799         if (!skb)
1800                 goto errout;
1801
1802         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1803                                         RTM_NEWNETCONF, 0, type);
1804         if (err < 0) {
1805                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1806                 WARN_ON(err == -EMSGSIZE);
1807                 kfree_skb(skb);
1808                 goto errout;
1809         }
1810         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1811         return;
1812 errout:
1813         if (err < 0)
1814                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1815 }
1816
1817 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1818         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1819         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1820         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1821         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1822 };
1823
1824 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1825                                     struct nlmsghdr *nlh)
1826 {
1827         struct net *net = sock_net(in_skb->sk);
1828         struct nlattr *tb[NETCONFA_MAX+1];
1829         struct netconfmsg *ncm;
1830         struct sk_buff *skb;
1831         struct ipv4_devconf *devconf;
1832         struct in_device *in_dev;
1833         struct net_device *dev;
1834         int ifindex;
1835         int err;
1836
1837         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1838                           devconf_ipv4_policy);
1839         if (err < 0)
1840                 goto errout;
1841
1842         err = EINVAL;
1843         if (!tb[NETCONFA_IFINDEX])
1844                 goto errout;
1845
1846         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1847         switch (ifindex) {
1848         case NETCONFA_IFINDEX_ALL:
1849                 devconf = net->ipv4.devconf_all;
1850                 break;
1851         case NETCONFA_IFINDEX_DEFAULT:
1852                 devconf = net->ipv4.devconf_dflt;
1853                 break;
1854         default:
1855                 dev = __dev_get_by_index(net, ifindex);
1856                 if (!dev)
1857                         goto errout;
1858                 in_dev = __in_dev_get_rtnl(dev);
1859                 if (!in_dev)
1860                         goto errout;
1861                 devconf = &in_dev->cnf;
1862                 break;
1863         }
1864
1865         err = -ENOBUFS;
1866         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1867         if (!skb)
1868                 goto errout;
1869
1870         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1871                                         NETLINK_CB(in_skb).portid,
1872                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1873                                         -1);
1874         if (err < 0) {
1875                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1876                 WARN_ON(err == -EMSGSIZE);
1877                 kfree_skb(skb);
1878                 goto errout;
1879         }
1880         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1881 errout:
1882         return err;
1883 }
1884
1885 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1886                                      struct netlink_callback *cb)
1887 {
1888         struct net *net = sock_net(skb->sk);
1889         int h, s_h;
1890         int idx, s_idx;
1891         struct net_device *dev;
1892         struct in_device *in_dev;
1893         struct hlist_head *head;
1894
1895         s_h = cb->args[0];
1896         s_idx = idx = cb->args[1];
1897
1898         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1899                 idx = 0;
1900                 head = &net->dev_index_head[h];
1901                 rcu_read_lock();
1902                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1903                           net->dev_base_seq;
1904                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1905                         if (idx < s_idx)
1906                                 goto cont;
1907                         in_dev = __in_dev_get_rcu(dev);
1908                         if (!in_dev)
1909                                 goto cont;
1910
1911                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1912                                                       &in_dev->cnf,
1913                                                       NETLINK_CB(cb->skb).portid,
1914                                                       cb->nlh->nlmsg_seq,
1915                                                       RTM_NEWNETCONF,
1916                                                       NLM_F_MULTI,
1917                                                       -1) < 0) {
1918                                 rcu_read_unlock();
1919                                 goto done;
1920                         }
1921                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1922 cont:
1923                         idx++;
1924                 }
1925                 rcu_read_unlock();
1926         }
1927         if (h == NETDEV_HASHENTRIES) {
1928                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1929                                               net->ipv4.devconf_all,
1930                                               NETLINK_CB(cb->skb).portid,
1931                                               cb->nlh->nlmsg_seq,
1932                                               RTM_NEWNETCONF, NLM_F_MULTI,
1933                                               -1) < 0)
1934                         goto done;
1935                 else
1936                         h++;
1937         }
1938         if (h == NETDEV_HASHENTRIES + 1) {
1939                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1940                                               net->ipv4.devconf_dflt,
1941                                               NETLINK_CB(cb->skb).portid,
1942                                               cb->nlh->nlmsg_seq,
1943                                               RTM_NEWNETCONF, NLM_F_MULTI,
1944                                               -1) < 0)
1945                         goto done;
1946                 else
1947                         h++;
1948         }
1949 done:
1950         cb->args[0] = h;
1951         cb->args[1] = idx;
1952
1953         return skb->len;
1954 }
1955
1956 #ifdef CONFIG_SYSCTL
1957
1958 static void devinet_copy_dflt_conf(struct net *net, int i)
1959 {
1960         struct net_device *dev;
1961
1962         rcu_read_lock();
1963         for_each_netdev_rcu(net, dev) {
1964                 struct in_device *in_dev;
1965
1966                 in_dev = __in_dev_get_rcu(dev);
1967                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1968                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1969         }
1970         rcu_read_unlock();
1971 }
1972
1973 /* called with RTNL locked */
1974 static void inet_forward_change(struct net *net)
1975 {
1976         struct net_device *dev;
1977         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1978
1979         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1980         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1981         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1982                                     NETCONFA_IFINDEX_ALL,
1983                                     net->ipv4.devconf_all);
1984         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1985                                     NETCONFA_IFINDEX_DEFAULT,
1986                                     net->ipv4.devconf_dflt);
1987
1988         for_each_netdev(net, dev) {
1989                 struct in_device *in_dev;
1990                 if (on)
1991                         dev_disable_lro(dev);
1992                 rcu_read_lock();
1993                 in_dev = __in_dev_get_rcu(dev);
1994                 if (in_dev) {
1995                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1996                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1997                                                     dev->ifindex, &in_dev->cnf);
1998                 }
1999                 rcu_read_unlock();
2000         }
2001 }
2002
2003 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2004 {
2005         if (cnf == net->ipv4.devconf_dflt)
2006                 return NETCONFA_IFINDEX_DEFAULT;
2007         else if (cnf == net->ipv4.devconf_all)
2008                 return NETCONFA_IFINDEX_ALL;
2009         else {
2010                 struct in_device *idev
2011                         = container_of(cnf, struct in_device, cnf);
2012                 return idev->dev->ifindex;
2013         }
2014 }
2015
2016 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2017                              void __user *buffer,
2018                              size_t *lenp, loff_t *ppos)
2019 {
2020         int old_value = *(int *)ctl->data;
2021         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2022         int new_value = *(int *)ctl->data;
2023
2024         if (write) {
2025                 struct ipv4_devconf *cnf = ctl->extra1;
2026                 struct net *net = ctl->extra2;
2027                 int i = (int *)ctl->data - cnf->data;
2028                 int ifindex;
2029
2030                 set_bit(i, cnf->state);
2031
2032                 if (cnf == net->ipv4.devconf_dflt)
2033                         devinet_copy_dflt_conf(net, i);
2034                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2035                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2036                         if ((new_value == 0) && (old_value != 0))
2037                                 rt_cache_flush(net);
2038
2039                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2040                     new_value != old_value) {
2041                         ifindex = devinet_conf_ifindex(net, cnf);
2042                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2043                                                     ifindex, cnf);
2044                 }
2045                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2046                     new_value != old_value) {
2047                         ifindex = devinet_conf_ifindex(net, cnf);
2048                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2049                                                     ifindex, cnf);
2050                 }
2051         }
2052
2053         return ret;
2054 }
2055
2056 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2057                                   void __user *buffer,
2058                                   size_t *lenp, loff_t *ppos)
2059 {
2060         int *valp = ctl->data;
2061         int val = *valp;
2062         loff_t pos = *ppos;
2063         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2064
2065         if (write && *valp != val) {
2066                 struct net *net = ctl->extra2;
2067
2068                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2069                         if (!rtnl_trylock()) {
2070                                 /* Restore the original values before restarting */
2071                                 *valp = val;
2072                                 *ppos = pos;
2073                                 return restart_syscall();
2074                         }
2075                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2076                                 inet_forward_change(net);
2077                         } else {
2078                                 struct ipv4_devconf *cnf = ctl->extra1;
2079                                 struct in_device *idev =
2080                                         container_of(cnf, struct in_device, cnf);
2081                                 if (*valp)
2082                                         dev_disable_lro(idev->dev);
2083                                 inet_netconf_notify_devconf(net,
2084                                                             NETCONFA_FORWARDING,
2085                                                             idev->dev->ifindex,
2086                                                             cnf);
2087                         }
2088                         rtnl_unlock();
2089                         rt_cache_flush(net);
2090                 } else
2091                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2092                                                     NETCONFA_IFINDEX_DEFAULT,
2093                                                     net->ipv4.devconf_dflt);
2094         }
2095
2096         return ret;
2097 }
2098
2099 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2100                                 void __user *buffer,
2101                                 size_t *lenp, loff_t *ppos)
2102 {
2103         int *valp = ctl->data;
2104         int val = *valp;
2105         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2106         struct net *net = ctl->extra2;
2107
2108         if (write && *valp != val)
2109                 rt_cache_flush(net);
2110
2111         return ret;
2112 }
2113
/*
 * Template ctl_table entry for one IPv4 devconf setting.  .data and .extra1
 * point into the global ipv4_devconf; __devinet_sysctl_register() rebases
 * them onto the devconf it is registering.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.mode		= mval, \
		.maxlen		= sizeof(int), \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= proc, \
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2136
2137 static struct devinet_sysctl_table {
2138         struct ctl_table_header *sysctl_header;
2139         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2140 } devinet_sysctl = {
2141         .devinet_vars = {
2142                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2143                                              devinet_sysctl_forward),
2144                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2145
2146                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2147                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2148                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2149                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2150                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2151                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2152                                         "accept_source_route"),
2153                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2154                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2155                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2156                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2157                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2158                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2159                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2160                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2161                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2162                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2163                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2164                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2165                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2166                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2167                                         "force_igmp_version"),
2168                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2169                                         "igmpv2_unsolicited_report_interval"),
2170                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2171                                         "igmpv3_unsolicited_report_interval"),
2172
2173                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2174                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2175                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2176                                               "promote_secondaries"),
2177                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2178                                               "route_localnet"),
2179         },
2180 };
2181
2182 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2183                                         struct ipv4_devconf *p)
2184 {
2185         int i;
2186         struct devinet_sysctl_table *t;
2187         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2188
2189         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2190         if (!t)
2191                 goto out;
2192
2193         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2194                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2195                 t->devinet_vars[i].extra1 = p;
2196                 t->devinet_vars[i].extra2 = net;
2197         }
2198
2199         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2200
2201         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2202         if (!t->sysctl_header)
2203                 goto free;
2204
2205         p->sysctl = t;
2206         return 0;
2207
2208 free:
2209         kfree(t);
2210 out:
2211         return -ENOBUFS;
2212 }
2213
2214 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2215 {
2216         struct devinet_sysctl_table *t = cnf->sysctl;
2217
2218         if (!t)
2219                 return;
2220
2221         cnf->sysctl = NULL;
2222         unregister_net_sysctl_table(t->sysctl_header);
2223         kfree(t);
2224 }
2225
2226 static int devinet_sysctl_register(struct in_device *idev)
2227 {
2228         int err;
2229
2230         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2231                 return -EINVAL;
2232
2233         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2234         if (err)
2235                 return err;
2236         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2237                                         &idev->cnf);
2238         if (err)
2239                 neigh_sysctl_unregister(idev->arp_parms);
2240         return err;
2241 }
2242
2243 static void devinet_sysctl_unregister(struct in_device *idev)
2244 {
2245         __devinet_sysctl_unregister(&idev->cnf);
2246         neigh_sysctl_unregister(idev->arp_parms);
2247 }
2248
2249 static struct ctl_table ctl_forward_entry[] = {
2250         {
2251                 .procname       = "ip_forward",
2252                 .data           = &ipv4_devconf.data[
2253                                         IPV4_DEVCONF_FORWARDING - 1],
2254                 .maxlen         = sizeof(int),
2255                 .mode           = 0644,
2256                 .proc_handler   = devinet_sysctl_forward,
2257                 .extra1         = &ipv4_devconf,
2258                 .extra2         = &init_net,
2259         },
2260         { },
2261 };
2262 #endif
2263
2264 static __net_init int devinet_init_net(struct net *net)
2265 {
2266         int err;
2267         struct ipv4_devconf *all, *dflt;
2268 #ifdef CONFIG_SYSCTL
2269         struct ctl_table *tbl = ctl_forward_entry;
2270         struct ctl_table_header *forw_hdr;
2271 #endif
2272
2273         err = -ENOMEM;
2274         all = &ipv4_devconf;
2275         dflt = &ipv4_devconf_dflt;
2276
2277         if (!net_eq(net, &init_net)) {
2278                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2279                 if (!all)
2280                         goto err_alloc_all;
2281
2282                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2283                 if (!dflt)
2284                         goto err_alloc_dflt;
2285
2286 #ifdef CONFIG_SYSCTL
2287                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2288                 if (!tbl)
2289                         goto err_alloc_ctl;
2290
2291                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2292                 tbl[0].extra1 = all;
2293                 tbl[0].extra2 = net;
2294 #endif
2295         }
2296
2297 #ifdef CONFIG_SYSCTL
2298         err = __devinet_sysctl_register(net, "all", all);
2299         if (err < 0)
2300                 goto err_reg_all;
2301
2302         err = __devinet_sysctl_register(net, "default", dflt);
2303         if (err < 0)
2304                 goto err_reg_dflt;
2305
2306         err = -ENOMEM;
2307         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2308         if (!forw_hdr)
2309                 goto err_reg_ctl;
2310         net->ipv4.forw_hdr = forw_hdr;
2311 #endif
2312
2313         net->ipv4.devconf_all = all;
2314         net->ipv4.devconf_dflt = dflt;
2315         return 0;
2316
2317 #ifdef CONFIG_SYSCTL
2318 err_reg_ctl:
2319         __devinet_sysctl_unregister(dflt);
2320 err_reg_dflt:
2321         __devinet_sysctl_unregister(all);
2322 err_reg_all:
2323         if (tbl != ctl_forward_entry)
2324                 kfree(tbl);
2325 err_alloc_ctl:
2326 #endif
2327         if (dflt != &ipv4_devconf_dflt)
2328                 kfree(dflt);
2329 err_alloc_dflt:
2330         if (all != &ipv4_devconf)
2331                 kfree(all);
2332 err_alloc_all:
2333         return err;
2334 }
2335
2336 static __net_exit void devinet_exit_net(struct net *net)
2337 {
2338 #ifdef CONFIG_SYSCTL
2339         struct ctl_table *tbl;
2340
2341         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2342         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2343         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2344         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2345         kfree(tbl);
2346 #endif
2347         kfree(net->ipv4.devconf_dflt);
2348         kfree(net->ipv4.devconf_all);
2349 }
2350
2351 static __net_initdata struct pernet_operations devinet_ops = {
2352         .init = devinet_init_net,
2353         .exit = devinet_exit_net,
2354 };
2355
2356 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2357         .family           = AF_INET,
2358         .fill_link_af     = inet_fill_link_af,
2359         .get_link_af_size = inet_get_link_af_size,
2360         .validate_link_af = inet_validate_link_af,
2361         .set_link_af      = inet_set_link_af,
2362 };
2363
2364 void __init devinet_init(void)
2365 {
2366         int i;
2367
2368         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2369                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2370
2371         register_pernet_subsys(&devinet_ops);
2372
2373         register_gifconf(PF_INET, inet_gifconf);
2374         register_netdevice_notifier(&ip_netdev_notifier);
2375
2376         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2377
2378         rtnl_af_register(&inet_af_ops);
2379
2380         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2381         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2382         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2383         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2384                       inet_netconf_dump_devconf, NULL);
2385 }
2386