Upgrade to 4.4.50-rt62
[kvmfornfv.git] / kernel / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/* Read attribute @attr from the devconf_dflt block of namespace @net.
 * @net is parenthesised so that any pointer-valued expression may be
 * passed, not just a plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
/* The address hash table has 2^IN4_ADDR_HSIZE_SHIFT buckets. */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

/* Hash table of in_ifaddr entries, bucket chosen by inet_addr_hash().
 * Entries are added and removed with the RCU hlist primitives by
 * inet_hash_insert() / inet_hash_remove().
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(const struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127         ASSERT_RTNL();
128         hlist_del_init_rcu(&ifa->hash);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         u32 hash = inet_addr_hash(net, addr);
142         struct net_device *result = NULL;
143         struct in_ifaddr *ifa;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147                 if (ifa->ifa_local == addr) {
148                         struct net_device *dev = ifa->ifa_dev->dev;
149
150                         if (!net_eq(dev_net(dev), net))
151                                 continue;
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static int devinet_sysctl_register(struct in_device *idev)
187 {
188         return 0;
189 }
190 static void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194
195 /* Locks all the inet devices. */
196
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205         if (ifa->ifa_dev)
206                 in_dev_put(ifa->ifa_dev);
207         kfree(ifa);
208 }
209
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217         struct net_device *dev = idev->dev;
218
219         WARN_ON(idev->ifa_list);
220         WARN_ON(idev->mc_list);
221         kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225         dev_put(dev);
226         if (!idev->dead)
227                 pr_err("Freeing alive in_device %p\n", idev);
228         else
229                 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235         struct in_device *in_dev;
236         int err = -ENOMEM;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         in_dev_hold(in_dev);
256
257         err = devinet_sysctl_register(in_dev);
258         if (err) {
259                 in_dev->dead = 1;
260                 in_dev_put(in_dev);
261                 in_dev = NULL;
262                 goto out;
263         }
264         ip_mc_init_dev(in_dev);
265         if (dev->flags & IFF_UP)
266                 ip_mc_up(in_dev);
267
268         /* we can receive as soon as ip_ptr is set -- do this last */
269         rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271         return in_dev ?: ERR_PTR(err);
272 out_kfree:
273         kfree(in_dev);
274         in_dev = NULL;
275         goto out;
276 }
277
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280         struct in_device *idev = container_of(head, struct in_device, rcu_head);
281         in_dev_put(idev);
282 }
283
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286         struct in_ifaddr *ifa;
287         struct net_device *dev;
288
289         ASSERT_RTNL();
290
291         dev = in_dev->dev;
292
293         in_dev->dead = 1;
294
295         ip_mc_destroy_dev(in_dev);
296
297         while ((ifa = in_dev->ifa_list) != NULL) {
298                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299                 inet_free_ifa(ifa);
300         }
301
302         RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304         devinet_sysctl_unregister(in_dev);
305         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306         arp_ifdown(dev);
307
308         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313         rcu_read_lock();
314         for_primary_ifa(in_dev) {
315                 if (inet_ifa_match(a, ifa)) {
316                         if (!b || inet_ifa_match(b, ifa)) {
317                                 rcu_read_unlock();
318                                 return 1;
319                         }
320                 }
321         } endfor_ifa(in_dev);
322         rcu_read_unlock();
323         return 0;
324 }
325
/*
 * Remove the address *@ifap from @in_dev's address list and announce it.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link (pointer to the ->ifa_next slot or list head) that
 *           currently points at the address to delete
 * @destroy: when non-zero the deleted address is also freed
 * @nlh:     netlink request header passed through to rtmsg_ifa(), may be NULL
 * @portid:  netlink port id passed through to rtmsg_ifa()
 *
 * When a primary address is deleted, the secondaries in the same subnet
 * are either deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set,
 * the first of them is promoted to primary.  Both steps are skipped for a
 * dead in_device.  RTNL is asserted.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy, struct nlmsghdr *nlh, u32 portid)
{
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1 = *ifap;
        struct in_ifaddr *last_prim = in_dev->ifa_list;
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        if (in_dev->dead)
                goto no_promotions;

        /* 1. Deleting primary ifaddr forces deletion all secondaries
         * unless alias promotion is set
         */

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr **ifap1 = &ifa1->ifa_next;

                while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary after ifa1 whose scope is
                         * not below ifa1's; a promoted address is relinked
                         * right behind it further down.
                         */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;

                        /* Skip primaries and addresses outside ifa1's subnet. */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                /* Delete this secondary together with ifa1. */
                                inet_hash_remove(ifa);
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                /* First matching secondary gets promoted. */
                                promote = ifa;
                                break;
                        }
                }
        }

        /* On promotion all secondaries from subnet are changing
         * the primary IP, we must remove all their routes silently
         * and later to add them back with new prefsrc. Do this
         * while all addresses are on the device list.
         */
        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa))
                        fib_del_ifaddr(ifa, ifa1);
        }

no_promotions:
        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;
        inet_hash_remove(ifa1);

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {
                /* Remember the successor before promote is relinked. */
                struct in_ifaddr *next_sec = promote->ifa_next;

                if (prev_prom) {
                        /* Move promote from its old position to directly
                         * after last_prim.
                         */
                        prev_prom->ifa_next = promote->ifa_next;
                        promote->ifa_next = last_prim->ifa_next;
                        last_prim->ifa_next = promote;
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* Re-add routes for the remaining secondaries of the subnet. */
                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
429
430 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
431                          int destroy)
432 {
433         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
434 }
435
/* Forward declaration: the work item below needs the handler's address. */
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(); it is (re)queued whenever
 * an address is inserted or its lifetime is updated.
 */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
439
440 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
441                              u32 portid)
442 {
443         struct in_device *in_dev = ifa->ifa_dev;
444         struct in_ifaddr *ifa1, **ifap, **last_primary;
445
446         ASSERT_RTNL();
447
448         if (!ifa->ifa_local) {
449                 inet_free_ifa(ifa);
450                 return 0;
451         }
452
453         ifa->ifa_flags &= ~IFA_F_SECONDARY;
454         last_primary = &in_dev->ifa_list;
455
456         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
457              ifap = &ifa1->ifa_next) {
458                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
459                     ifa->ifa_scope <= ifa1->ifa_scope)
460                         last_primary = &ifa1->ifa_next;
461                 if (ifa1->ifa_mask == ifa->ifa_mask &&
462                     inet_ifa_match(ifa1->ifa_address, ifa)) {
463                         if (ifa1->ifa_local == ifa->ifa_local) {
464                                 inet_free_ifa(ifa);
465                                 return -EEXIST;
466                         }
467                         if (ifa1->ifa_scope != ifa->ifa_scope) {
468                                 inet_free_ifa(ifa);
469                                 return -EINVAL;
470                         }
471                         ifa->ifa_flags |= IFA_F_SECONDARY;
472                 }
473         }
474
475         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
476                 prandom_seed((__force u32) ifa->ifa_local);
477                 ifap = last_primary;
478         }
479
480         ifa->ifa_next = *ifap;
481         *ifap = ifa;
482
483         inet_hash_insert(dev_net(in_dev->dev), ifa);
484
485         cancel_delayed_work(&check_lifetime_work);
486         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
487
488         /* Send message first, then call notifier.
489            Notifier will trigger FIB update, so that
490            listeners of netlink will know about new ifaddr */
491         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
492         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
493
494         return 0;
495 }
496
497 static int inet_insert_ifa(struct in_ifaddr *ifa)
498 {
499         return __inet_insert_ifa(ifa, NULL, 0);
500 }
501
502 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
503 {
504         struct in_device *in_dev = __in_dev_get_rtnl(dev);
505
506         ASSERT_RTNL();
507
508         if (!in_dev) {
509                 inet_free_ifa(ifa);
510                 return -ENOBUFS;
511         }
512         ipv4_devconf_setall(in_dev);
513         neigh_parms_data_state_setall(in_dev->arp_parms);
514         if (ifa->ifa_dev != in_dev) {
515                 WARN_ON(ifa->ifa_dev);
516                 in_dev_hold(in_dev);
517                 ifa->ifa_dev = in_dev;
518         }
519         if (ipv4_is_loopback(ifa->ifa_local))
520                 ifa->ifa_scope = RT_SCOPE_HOST;
521         return inet_insert_ifa(ifa);
522 }
523
524 /* Caller must hold RCU or RTNL :
525  * We dont take a reference on found in_device
526  */
527 struct in_device *inetdev_by_index(struct net *net, int ifindex)
528 {
529         struct net_device *dev;
530         struct in_device *in_dev = NULL;
531
532         rcu_read_lock();
533         dev = dev_get_by_index_rcu(net, ifindex);
534         if (dev)
535                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
536         rcu_read_unlock();
537         return in_dev;
538 }
539 EXPORT_SYMBOL(inetdev_by_index);
540
541 /* Called only from RTNL semaphored context. No locks. */
542
543 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
544                                     __be32 mask)
545 {
546         ASSERT_RTNL();
547
548         for_primary_ifa(in_dev) {
549                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
550                         return ifa;
551         } endfor_ifa(in_dev);
552         return NULL;
553 }
554
555 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
556 {
557         struct ip_mreqn mreq = {
558                 .imr_multiaddr.s_addr = ifa->ifa_address,
559                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
560         };
561         int ret;
562
563         ASSERT_RTNL();
564
565         lock_sock(sk);
566         if (join)
567                 ret = ip_mc_join_group(sk, &mreq);
568         else
569                 ret = ip_mc_leave_group(sk, &mreq);
570         release_sock(sk);
571
572         return ret;
573 }
574
575 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
576 {
577         struct net *net = sock_net(skb->sk);
578         struct nlattr *tb[IFA_MAX+1];
579         struct in_device *in_dev;
580         struct ifaddrmsg *ifm;
581         struct in_ifaddr *ifa, **ifap;
582         int err = -EINVAL;
583
584         ASSERT_RTNL();
585
586         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
587         if (err < 0)
588                 goto errout;
589
590         ifm = nlmsg_data(nlh);
591         in_dev = inetdev_by_index(net, ifm->ifa_index);
592         if (!in_dev) {
593                 err = -ENODEV;
594                 goto errout;
595         }
596
597         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
598              ifap = &ifa->ifa_next) {
599                 if (tb[IFA_LOCAL] &&
600                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
601                         continue;
602
603                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
604                         continue;
605
606                 if (tb[IFA_ADDRESS] &&
607                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
608                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
609                         continue;
610
611                 if (ipv4_is_multicast(ifa->ifa_address))
612                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
613                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
614                 return 0;
615         }
616
617         err = -EADDRNOTAVAIL;
618 errout:
619         return err;
620 }
621
/* Lifetime value meaning "never expires"; compared against
 * ifa_valid_lft / ifa_preferred_lft in check_lifetime().
 */
#define INFINITY_LIFE_TIME      0xFFFFFFFF
623
/*
 * Delayed-work handler that ages non-permanent addresses.
 *
 * Every hash bucket is first scanned under rcu_read_lock() only, to
 * decide whether anything in it must change and to compute the next
 * wake-up time.  Only buckets needing a change are rescanned under RTNL,
 * where addresses past their valid lifetime are deleted and addresses
 * past their preferred lifetime are flagged IFA_F_DEPRECATED.  The work
 * item is then requeued for the computed time.
 */
static void check_lifetime(struct work_struct *work)
{
        unsigned long now, next, next_sec, next_sched;
        struct in_ifaddr *ifa;
        struct hlist_node *n;
        int i;

        now = jiffies;
        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                bool change_needed = false;

                /* Pass 1: read-only scan, no RTNL taken. */
                rcu_read_lock();
                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime is over: needs deleting. */
                                change_needed = true;
                        } else if (ifa->ifa_preferred_lft ==
                                   INFINITY_LIFE_TIME) {
                                continue;
                        } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime is over; the next event
                                 * for this address is its valid-lifetime
                                 * expiry.
                                 */
                                if (time_before(ifa->ifa_tstamp +
                                                ifa->ifa_valid_lft * HZ, next))
                                        next = ifa->ifa_tstamp +
                                               ifa->ifa_valid_lft * HZ;

                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
                                        change_needed = true;
                        } else if (time_before(ifa->ifa_tstamp +
                                               ifa->ifa_preferred_lft * HZ,
                                               next)) {
                                /* Still preferred: wake up when that ends. */
                                next = ifa->ifa_tstamp +
                                       ifa->ifa_preferred_lft * HZ;
                        }
                }
                rcu_read_unlock();
                if (!change_needed)
                        continue;
                /* Pass 2: re-evaluate under RTNL and apply the changes. */
                rtnl_lock();
                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                struct in_ifaddr **ifap;

                                /* inet_del_ifa() needs the link pointing at
                                 * ifa, so locate it in the device list.
                                 */
                                for (ifap = &ifa->ifa_dev->ifa_list;
                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
                                        if (*ifap == ifa) {
                                                inet_del_ifa(ifa->ifa_dev,
                                                             ifap, 1);
                                                break;
                                        }
                                }
                        } else if (ifa->ifa_preferred_lft !=
                                   INFINITY_LIFE_TIME &&
                                   age >= ifa->ifa_preferred_lft &&
                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                ifa->ifa_flags |= IFA_F_DEPRECATED;
                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
                        }
                }
                rtnl_unlock();
        }

        next_sec = round_jiffies_up(next);
        next_sched = next;

        /* If rounded timeout is accurate enough, accept it. */
        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
                next_sched = next_sec;

        now = jiffies;
        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

        queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
                        next_sched - now);
}
721
722 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
723                              __u32 prefered_lft)
724 {
725         unsigned long timeout;
726
727         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
728
729         timeout = addrconf_timeout_fixup(valid_lft, HZ);
730         if (addrconf_finite_timeout(timeout))
731                 ifa->ifa_valid_lft = timeout;
732         else
733                 ifa->ifa_flags |= IFA_F_PERMANENT;
734
735         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
736         if (addrconf_finite_timeout(timeout)) {
737                 if (timeout == 0)
738                         ifa->ifa_flags |= IFA_F_DEPRECATED;
739                 ifa->ifa_preferred_lft = timeout;
740         }
741         ifa->ifa_tstamp = jiffies;
742         if (!ifa->ifa_cstamp)
743                 ifa->ifa_cstamp = ifa->ifa_tstamp;
744 }
745
746 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
747                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
748 {
749         struct nlattr *tb[IFA_MAX+1];
750         struct in_ifaddr *ifa;
751         struct ifaddrmsg *ifm;
752         struct net_device *dev;
753         struct in_device *in_dev;
754         int err;
755
756         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
757         if (err < 0)
758                 goto errout;
759
760         ifm = nlmsg_data(nlh);
761         err = -EINVAL;
762         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
763                 goto errout;
764
765         dev = __dev_get_by_index(net, ifm->ifa_index);
766         err = -ENODEV;
767         if (!dev)
768                 goto errout;
769
770         in_dev = __in_dev_get_rtnl(dev);
771         err = -ENOBUFS;
772         if (!in_dev)
773                 goto errout;
774
775         ifa = inet_alloc_ifa();
776         if (!ifa)
777                 /*
778                  * A potential indev allocation can be left alive, it stays
779                  * assigned to its device and is destroy with it.
780                  */
781                 goto errout;
782
783         ipv4_devconf_setall(in_dev);
784         neigh_parms_data_state_setall(in_dev->arp_parms);
785         in_dev_hold(in_dev);
786
787         if (!tb[IFA_ADDRESS])
788                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
789
790         INIT_HLIST_NODE(&ifa->hash);
791         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
792         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
793         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
794                                          ifm->ifa_flags;
795         ifa->ifa_scope = ifm->ifa_scope;
796         ifa->ifa_dev = in_dev;
797
798         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
799         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
800
801         if (tb[IFA_BROADCAST])
802                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
803
804         if (tb[IFA_LABEL])
805                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
806         else
807                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
808
809         if (tb[IFA_CACHEINFO]) {
810                 struct ifa_cacheinfo *ci;
811
812                 ci = nla_data(tb[IFA_CACHEINFO]);
813                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
814                         err = -EINVAL;
815                         goto errout_free;
816                 }
817                 *pvalid_lft = ci->ifa_valid;
818                 *pprefered_lft = ci->ifa_prefered;
819         }
820
821         return ifa;
822
823 errout_free:
824         inet_free_ifa(ifa);
825 errout:
826         return ERR_PTR(err);
827 }
828
829 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
830 {
831         struct in_device *in_dev = ifa->ifa_dev;
832         struct in_ifaddr *ifa1, **ifap;
833
834         if (!ifa->ifa_local)
835                 return NULL;
836
837         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
838              ifap = &ifa1->ifa_next) {
839                 if (ifa1->ifa_mask == ifa->ifa_mask &&
840                     inet_ifa_match(ifa1->ifa_address, ifa) &&
841                     ifa1->ifa_local == ifa->ifa_local)
842                         return ifa1;
843         }
844         return NULL;
845 }
846
847 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
848 {
849         struct net *net = sock_net(skb->sk);
850         struct in_ifaddr *ifa;
851         struct in_ifaddr *ifa_existing;
852         __u32 valid_lft = INFINITY_LIFE_TIME;
853         __u32 prefered_lft = INFINITY_LIFE_TIME;
854
855         ASSERT_RTNL();
856
857         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
858         if (IS_ERR(ifa))
859                 return PTR_ERR(ifa);
860
861         ifa_existing = find_matching_ifa(ifa);
862         if (!ifa_existing) {
863                 /* It would be best to check for !NLM_F_CREATE here but
864                  * userspace already relies on not having to provide this.
865                  */
866                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
867                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
868                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
869                                                true, ifa);
870
871                         if (ret < 0) {
872                                 inet_free_ifa(ifa);
873                                 return ret;
874                         }
875                 }
876                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
877         } else {
878                 inet_free_ifa(ifa);
879
880                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
881                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
882                         return -EEXIST;
883                 ifa = ifa_existing;
884                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
885                 cancel_delayed_work(&check_lifetime_work);
886                 queue_delayed_work(system_power_efficient_wq,
887                                 &check_lifetime_work, 0);
888                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
889         }
890         return 0;
891 }
892
893 /*
894  *      Determine a default network mask, based on the IP address.
895  */
896
897 static int inet_abc_len(__be32 addr)
898 {
899         int rc = -1;    /* Something else, probably a multicast. */
900
901         if (ipv4_is_zeronet(addr))
902                 rc = 0;
903         else {
904                 __u32 haddr = ntohl(addr);
905
906                 if (IN_CLASSA(haddr))
907                         rc = 8;
908                 else if (IN_CLASSB(haddr))
909                         rc = 16;
910                 else if (IN_CLASSC(haddr))
911                         rc = 24;
912         }
913
914         return rc;
915 }
916
917
/*
 * Handle the IPv4 interface-address ioctls for the device named in the
 * struct ifreq that @arg points to in user space.
 *
 * Commands accepted:
 *   SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK
 *     - reads; the ifreq with the requested address filled in is copied
 *       back to @arg.
 *   SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR,
 *   SIOCSIFNETMASK
 *     - writes; the caller needs CAP_NET_ADMIN in @net's user namespace.
 * Any other @cmd is rejected with -EINVAL.
 *
 * The function takes and releases the RTNL lock itself.  Returns 0 on
 * success or a negative errno.
 */
918 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
919 {
920         struct ifreq ifr;
921         struct sockaddr_in sin_orig;
922         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
923         struct in_device *in_dev;
924         struct in_ifaddr **ifap = NULL;
925         struct in_ifaddr *ifa = NULL;
926         struct net_device *dev;
927         char *colon;
928         int ret = -EFAULT;
929         int tryaddrmatch = 0;
930
931         /*
932          *      Fetch the caller's info block into kernel space
933          */
934
935         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
936                 goto out;
            /* force NUL termination of the user-supplied name */
937         ifr.ifr_name[IFNAMSIZ - 1] = 0;
938
939         /* save original address for comparison */
940         memcpy(&sin_orig, sin, sizeof(*sin));
941
            /* cut the name at ':' so that only the part before it is used
             * for dev_load() and the device lookup below
             */
942         colon = strchr(ifr.ifr_name, ':');
943         if (colon)
944                 *colon = 0;
945
946         dev_load(net, ifr.ifr_name);
947
948         switch (cmd) {
949         case SIOCGIFADDR:       /* Get interface address */
950         case SIOCGIFBRDADDR:    /* Get the broadcast address */
951         case SIOCGIFDSTADDR:    /* Get the destination address */
952         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
953                 /* Note that these ioctls will not sleep,
954                    so that we do not impose a lock.
955                    One day we will be forced to put shlock here (I mean SMP)
956                  */
957                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
958                 memset(sin, 0, sizeof(*sin));
959                 sin->sin_family = AF_INET;
960                 break;
961
962         case SIOCSIFFLAGS:
963                 ret = -EPERM;
964                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
965                         goto out;
966                 break;
967         case SIOCSIFADDR:       /* Set interface address (and family) */
968         case SIOCSIFBRDADDR:    /* Set the broadcast address */
969         case SIOCSIFDSTADDR:    /* Set the destination address */
970         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
971                 ret = -EPERM;
972                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
973                         goto out;
974                 ret = -EINVAL;
975                 if (sin->sin_family != AF_INET)
976                         goto out;
977                 break;
978         default:
979                 ret = -EINVAL;
980                 goto out;
981         }
982
983         rtnl_lock();
984
985         ret = -ENODEV;
986         dev = __dev_get_by_name(net, ifr.ifr_name);
987         if (!dev)
988                 goto done;
989
            /* put the ':' back: the label comparisons below use the full name */
990         if (colon)
991                 *colon = ':';
992
993         in_dev = __in_dev_get_rtnl(dev);
994         if (in_dev) {
995                 if (tryaddrmatch) {
996                         /* Matthias Andree */
997                         /* compare label and address (4.4BSD style) */
998                         /* note: we only do this for a limited set of ioctls
999                            and only if the original address family was AF_INET.
1000                            This is checked above. */
1001                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1002                              ifap = &ifa->ifa_next) {
1003                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1004                                     sin_orig.sin_addr.s_addr ==
1005                                                         ifa->ifa_local) {
1006                                         break; /* found */
1007                                 }
1008                         }
1009                 }
1010                 /* we didn't get a match, maybe the application is
1011                    4.3BSD-style and passed in junk so we fall back to
1012                    comparing just the label */
1013                 if (!ifa) {
1014                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1015                              ifap = &ifa->ifa_next)
1016                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1017                                         break;
1018                 }
1019         }
1020
             /* only SIOCSIFADDR and SIOCSIFFLAGS may continue without a
              * matching address; every other command dereferences ifa below
              */
1021         ret = -EADDRNOTAVAIL;
1022         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1023                 goto done;
1024
1025         switch (cmd) {
1026         case SIOCGIFADDR:       /* Get interface address */
1027                 sin->sin_addr.s_addr = ifa->ifa_local;
1028                 goto rarok;
1029
1030         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1031                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1032                 goto rarok;
1033
1034         case SIOCGIFDSTADDR:    /* Get the destination address */
1035                 sin->sin_addr.s_addr = ifa->ifa_address;
1036                 goto rarok;
1037
1038         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1039                 sin->sin_addr.s_addr = ifa->ifa_mask;
1040                 goto rarok;
1041
1042         case SIOCSIFFLAGS:
                     /* a name containing ':' addresses a single ifa: clearing
                      * IFF_UP deletes that address, the device flags are not
                      * touched
                      */
1043                 if (colon) {
1044                         ret = -EADDRNOTAVAIL;
1045                         if (!ifa)
1046                                 break;
1047                         ret = 0;
1048                         if (!(ifr.ifr_flags & IFF_UP))
1049                                 inet_del_ifa(in_dev, ifap, 1);
1050                         break;
1051                 }
1052                 ret = dev_change_flags(dev, ifr.ifr_flags);
1053                 break;
1054
1055         case SIOCSIFADDR:       /* Set interface address (and family) */
1056                 ret = -EINVAL;
1057                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1058                         break;
1059
1060                 if (!ifa) {
1061                         ret = -ENOBUFS;
1062                         ifa = inet_alloc_ifa();
1063                         if (!ifa)
1064                                 break;
1065                         INIT_HLIST_NODE(&ifa->hash);
1066                         if (colon)
1067                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1068                         else
1069                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1070                 } else {
1071                         ret = 0;
1072                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1073                                 break;
                             /* NOTE(review): ifa is reused after this call, so
                              * a last argument of 0 presumably unlinks without
                              * freeing - confirm against inet_del_ifa()
                              */
1074                         inet_del_ifa(in_dev, ifap, 0);
1075                         ifa->ifa_broadcast = 0;
1076                         ifa->ifa_scope = 0;
1077                 }
1078
1079                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1080
                     /* non point-to-point devices get the default mask from
                      * inet_abc_len(); point-to-point devices get a /32
                      */
1081                 if (!(dev->flags & IFF_POINTOPOINT)) {
1082                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1083                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1084                         if ((dev->flags & IFF_BROADCAST) &&
1085                             ifa->ifa_prefixlen < 31)
1086                                 ifa->ifa_broadcast = ifa->ifa_address |
1087                                                      ~ifa->ifa_mask;
1088                 } else {
1089                         ifa->ifa_prefixlen = 32;
1090                         ifa->ifa_mask = inet_make_mask(32);
1091                 }
1092                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1093                 ret = inet_set_ifa(dev, ifa);
1094                 break;
1095
1096         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1097                 ret = 0;
1098                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1099                         inet_del_ifa(in_dev, ifap, 0);
1100                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1101                         inet_insert_ifa(ifa);
1102                 }
1103                 break;
1104
1105         case SIOCSIFDSTADDR:    /* Set the destination address */
1106                 ret = 0;
1107                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1108                         break;
1109                 ret = -EINVAL;
1110                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1111                         break;
1112                 ret = 0;
1113                 inet_del_ifa(in_dev, ifap, 0);
1114                 ifa->ifa_address = sin->sin_addr.s_addr;
1115                 inet_insert_ifa(ifa);
1116                 break;
1117
1118         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1119
1120                 /*
1121                  *      The mask we set must be legal.
1122                  */
1123                 ret = -EINVAL;
1124                 if (bad_mask(sin->sin_addr.s_addr, 0))
1125                         break;
1126                 ret = 0;
1127                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1128                         __be32 old_mask = ifa->ifa_mask;
1129                         inet_del_ifa(in_dev, ifap, 0);
1130                         ifa->ifa_mask = sin->sin_addr.s_addr;
1131                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1132
1133                         /* See if current broadcast address matches
1134                          * with current netmask, then recalculate
1135                          * the broadcast address. Otherwise it's a
1136                          * funny address, so don't touch it since
1137                          * the user seems to know what (s)he's doing...
1138                          */
1139                         if ((dev->flags & IFF_BROADCAST) &&
1140                             (ifa->ifa_prefixlen < 31) &&
1141                             (ifa->ifa_broadcast ==
1142                              (ifa->ifa_local|~old_mask))) {
1143                                 ifa->ifa_broadcast = (ifa->ifa_local |
1144                                                       ~sin->sin_addr.s_addr);
1145                         }
1146                         inet_insert_ifa(ifa);
1147                 }
1148                 break;
1149         }
1150 done:
1151         rtnl_unlock();
1152 out:
1153         return ret;
     /* exit used by the SIOCGIF* reads: unlock, then copy the ifreq back */
1154 rarok:
1155         rtnl_unlock();
1156         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1157         goto out;
1158 }
1159
1160 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1161 {
1162         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1163         struct in_ifaddr *ifa;
1164         struct ifreq ifr;
1165         int done = 0;
1166
1167         if (!in_dev)
1168                 goto out;
1169
1170         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1171                 if (!buf) {
1172                         done += sizeof(ifr);
1173                         continue;
1174                 }
1175                 if (len < (int) sizeof(ifr))
1176                         break;
1177                 memset(&ifr, 0, sizeof(struct ifreq));
1178                 strcpy(ifr.ifr_name, ifa->ifa_label);
1179
1180                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1181                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1182                                                                 ifa->ifa_local;
1183
1184                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1185                         done = -EFAULT;
1186                         break;
1187                 }
1188                 buf  += sizeof(struct ifreq);
1189                 len  -= sizeof(struct ifreq);
1190                 done += sizeof(struct ifreq);
1191         }
1192 out:
1193         return done;
1194 }
1195
1196 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1197 {
1198         __be32 addr = 0;
1199         struct in_device *in_dev;
1200         struct net *net = dev_net(dev);
1201
1202         rcu_read_lock();
1203         in_dev = __in_dev_get_rcu(dev);
1204         if (!in_dev)
1205                 goto no_in_dev;
1206
1207         for_primary_ifa(in_dev) {
1208                 if (ifa->ifa_scope > scope)
1209                         continue;
1210                 if (!dst || inet_ifa_match(dst, ifa)) {
1211                         addr = ifa->ifa_local;
1212                         break;
1213                 }
1214                 if (!addr)
1215                         addr = ifa->ifa_local;
1216         } endfor_ifa(in_dev);
1217
1218         if (addr)
1219                 goto out_unlock;
1220 no_in_dev:
1221
1222         /* Not loopback addresses on loopback should be preferred
1223            in this case. It is important that lo is the first interface
1224            in dev_base list.
1225          */
1226         for_each_netdev_rcu(net, dev) {
1227                 in_dev = __in_dev_get_rcu(dev);
1228                 if (!in_dev)
1229                         continue;
1230
1231                 for_primary_ifa(in_dev) {
1232                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1233                             ifa->ifa_scope <= scope) {
1234                                 addr = ifa->ifa_local;
1235                                 goto out_unlock;
1236                         }
1237                 } endfor_ifa(in_dev);
1238         }
1239 out_unlock:
1240         rcu_read_unlock();
1241         return addr;
1242 }
1243 EXPORT_SYMBOL(inet_select_addr);
1244
/*
 * Walk the addresses of @in_dev (for_ifa) looking for a local address to
 * confirm.
 *
 * addr: the first ifa_local whose scope is <= @scope and that equals
 *       @local (any address when @local is 0).  It can later be replaced
 *       by the ifa_local of the entry that matched the subnets, see the
 *       comments in the loop.
 * same: set when an entry is found whose subnet contains @local (if
 *       non-zero) and @dst (if non-zero); cleared again when that entry
 *       turns out not to be usable, so the search continues.
 *
 * Returns addr if a subnet match was found (same != 0), otherwise 0.
 */
1245 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1246                               __be32 local, int scope)
1247 {
1248         int same = 0;
1249         __be32 addr = 0;
1250
1251         for_ifa(in_dev) {
                     /* pick the candidate address once */
1252                 if (!addr &&
1253                     (local == ifa->ifa_local || !local) &&
1254                     ifa->ifa_scope <= scope) {
1255                         addr = ifa->ifa_local;
                             /* subnet match already seen earlier: done */
1256                         if (same)
1257                                 break;
1258                 }
1259                 if (!same) {
1260                         same = (!local || inet_ifa_match(local, ifa)) &&
1261                                 (!dst || inet_ifa_match(dst, ifa));
1262                         if (same && addr) {
                                     /* an explicit local, or no dst
                                      * restriction: the candidate stands
                                      */
1263                                 if (local || !dst)
1264                                         break;
1265                                 /* Is the selected addr into dst subnet? */
1266                                 if (inet_ifa_match(addr, ifa))
1267                                         break;
1268                                 /* No, then can we use new local src? */
1269                                 if (ifa->ifa_scope <= scope) {
1270                                         addr = ifa->ifa_local;
1271                                         break;
1272                                 }
1273                                 /* search for large dst subnet for addr */
1274                                 same = 0;
1275                         }
1276                 }
1277         } endfor_ifa(in_dev);
1278
1279         return same ? addr : 0;
1280 }
1281
1282 /*
1283  * Confirm that local IP address exists using wildcards:
1284  * - net: netns to check, cannot be NULL
1285  * - in_dev: only on this interface, NULL=any interface
1286  * - dst: only in the same subnet as dst, 0=any dst
1287  * - local: address, 0=autoselect the local address
1288  * - scope: maximum allowed scope value for the local address
1289  */
1290 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1291                          __be32 dst, __be32 local, int scope)
1292 {
1293         __be32 addr = 0;
1294         struct net_device *dev;
1295
1296         if (in_dev)
1297                 return confirm_addr_indev(in_dev, dst, local, scope);
1298
1299         rcu_read_lock();
1300         for_each_netdev_rcu(net, dev) {
1301                 in_dev = __in_dev_get_rcu(dev);
1302                 if (in_dev) {
1303                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1304                         if (addr)
1305                                 break;
1306                 }
1307         }
1308         rcu_read_unlock();
1309
1310         return addr;
1311 }
1312 EXPORT_SYMBOL(inet_confirm_addr);
1313
1314 /*
1315  *      Device notifier
1316  */
1317
1318 int register_inetaddr_notifier(struct notifier_block *nb)
1319 {
1320         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1321 }
1322 EXPORT_SYMBOL(register_inetaddr_notifier);
1323
1324 int unregister_inetaddr_notifier(struct notifier_block *nb)
1325 {
1326         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1327 }
1328 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1329
1330 /* Rename ifa_labels for a device name change. Make some effort to preserve
1331  * existing alias numbering and to create unique labels if possible.
1332 */
1333 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1334 {
1335         struct in_ifaddr *ifa;
1336         int named = 0;
1337
1338         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1339                 char old[IFNAMSIZ], *dot;
1340
1341                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1342                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1343                 if (named++ == 0)
1344                         goto skip;
1345                 dot = strchr(old, ':');
1346                 if (!dot) {
1347                         sprintf(old, ":%d", named);
1348                         dot = old;
1349                 }
1350                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1351                         strcat(ifa->ifa_label, dot);
1352                 else
1353                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1354 skip:
1355                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1356         }
1357 }
1358
/* Returns true when @mtu is at least 68, false otherwise. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1363
1364 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1365                                         struct in_device *in_dev)
1366
1367 {
1368         struct in_ifaddr *ifa;
1369
1370         for (ifa = in_dev->ifa_list; ifa;
1371              ifa = ifa->ifa_next) {
1372                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1373                          ifa->ifa_local, dev,
1374                          ifa->ifa_local, NULL,
1375                          dev->dev_addr, NULL);
1376         }
1377 }
1378
1379 /* Called only under RTNL semaphore */
1380
/*
 * Netdevice notifier callback (see ip_netdev_notifier).
 *
 * When the device has no in_device yet, one is created on NETDEV_REGISTER
 * and, if the MTU is valid again, on NETDEV_CHANGEMTU; all other events
 * are ignored in that state.  Otherwise the event is dispatched by the
 * switch below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER: then the error is returned via notifier_from_errno().
 */
1381 static int inetdev_event(struct notifier_block *this, unsigned long event,
1382                          void *ptr)
1383 {
1384         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1385         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1386
1387         ASSERT_RTNL();
1388
1389         if (!in_dev) {
1390                 if (event == NETDEV_REGISTER) {
1391                         in_dev = inetdev_init(dev);
1392                         if (IS_ERR(in_dev))
1393                                 return notifier_from_errno(PTR_ERR(in_dev));
1394                         if (dev->flags & IFF_LOOPBACK) {
1395                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1396                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1397                         }
1398                 } else if (event == NETDEV_CHANGEMTU) {
1399                         /* Re-enabling IP */
1400                         if (inetdev_valid_mtu(dev->mtu))
1401                                 in_dev = inetdev_init(dev);
1402                 }
1403                 goto out;
1404         }
1405
1406         switch (event) {
1407         case NETDEV_REGISTER:
                     /* REGISTER for a device that already has an in_device */
1408                 pr_debug("%s: bug\n", __func__);
1409                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1410                 break;
1411         case NETDEV_UP:
1412                 if (!inetdev_valid_mtu(dev->mtu))
1413                         break;
                     /* a loopback device coming up gets INADDR_LOOPBACK/8
                      * with host scope configured automatically
                      */
1414                 if (dev->flags & IFF_LOOPBACK) {
1415                         struct in_ifaddr *ifa = inet_alloc_ifa();
1416
1417                         if (ifa) {
1418                                 INIT_HLIST_NODE(&ifa->hash);
1419                                 ifa->ifa_local =
1420                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1421                                 ifa->ifa_prefixlen = 8;
1422                                 ifa->ifa_mask = inet_make_mask(8);
1423                                 in_dev_hold(in_dev);
1424                                 ifa->ifa_dev = in_dev;
1425                                 ifa->ifa_scope = RT_SCOPE_HOST;
1426                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1427                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1428                                                  INFINITY_LIFE_TIME);
1429                                 ipv4_devconf_setall(in_dev);
1430                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1431                                 inet_insert_ifa(ifa);
1432                         }
1433                 }
1434                 ip_mc_up(in_dev);
1435                 /* fall through */
1436         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when the
                      * arp_notify setting is enabled
                      */
1437                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1438                         break;
1439                 /* fall through */
1440         case NETDEV_NOTIFY_PEERS:
1441                 /* Send gratuitous ARP to notify of link change */
1442                 inetdev_send_gratuitous_arp(dev, in_dev);
1443                 break;
1444         case NETDEV_DOWN:
1445                 ip_mc_down(in_dev);
1446                 break;
1447         case NETDEV_PRE_TYPE_CHANGE:
1448                 ip_mc_unmap(in_dev);
1449                 break;
1450         case NETDEV_POST_TYPE_CHANGE:
1451                 ip_mc_remap(in_dev);
1452                 break;
1453         case NETDEV_CHANGEMTU:
1454                 if (inetdev_valid_mtu(dev->mtu))
1455                         break;
1456                 /* disable IP when MTU is not enough: fall through to destroy */
1457         case NETDEV_UNREGISTER:
1458                 inetdev_destroy(in_dev);
1459                 break;
1460         case NETDEV_CHANGENAME:
1461                 /* Do not notify about label change, this event is
1462                  * not interesting to applications using netlink.
1463                  */
1464                 inetdev_changename(dev, in_dev);
1465
                     /* re-register the sysctl entries after the rename */
1466                 devinet_sysctl_unregister(in_dev);
1467                 devinet_sysctl_register(in_dev);
1468                 break;
1469         }
1470 out:
1471         return NOTIFY_DONE;
1472 }
1473
/* Notifier block whose callback is inetdev_event(). */
1474 static struct notifier_block ip_netdev_notifier = {
1475         .notifier_call = inetdev_event,
1476 };
1477
1478 static size_t inet_nlmsg_size(void)
1479 {
1480         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1481                + nla_total_size(4) /* IFA_ADDRESS */
1482                + nla_total_size(4) /* IFA_LOCAL */
1483                + nla_total_size(4) /* IFA_BROADCAST */
1484                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1485                + nla_total_size(4)  /* IFA_FLAGS */
1486                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1487 }
1488
1489 static inline u32 cstamp_delta(unsigned long cstamp)
1490 {
1491         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1492 }
1493
1494 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1495                          unsigned long tstamp, u32 preferred, u32 valid)
1496 {
1497         struct ifa_cacheinfo ci;
1498
1499         ci.cstamp = cstamp_delta(cstamp);
1500         ci.tstamp = cstamp_delta(tstamp);
1501         ci.ifa_prefered = preferred;
1502         ci.ifa_valid = valid;
1503
1504         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1505 }
1506
1507 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1508                             u32 portid, u32 seq, int event, unsigned int flags)
1509 {
1510         struct ifaddrmsg *ifm;
1511         struct nlmsghdr  *nlh;
1512         u32 preferred, valid;
1513
1514         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1515         if (!nlh)
1516                 return -EMSGSIZE;
1517
1518         ifm = nlmsg_data(nlh);
1519         ifm->ifa_family = AF_INET;
1520         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1521         ifm->ifa_flags = ifa->ifa_flags;
1522         ifm->ifa_scope = ifa->ifa_scope;
1523         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1524
1525         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1526                 preferred = ifa->ifa_preferred_lft;
1527                 valid = ifa->ifa_valid_lft;
1528                 if (preferred != INFINITY_LIFE_TIME) {
1529                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1530
1531                         if (preferred > tval)
1532                                 preferred -= tval;
1533                         else
1534                                 preferred = 0;
1535                         if (valid != INFINITY_LIFE_TIME) {
1536                                 if (valid > tval)
1537                                         valid -= tval;
1538                                 else
1539                                         valid = 0;
1540                         }
1541                 }
1542         } else {
1543                 preferred = INFINITY_LIFE_TIME;
1544                 valid = INFINITY_LIFE_TIME;
1545         }
1546         if ((ifa->ifa_address &&
1547              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1548             (ifa->ifa_local &&
1549              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1550             (ifa->ifa_broadcast &&
1551              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1552             (ifa->ifa_label[0] &&
1553              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1554             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1555             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1556                           preferred, valid))
1557                 goto nla_put_failure;
1558
1559         nlmsg_end(skb, nlh);
1560         return 0;
1561
1562 nla_put_failure:
1563         nlmsg_cancel(skb, nlh);
1564         return -EMSGSIZE;
1565 }
1566
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (NLM_F_MULTI) for
 * each address of every device found in @net's dev_index_head hash table.
 *
 * The position is kept in @cb->args so that a later call can resume:
 *   args[0] - hash bucket, args[1] - device index within the bucket,
 *   args[2] - address index within the device.
 * The walk stops as soon as inet_fill_ifaddr() fails; the position
 * reached is stored before returning.  Returns skb->len.
 */
1567 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1568 {
1569         struct net *net = sock_net(skb->sk);
1570         int h, s_h;
1571         int idx, s_idx;
1572         int ip_idx, s_ip_idx;
1573         struct net_device *dev;
1574         struct in_device *in_dev;
1575         struct in_ifaddr *ifa;
1576         struct hlist_head *head;
1577
             /* pick up where the previous call stopped */
1578         s_h = cb->args[0];
1579         s_idx = idx = cb->args[1];
1580         s_ip_idx = ip_idx = cb->args[2];
1581
1582         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1583                 idx = 0;
1584                 head = &net->dev_index_head[h];
1585                 rcu_read_lock();
                     /* sequence value checked by nl_dump_check_consistent() */
1586                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1587                           net->dev_base_seq;
1588                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
                             /* skip devices dumped by an earlier call */
1589                         if (idx < s_idx)
1590                                 goto cont;
                             /* past the resume device: start at its first address */
1591                         if (h > s_h || idx > s_idx)
1592                                 s_ip_idx = 0;
1593                         in_dev = __in_dev_get_rcu(dev);
1594                         if (!in_dev)
1595                                 goto cont;
1596
1597                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1598                              ifa = ifa->ifa_next, ip_idx++) {
1599                                 if (ip_idx < s_ip_idx)
1600                                         continue;
1601                                 if (inet_fill_ifaddr(skb, ifa,
1602                                              NETLINK_CB(cb->skb).portid,
1603                                              cb->nlh->nlmsg_seq,
1604                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1605                                         rcu_read_unlock();
1606                                         goto done;
1607                                 }
1608                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1609                         }
1610 cont:
1611                         idx++;
1612                 }
1613                 rcu_read_unlock();
1614         }
1615
1616 done:
             /* remember the position for the next call */
1617         cb->args[0] = h;
1618         cb->args[1] = idx;
1619         cb->args[2] = ip_idx;
1620
1621         return skb->len;
1622 }
1623
1624 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1625                       u32 portid)
1626 {
1627         struct sk_buff *skb;
1628         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1629         int err = -ENOBUFS;
1630         struct net *net;
1631
1632         net = dev_net(ifa->ifa_dev->dev);
1633         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1634         if (!skb)
1635                 goto errout;
1636
1637         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1638         if (err < 0) {
1639                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1640                 WARN_ON(err == -EMSGSIZE);
1641                 kfree_skb(skb);
1642                 goto errout;
1643         }
1644         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1645         return;
1646 errout:
1647         if (err < 0)
1648                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1649 }
1650
1651 static size_t inet_get_link_af_size(const struct net_device *dev,
1652                                     u32 ext_filter_mask)
1653 {
1654         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1655
1656         if (!in_dev)
1657                 return 0;
1658
1659         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1660 }
1661
1662 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1663                              u32 ext_filter_mask)
1664 {
1665         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1666         struct nlattr *nla;
1667         int i;
1668
1669         if (!in_dev)
1670                 return -ENODATA;
1671
1672         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1673         if (!nla)
1674                 return -EMSGSIZE;
1675
1676         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1677                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1678
1679         return 0;
1680 }
1681
1682 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1683         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1684 };
1685
1686 static int inet_validate_link_af(const struct net_device *dev,
1687                                  const struct nlattr *nla)
1688 {
1689         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1690         int err, rem;
1691
1692         if (dev && !__in_dev_get_rtnl(dev))
1693                 return -EAFNOSUPPORT;
1694
1695         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1696         if (err < 0)
1697                 return err;
1698
1699         if (tb[IFLA_INET_CONF]) {
1700                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1701                         int cfgid = nla_type(a);
1702
1703                         if (nla_len(a) < 4)
1704                                 return -EINVAL;
1705
1706                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1707                                 return -EINVAL;
1708                 }
1709         }
1710
1711         return 0;
1712 }
1713
1714 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1715 {
1716         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1717         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1718         int rem;
1719
1720         if (!in_dev)
1721                 return -EAFNOSUPPORT;
1722
1723         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1724                 BUG();
1725
1726         if (tb[IFLA_INET_CONF]) {
1727                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1728                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1729         }
1730
1731         return 0;
1732 }
1733
1734 static int inet_netconf_msgsize_devconf(int type)
1735 {
1736         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1737                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1738
1739         /* type -1 is used for ALL */
1740         if (type == -1 || type == NETCONFA_FORWARDING)
1741                 size += nla_total_size(4);
1742         if (type == -1 || type == NETCONFA_RP_FILTER)
1743                 size += nla_total_size(4);
1744         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1745                 size += nla_total_size(4);
1746         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1747                 size += nla_total_size(4);
1748         if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1749                 size += nla_total_size(4);
1750
1751         return size;
1752 }
1753
1754 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1755                                      struct ipv4_devconf *devconf, u32 portid,
1756                                      u32 seq, int event, unsigned int flags,
1757                                      int type)
1758 {
1759         struct nlmsghdr  *nlh;
1760         struct netconfmsg *ncm;
1761
1762         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1763                         flags);
1764         if (!nlh)
1765                 return -EMSGSIZE;
1766
1767         ncm = nlmsg_data(nlh);
1768         ncm->ncm_family = AF_INET;
1769
1770         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1771                 goto nla_put_failure;
1772
1773         /* type -1 is used for ALL */
1774         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1775             nla_put_s32(skb, NETCONFA_FORWARDING,
1776                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1777                 goto nla_put_failure;
1778         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1779             nla_put_s32(skb, NETCONFA_RP_FILTER,
1780                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1781                 goto nla_put_failure;
1782         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1783             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1784                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1785                 goto nla_put_failure;
1786         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1787             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1788                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1789                 goto nla_put_failure;
1790         if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1791             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1792                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1793                 goto nla_put_failure;
1794
1795         nlmsg_end(skb, nlh);
1796         return 0;
1797
1798 nla_put_failure:
1799         nlmsg_cancel(skb, nlh);
1800         return -EMSGSIZE;
1801 }
1802
1803 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1804                                  struct ipv4_devconf *devconf)
1805 {
1806         struct sk_buff *skb;
1807         int err = -ENOBUFS;
1808
1809         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1810         if (!skb)
1811                 goto errout;
1812
1813         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1814                                         RTM_NEWNETCONF, 0, type);
1815         if (err < 0) {
1816                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1817                 WARN_ON(err == -EMSGSIZE);
1818                 kfree_skb(skb);
1819                 goto errout;
1820         }
1821         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1822         return;
1823 errout:
1824         if (err < 0)
1825                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1826 }
1827
1828 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1829         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1830         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1831         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1832         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1833         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1834 };
1835
1836 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1837                                     struct nlmsghdr *nlh)
1838 {
1839         struct net *net = sock_net(in_skb->sk);
1840         struct nlattr *tb[NETCONFA_MAX+1];
1841         struct netconfmsg *ncm;
1842         struct sk_buff *skb;
1843         struct ipv4_devconf *devconf;
1844         struct in_device *in_dev;
1845         struct net_device *dev;
1846         int ifindex;
1847         int err;
1848
1849         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1850                           devconf_ipv4_policy);
1851         if (err < 0)
1852                 goto errout;
1853
1854         err = -EINVAL;
1855         if (!tb[NETCONFA_IFINDEX])
1856                 goto errout;
1857
1858         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1859         switch (ifindex) {
1860         case NETCONFA_IFINDEX_ALL:
1861                 devconf = net->ipv4.devconf_all;
1862                 break;
1863         case NETCONFA_IFINDEX_DEFAULT:
1864                 devconf = net->ipv4.devconf_dflt;
1865                 break;
1866         default:
1867                 dev = __dev_get_by_index(net, ifindex);
1868                 if (!dev)
1869                         goto errout;
1870                 in_dev = __in_dev_get_rtnl(dev);
1871                 if (!in_dev)
1872                         goto errout;
1873                 devconf = &in_dev->cnf;
1874                 break;
1875         }
1876
1877         err = -ENOBUFS;
1878         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1879         if (!skb)
1880                 goto errout;
1881
1882         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1883                                         NETLINK_CB(in_skb).portid,
1884                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1885                                         -1);
1886         if (err < 0) {
1887                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1888                 WARN_ON(err == -EMSGSIZE);
1889                 kfree_skb(skb);
1890                 goto errout;
1891         }
1892         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1893 errout:
1894         return err;
1895 }
1896
1897 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1898                                      struct netlink_callback *cb)
1899 {
1900         struct net *net = sock_net(skb->sk);
1901         int h, s_h;
1902         int idx, s_idx;
1903         struct net_device *dev;
1904         struct in_device *in_dev;
1905         struct hlist_head *head;
1906
1907         s_h = cb->args[0];
1908         s_idx = idx = cb->args[1];
1909
1910         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1911                 idx = 0;
1912                 head = &net->dev_index_head[h];
1913                 rcu_read_lock();
1914                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1915                           net->dev_base_seq;
1916                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1917                         if (idx < s_idx)
1918                                 goto cont;
1919                         in_dev = __in_dev_get_rcu(dev);
1920                         if (!in_dev)
1921                                 goto cont;
1922
1923                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1924                                                       &in_dev->cnf,
1925                                                       NETLINK_CB(cb->skb).portid,
1926                                                       cb->nlh->nlmsg_seq,
1927                                                       RTM_NEWNETCONF,
1928                                                       NLM_F_MULTI,
1929                                                       -1) < 0) {
1930                                 rcu_read_unlock();
1931                                 goto done;
1932                         }
1933                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1934 cont:
1935                         idx++;
1936                 }
1937                 rcu_read_unlock();
1938         }
1939         if (h == NETDEV_HASHENTRIES) {
1940                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1941                                               net->ipv4.devconf_all,
1942                                               NETLINK_CB(cb->skb).portid,
1943                                               cb->nlh->nlmsg_seq,
1944                                               RTM_NEWNETCONF, NLM_F_MULTI,
1945                                               -1) < 0)
1946                         goto done;
1947                 else
1948                         h++;
1949         }
1950         if (h == NETDEV_HASHENTRIES + 1) {
1951                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1952                                               net->ipv4.devconf_dflt,
1953                                               NETLINK_CB(cb->skb).portid,
1954                                               cb->nlh->nlmsg_seq,
1955                                               RTM_NEWNETCONF, NLM_F_MULTI,
1956                                               -1) < 0)
1957                         goto done;
1958                 else
1959                         h++;
1960         }
1961 done:
1962         cb->args[0] = h;
1963         cb->args[1] = idx;
1964
1965         return skb->len;
1966 }
1967
1968 #ifdef CONFIG_SYSCTL
1969
1970 static void devinet_copy_dflt_conf(struct net *net, int i)
1971 {
1972         struct net_device *dev;
1973
1974         rcu_read_lock();
1975         for_each_netdev_rcu(net, dev) {
1976                 struct in_device *in_dev;
1977
1978                 in_dev = __in_dev_get_rcu(dev);
1979                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1980                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1981         }
1982         rcu_read_unlock();
1983 }
1984
1985 /* called with RTNL locked */
1986 static void inet_forward_change(struct net *net)
1987 {
1988         struct net_device *dev;
1989         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1990
1991         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1992         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1993         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1994                                     NETCONFA_IFINDEX_ALL,
1995                                     net->ipv4.devconf_all);
1996         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1997                                     NETCONFA_IFINDEX_DEFAULT,
1998                                     net->ipv4.devconf_dflt);
1999
2000         for_each_netdev(net, dev) {
2001                 struct in_device *in_dev;
2002                 if (on)
2003                         dev_disable_lro(dev);
2004                 rcu_read_lock();
2005                 in_dev = __in_dev_get_rcu(dev);
2006                 if (in_dev) {
2007                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2008                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2009                                                     dev->ifindex, &in_dev->cnf);
2010                 }
2011                 rcu_read_unlock();
2012         }
2013 }
2014
2015 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2016 {
2017         if (cnf == net->ipv4.devconf_dflt)
2018                 return NETCONFA_IFINDEX_DEFAULT;
2019         else if (cnf == net->ipv4.devconf_all)
2020                 return NETCONFA_IFINDEX_ALL;
2021         else {
2022                 struct in_device *idev
2023                         = container_of(cnf, struct in_device, cnf);
2024                 return idev->dev->ifindex;
2025         }
2026 }
2027
2028 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2029                              void __user *buffer,
2030                              size_t *lenp, loff_t *ppos)
2031 {
2032         int old_value = *(int *)ctl->data;
2033         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2034         int new_value = *(int *)ctl->data;
2035
2036         if (write) {
2037                 struct ipv4_devconf *cnf = ctl->extra1;
2038                 struct net *net = ctl->extra2;
2039                 int i = (int *)ctl->data - cnf->data;
2040                 int ifindex;
2041
2042                 set_bit(i, cnf->state);
2043
2044                 if (cnf == net->ipv4.devconf_dflt)
2045                         devinet_copy_dflt_conf(net, i);
2046                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2047                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2048                         if ((new_value == 0) && (old_value != 0))
2049                                 rt_cache_flush(net);
2050
2051                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2052                     new_value != old_value) {
2053                         ifindex = devinet_conf_ifindex(net, cnf);
2054                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2055                                                     ifindex, cnf);
2056                 }
2057                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2058                     new_value != old_value) {
2059                         ifindex = devinet_conf_ifindex(net, cnf);
2060                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2061                                                     ifindex, cnf);
2062                 }
2063                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2064                     new_value != old_value) {
2065                         ifindex = devinet_conf_ifindex(net, cnf);
2066                         inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2067                                                     ifindex, cnf);
2068                 }
2069         }
2070
2071         return ret;
2072 }
2073
2074 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2075                                   void __user *buffer,
2076                                   size_t *lenp, loff_t *ppos)
2077 {
2078         int *valp = ctl->data;
2079         int val = *valp;
2080         loff_t pos = *ppos;
2081         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2082
2083         if (write && *valp != val) {
2084                 struct net *net = ctl->extra2;
2085
2086                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2087                         if (!rtnl_trylock()) {
2088                                 /* Restore the original values before restarting */
2089                                 *valp = val;
2090                                 *ppos = pos;
2091                                 return restart_syscall();
2092                         }
2093                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2094                                 inet_forward_change(net);
2095                         } else {
2096                                 struct ipv4_devconf *cnf = ctl->extra1;
2097                                 struct in_device *idev =
2098                                         container_of(cnf, struct in_device, cnf);
2099                                 if (*valp)
2100                                         dev_disable_lro(idev->dev);
2101                                 inet_netconf_notify_devconf(net,
2102                                                             NETCONFA_FORWARDING,
2103                                                             idev->dev->ifindex,
2104                                                             cnf);
2105                         }
2106                         rtnl_unlock();
2107                         rt_cache_flush(net);
2108                 } else
2109                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2110                                                     NETCONFA_IFINDEX_DEFAULT,
2111                                                     net->ipv4.devconf_dflt);
2112         }
2113
2114         return ret;
2115 }
2116
2117 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2118                                 void __user *buffer,
2119                                 size_t *lenp, loff_t *ppos)
2120 {
2121         int *valp = ctl->data;
2122         int val = *valp;
2123         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2124         struct net *net = ctl->extra2;
2125
2126         if (write && *valp != val)
2127                 rt_cache_flush(net);
2128
2129         return ret;
2130 }
2131
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data points at the attribute's slot in the global ipv4_devconf (devconf
 * ids are 1-based, the data[] array is 0-based) and .extra1 at ipv4_devconf
 * itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)                    \
        {                                                               \
                .procname       = name,                                 \
                .data           = &ipv4_devconf.data[                   \
                                        IPV4_DEVCONF_ ## attr - 1],     \
                .maxlen         = sizeof(int),                          \
                .mode           = mval,                                 \
                .proc_handler   = proc,                                 \
                .extra1         = &ipv4_devconf,                        \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)                             \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)                             \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)                  \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)                       \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2154
2155 static struct devinet_sysctl_table {
2156         struct ctl_table_header *sysctl_header;
2157         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2158 } devinet_sysctl = {
2159         .devinet_vars = {
2160                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2161                                              devinet_sysctl_forward),
2162                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2163
2164                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2165                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2166                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2167                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2168                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2169                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2170                                         "accept_source_route"),
2171                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2172                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2173                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2174                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2175                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2176                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2177                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2178                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2179                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2180                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2181                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2182                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2183                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2184                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2185                                         "force_igmp_version"),
2186                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2187                                         "igmpv2_unsolicited_report_interval"),
2188                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2189                                         "igmpv3_unsolicited_report_interval"),
2190                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2191                                         "ignore_routes_with_linkdown"),
2192
2193                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2194                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2195                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2196                                               "promote_secondaries"),
2197                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2198                                               "route_localnet"),
2199         },
2200 };
2201
2202 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2203                                         struct ipv4_devconf *p)
2204 {
2205         int i;
2206         struct devinet_sysctl_table *t;
2207         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2208
2209         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2210         if (!t)
2211                 goto out;
2212
2213         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2214                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2215                 t->devinet_vars[i].extra1 = p;
2216                 t->devinet_vars[i].extra2 = net;
2217         }
2218
2219         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2220
2221         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2222         if (!t->sysctl_header)
2223                 goto free;
2224
2225         p->sysctl = t;
2226         return 0;
2227
2228 free:
2229         kfree(t);
2230 out:
2231         return -ENOBUFS;
2232 }
2233
2234 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2235 {
2236         struct devinet_sysctl_table *t = cnf->sysctl;
2237
2238         if (!t)
2239                 return;
2240
2241         cnf->sysctl = NULL;
2242         unregister_net_sysctl_table(t->sysctl_header);
2243         kfree(t);
2244 }
2245
2246 static int devinet_sysctl_register(struct in_device *idev)
2247 {
2248         int err;
2249
2250         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2251                 return -EINVAL;
2252
2253         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2254         if (err)
2255                 return err;
2256         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2257                                         &idev->cnf);
2258         if (err)
2259                 neigh_sysctl_unregister(idev->arp_parms);
2260         return err;
2261 }
2262
2263 static void devinet_sysctl_unregister(struct in_device *idev)
2264 {
2265         __devinet_sysctl_unregister(&idev->cnf);
2266         neigh_sysctl_unregister(idev->arp_parms);
2267 }
2268
2269 static struct ctl_table ctl_forward_entry[] = {
2270         {
2271                 .procname       = "ip_forward",
2272                 .data           = &ipv4_devconf.data[
2273                                         IPV4_DEVCONF_FORWARDING - 1],
2274                 .maxlen         = sizeof(int),
2275                 .mode           = 0644,
2276                 .proc_handler   = devinet_sysctl_forward,
2277                 .extra1         = &ipv4_devconf,
2278                 .extra2         = &init_net,
2279         },
2280         { },
2281 };
2282 #endif
2283
2284 static __net_init int devinet_init_net(struct net *net)
2285 {
2286         int err;
2287         struct ipv4_devconf *all, *dflt;
2288 #ifdef CONFIG_SYSCTL
2289         struct ctl_table *tbl = ctl_forward_entry;
2290         struct ctl_table_header *forw_hdr;
2291 #endif
2292
2293         err = -ENOMEM;
2294         all = &ipv4_devconf;
2295         dflt = &ipv4_devconf_dflt;
2296
2297         if (!net_eq(net, &init_net)) {
2298                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2299                 if (!all)
2300                         goto err_alloc_all;
2301
2302                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2303                 if (!dflt)
2304                         goto err_alloc_dflt;
2305
2306 #ifdef CONFIG_SYSCTL
2307                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2308                 if (!tbl)
2309                         goto err_alloc_ctl;
2310
2311                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2312                 tbl[0].extra1 = all;
2313                 tbl[0].extra2 = net;
2314 #endif
2315         }
2316
2317 #ifdef CONFIG_SYSCTL
2318         err = __devinet_sysctl_register(net, "all", all);
2319         if (err < 0)
2320                 goto err_reg_all;
2321
2322         err = __devinet_sysctl_register(net, "default", dflt);
2323         if (err < 0)
2324                 goto err_reg_dflt;
2325
2326         err = -ENOMEM;
2327         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2328         if (!forw_hdr)
2329                 goto err_reg_ctl;
2330         net->ipv4.forw_hdr = forw_hdr;
2331 #endif
2332
2333         net->ipv4.devconf_all = all;
2334         net->ipv4.devconf_dflt = dflt;
2335         return 0;
2336
2337 #ifdef CONFIG_SYSCTL
2338 err_reg_ctl:
2339         __devinet_sysctl_unregister(dflt);
2340 err_reg_dflt:
2341         __devinet_sysctl_unregister(all);
2342 err_reg_all:
2343         if (tbl != ctl_forward_entry)
2344                 kfree(tbl);
2345 err_alloc_ctl:
2346 #endif
2347         if (dflt != &ipv4_devconf_dflt)
2348                 kfree(dflt);
2349 err_alloc_dflt:
2350         if (all != &ipv4_devconf)
2351                 kfree(all);
2352 err_alloc_all:
2353         return err;
2354 }
2355
2356 static __net_exit void devinet_exit_net(struct net *net)
2357 {
2358 #ifdef CONFIG_SYSCTL
2359         struct ctl_table *tbl;
2360
2361         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2362         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2363         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2364         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2365         kfree(tbl);
2366 #endif
2367         kfree(net->ipv4.devconf_dflt);
2368         kfree(net->ipv4.devconf_all);
2369 }
2370
2371 static __net_initdata struct pernet_operations devinet_ops = {
2372         .init = devinet_init_net,
2373         .exit = devinet_exit_net,
2374 };
2375
2376 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2377         .family           = AF_INET,
2378         .fill_link_af     = inet_fill_link_af,
2379         .get_link_af_size = inet_get_link_af_size,
2380         .validate_link_af = inet_validate_link_af,
2381         .set_link_af      = inet_set_link_af,
2382 };
2383
2384 void __init devinet_init(void)
2385 {
2386         int i;
2387
2388         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2389                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2390
2391         register_pernet_subsys(&devinet_ops);
2392
2393         register_gifconf(PF_INET, inet_gifconf);
2394         register_netdevice_notifier(&ip_netdev_notifier);
2395
2396         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2397
2398         rtnl_af_register(&inet_af_ops);
2399
2400         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2401         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2402         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2403         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2404                       inet_netconf_dump_devconf, NULL);
2405 }