Upgrade to 4.4.50-rt62
[kvmfornfv.git] / kernel / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65
66 #include <asm/uaccess.h>
67
68 #ifdef CONFIG_SYSCTL
69 #include <linux/sysctl.h>
70 #endif
71
72 enum rt6_nud_state {
73         RT6_NUD_FAIL_HARD = -3,
74         RT6_NUD_FAIL_PROBE = -2,
75         RT6_NUD_FAIL_DO_RR = -1,
76         RT6_NUD_SUCCEED = 1
77 };
78
79 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
80 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
81 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
82 static unsigned int      ip6_mtu(const struct dst_entry *dst);
83 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
84 static void             ip6_dst_destroy(struct dst_entry *);
85 static void             ip6_dst_ifdown(struct dst_entry *,
86                                        struct net_device *dev, int how);
87 static int               ip6_dst_gc(struct dst_ops *ops);
88
89 static int              ip6_pkt_discard(struct sk_buff *skb);
90 static int              ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
91 static int              ip6_pkt_prohibit(struct sk_buff *skb);
92 static int              ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
95                                            struct sk_buff *skb, u32 mtu);
96 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
97                                         struct sk_buff *skb);
98 static void             rt6_dst_from_metrics_check(struct rt6_info *rt);
99 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct net *net,
103                                            const struct in6_addr *prefix, int prefixlen,
104                                            const struct in6_addr *gwaddr, int ifindex,
105                                            unsigned int pref);
106 static struct rt6_info *rt6_get_route_info(struct net *net,
107                                            const struct in6_addr *prefix, int prefixlen,
108                                            const struct in6_addr *gwaddr, int ifindex);
109 #endif
110
111 struct uncached_list {
112         spinlock_t              lock;
113         struct list_head        head;
114 };
115
116 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
117
118 static void rt6_uncached_list_add(struct rt6_info *rt)
119 {
120         struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
121
122         rt->dst.flags |= DST_NOCACHE;
123         rt->rt6i_uncached_list = ul;
124
125         spin_lock_bh(&ul->lock);
126         list_add_tail(&rt->rt6i_uncached, &ul->head);
127         spin_unlock_bh(&ul->lock);
128 }
129
130 static void rt6_uncached_list_del(struct rt6_info *rt)
131 {
132         if (!list_empty(&rt->rt6i_uncached)) {
133                 struct uncached_list *ul = rt->rt6i_uncached_list;
134
135                 spin_lock_bh(&ul->lock);
136                 list_del(&rt->rt6i_uncached);
137                 spin_unlock_bh(&ul->lock);
138         }
139 }
140
141 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
142 {
143         struct net_device *loopback_dev = net->loopback_dev;
144         int cpu;
145
146         if (dev == loopback_dev)
147                 return;
148
149         for_each_possible_cpu(cpu) {
150                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
151                 struct rt6_info *rt;
152
153                 spin_lock_bh(&ul->lock);
154                 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
155                         struct inet6_dev *rt_idev = rt->rt6i_idev;
156                         struct net_device *rt_dev = rt->dst.dev;
157
158                         if (rt_idev->dev == dev) {
159                                 rt->rt6i_idev = in6_dev_get(loopback_dev);
160                                 in6_dev_put(rt_idev);
161                         }
162
163                         if (rt_dev == dev) {
164                                 rt->dst.dev = loopback_dev;
165                                 dev_hold(rt->dst.dev);
166                                 dev_put(rt_dev);
167                         }
168                 }
169                 spin_unlock_bh(&ul->lock);
170         }
171 }
172
173 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
174 {
175         return dst_metrics_write_ptr(rt->dst.from);
176 }
177
178 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
179 {
180         struct rt6_info *rt = (struct rt6_info *)dst;
181
182         if (rt->rt6i_flags & RTF_PCPU)
183                 return rt6_pcpu_cow_metrics(rt);
184         else if (rt->rt6i_flags & RTF_CACHE)
185                 return NULL;
186         else
187                 return dst_cow_metrics_generic(dst, old);
188 }
189
190 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
191                                              struct sk_buff *skb,
192                                              const void *daddr)
193 {
194         struct in6_addr *p = &rt->rt6i_gateway;
195
196         if (!ipv6_addr_any(p))
197                 return (const void *) p;
198         else if (skb)
199                 return &ipv6_hdr(skb)->daddr;
200         return daddr;
201 }
202
203 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
204                                           struct sk_buff *skb,
205                                           const void *daddr)
206 {
207         struct rt6_info *rt = (struct rt6_info *) dst;
208         struct neighbour *n;
209
210         daddr = choose_neigh_daddr(rt, skb, daddr);
211         n = __ipv6_neigh_lookup(dst->dev, daddr);
212         if (n)
213                 return n;
214         return neigh_create(&nd_tbl, daddr, dst->dev);
215 }
216
217 static struct dst_ops ip6_dst_ops_template = {
218         .family                 =       AF_INET6,
219         .gc                     =       ip6_dst_gc,
220         .gc_thresh              =       1024,
221         .check                  =       ip6_dst_check,
222         .default_advmss         =       ip6_default_advmss,
223         .mtu                    =       ip6_mtu,
224         .cow_metrics            =       ipv6_cow_metrics,
225         .destroy                =       ip6_dst_destroy,
226         .ifdown                 =       ip6_dst_ifdown,
227         .negative_advice        =       ip6_negative_advice,
228         .link_failure           =       ip6_link_failure,
229         .update_pmtu            =       ip6_rt_update_pmtu,
230         .redirect               =       rt6_do_redirect,
231         .local_out              =       __ip6_local_out,
232         .neigh_lookup           =       ip6_neigh_lookup,
233 };
234
235 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
236 {
237         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
238
239         return mtu ? : dst->dev->mtu;
240 }
241
242 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
243                                          struct sk_buff *skb, u32 mtu)
244 {
245 }
246
247 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
248                                       struct sk_buff *skb)
249 {
250 }
251
252 static struct dst_ops ip6_dst_blackhole_ops = {
253         .family                 =       AF_INET6,
254         .destroy                =       ip6_dst_destroy,
255         .check                  =       ip6_dst_check,
256         .mtu                    =       ip6_blackhole_mtu,
257         .default_advmss         =       ip6_default_advmss,
258         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
259         .redirect               =       ip6_rt_blackhole_redirect,
260         .cow_metrics            =       dst_cow_metrics_generic,
261         .neigh_lookup           =       ip6_neigh_lookup,
262 };
263
264 static const u32 ip6_template_metrics[RTAX_MAX] = {
265         [RTAX_HOPLIMIT - 1] = 0,
266 };
267
268 static const struct rt6_info ip6_null_entry_template = {
269         .dst = {
270                 .__refcnt       = ATOMIC_INIT(1),
271                 .__use          = 1,
272                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
273                 .error          = -ENETUNREACH,
274                 .input          = ip6_pkt_discard,
275                 .output         = ip6_pkt_discard_out,
276         },
277         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
278         .rt6i_protocol  = RTPROT_KERNEL,
279         .rt6i_metric    = ~(u32) 0,
280         .rt6i_ref       = ATOMIC_INIT(1),
281 };
282
283 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
284
285 static const struct rt6_info ip6_prohibit_entry_template = {
286         .dst = {
287                 .__refcnt       = ATOMIC_INIT(1),
288                 .__use          = 1,
289                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
290                 .error          = -EACCES,
291                 .input          = ip6_pkt_prohibit,
292                 .output         = ip6_pkt_prohibit_out,
293         },
294         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
295         .rt6i_protocol  = RTPROT_KERNEL,
296         .rt6i_metric    = ~(u32) 0,
297         .rt6i_ref       = ATOMIC_INIT(1),
298 };
299
300 static const struct rt6_info ip6_blk_hole_entry_template = {
301         .dst = {
302                 .__refcnt       = ATOMIC_INIT(1),
303                 .__use          = 1,
304                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
305                 .error          = -EINVAL,
306                 .input          = dst_discard,
307                 .output         = dst_discard_out,
308         },
309         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
310         .rt6i_protocol  = RTPROT_KERNEL,
311         .rt6i_metric    = ~(u32) 0,
312         .rt6i_ref       = ATOMIC_INIT(1),
313 };
314
315 #endif
316
317 static void rt6_info_init(struct rt6_info *rt)
318 {
319         struct dst_entry *dst = &rt->dst;
320
321         memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
322         INIT_LIST_HEAD(&rt->rt6i_siblings);
323         INIT_LIST_HEAD(&rt->rt6i_uncached);
324 }
325
326 /* allocate dst with ip6_dst_ops */
327 static struct rt6_info *__ip6_dst_alloc(struct net *net,
328                                         struct net_device *dev,
329                                         int flags)
330 {
331         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
332                                         0, DST_OBSOLETE_FORCE_CHK, flags);
333
334         if (rt)
335                 rt6_info_init(rt);
336
337         return rt;
338 }
339
340 static struct rt6_info *ip6_dst_alloc(struct net *net,
341                                       struct net_device *dev,
342                                       int flags)
343 {
344         struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
345
346         if (rt) {
347                 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
348                 if (rt->rt6i_pcpu) {
349                         int cpu;
350
351                         for_each_possible_cpu(cpu) {
352                                 struct rt6_info **p;
353
354                                 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
355                                 /* no one shares rt */
356                                 *p =  NULL;
357                         }
358                 } else {
359                         dst_destroy((struct dst_entry *)rt);
360                         return NULL;
361                 }
362         }
363
364         return rt;
365 }
366
367 static void ip6_dst_destroy(struct dst_entry *dst)
368 {
369         struct rt6_info *rt = (struct rt6_info *)dst;
370         struct dst_entry *from = dst->from;
371         struct inet6_dev *idev;
372
373         dst_destroy_metrics_generic(dst);
374         free_percpu(rt->rt6i_pcpu);
375         rt6_uncached_list_del(rt);
376
377         idev = rt->rt6i_idev;
378         if (idev) {
379                 rt->rt6i_idev = NULL;
380                 in6_dev_put(idev);
381         }
382
383         dst->from = NULL;
384         dst_release(from);
385 }
386
387 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388                            int how)
389 {
390         struct rt6_info *rt = (struct rt6_info *)dst;
391         struct inet6_dev *idev = rt->rt6i_idev;
392         struct net_device *loopback_dev =
393                 dev_net(dev)->loopback_dev;
394
395         if (dev != loopback_dev) {
396                 if (idev && idev->dev == dev) {
397                         struct inet6_dev *loopback_idev =
398                                 in6_dev_get(loopback_dev);
399                         if (loopback_idev) {
400                                 rt->rt6i_idev = loopback_idev;
401                                 in6_dev_put(idev);
402                         }
403                 }
404         }
405 }
406
407 static bool __rt6_check_expired(const struct rt6_info *rt)
408 {
409         if (rt->rt6i_flags & RTF_EXPIRES)
410                 return time_after(jiffies, rt->dst.expires);
411         else
412                 return false;
413 }
414
415 static bool rt6_check_expired(const struct rt6_info *rt)
416 {
417         if (rt->rt6i_flags & RTF_EXPIRES) {
418                 if (time_after(jiffies, rt->dst.expires))
419                         return true;
420         } else if (rt->dst.from) {
421                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
422         }
423         return false;
424 }
425
426 /* Multipath route selection:
427  *   Hash based function using packet header and flowlabel.
428  * Adapted from fib_info_hashfn()
429  */
430 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
431                                const struct flowi6 *fl6)
432 {
433         return get_hash_from_flowi6(fl6) % candidate_count;
434 }
435
436 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
437                                              struct flowi6 *fl6, int oif,
438                                              int strict)
439 {
440         struct rt6_info *sibling, *next_sibling;
441         int route_choosen;
442
443         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
444         /* Don't change the route, if route_choosen == 0
445          * (siblings does not include ourself)
446          */
447         if (route_choosen)
448                 list_for_each_entry_safe(sibling, next_sibling,
449                                 &match->rt6i_siblings, rt6i_siblings) {
450                         route_choosen--;
451                         if (route_choosen == 0) {
452                                 if (rt6_score_route(sibling, oif, strict) < 0)
453                                         break;
454                                 match = sibling;
455                                 break;
456                         }
457                 }
458         return match;
459 }
460
461 /*
462  *      Route lookup. Any table->tb6_lock is implied.
463  */
464
465 static inline struct rt6_info *rt6_device_match(struct net *net,
466                                                     struct rt6_info *rt,
467                                                     const struct in6_addr *saddr,
468                                                     int oif,
469                                                     int flags)
470 {
471         struct rt6_info *local = NULL;
472         struct rt6_info *sprt;
473
474         if (!oif && ipv6_addr_any(saddr))
475                 goto out;
476
477         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
478                 struct net_device *dev = sprt->dst.dev;
479
480                 if (oif) {
481                         if (dev->ifindex == oif)
482                                 return sprt;
483                         if (dev->flags & IFF_LOOPBACK) {
484                                 if (!sprt->rt6i_idev ||
485                                     sprt->rt6i_idev->dev->ifindex != oif) {
486                                         if (flags & RT6_LOOKUP_F_IFACE)
487                                                 continue;
488                                         if (local &&
489                                             local->rt6i_idev->dev->ifindex == oif)
490                                                 continue;
491                                 }
492                                 local = sprt;
493                         }
494                 } else {
495                         if (ipv6_chk_addr(net, saddr, dev,
496                                           flags & RT6_LOOKUP_F_IFACE))
497                                 return sprt;
498                 }
499         }
500
501         if (oif) {
502                 if (local)
503                         return local;
504
505                 if (flags & RT6_LOOKUP_F_IFACE)
506                         return net->ipv6.ip6_null_entry;
507         }
508 out:
509         return rt;
510 }
511
512 #ifdef CONFIG_IPV6_ROUTER_PREF
513 struct __rt6_probe_work {
514         struct work_struct work;
515         struct in6_addr target;
516         struct net_device *dev;
517 };
518
519 static void rt6_probe_deferred(struct work_struct *w)
520 {
521         struct in6_addr mcaddr;
522         struct __rt6_probe_work *work =
523                 container_of(w, struct __rt6_probe_work, work);
524
525         addrconf_addr_solict_mult(&work->target, &mcaddr);
526         ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
527         dev_put(work->dev);
528         kfree(work);
529 }
530
531 static void rt6_probe(struct rt6_info *rt)
532 {
533         struct __rt6_probe_work *work;
534         struct neighbour *neigh;
535         /*
536          * Okay, this does not seem to be appropriate
537          * for now, however, we need to check if it
538          * is really so; aka Router Reachability Probing.
539          *
540          * Router Reachability Probe MUST be rate-limited
541          * to no more than one per minute.
542          */
543         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
544                 return;
545         rcu_read_lock_bh();
546         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
547         if (neigh) {
548                 if (neigh->nud_state & NUD_VALID)
549                         goto out;
550
551                 work = NULL;
552                 write_lock(&neigh->lock);
553                 if (!(neigh->nud_state & NUD_VALID) &&
554                     time_after(jiffies,
555                                neigh->updated +
556                                rt->rt6i_idev->cnf.rtr_probe_interval)) {
557                         work = kmalloc(sizeof(*work), GFP_ATOMIC);
558                         if (work)
559                                 __neigh_set_probe_once(neigh);
560                 }
561                 write_unlock(&neigh->lock);
562         } else {
563                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
564         }
565
566         if (work) {
567                 INIT_WORK(&work->work, rt6_probe_deferred);
568                 work->target = rt->rt6i_gateway;
569                 dev_hold(rt->dst.dev);
570                 work->dev = rt->dst.dev;
571                 schedule_work(&work->work);
572         }
573
574 out:
575         rcu_read_unlock_bh();
576 }
577 #else
578 static inline void rt6_probe(struct rt6_info *rt)
579 {
580 }
581 #endif
582
583 /*
584  * Default Router Selection (RFC 2461 6.3.6)
585  */
586 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
587 {
588         struct net_device *dev = rt->dst.dev;
589         if (!oif || dev->ifindex == oif)
590                 return 2;
591         if ((dev->flags & IFF_LOOPBACK) &&
592             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
593                 return 1;
594         return 0;
595 }
596
597 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
598 {
599         struct neighbour *neigh;
600         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
601
602         if (rt->rt6i_flags & RTF_NONEXTHOP ||
603             !(rt->rt6i_flags & RTF_GATEWAY))
604                 return RT6_NUD_SUCCEED;
605
606         rcu_read_lock_bh();
607         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
608         if (neigh) {
609                 read_lock(&neigh->lock);
610                 if (neigh->nud_state & NUD_VALID)
611                         ret = RT6_NUD_SUCCEED;
612 #ifdef CONFIG_IPV6_ROUTER_PREF
613                 else if (!(neigh->nud_state & NUD_FAILED))
614                         ret = RT6_NUD_SUCCEED;
615                 else
616                         ret = RT6_NUD_FAIL_PROBE;
617 #endif
618                 read_unlock(&neigh->lock);
619         } else {
620                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
621                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
622         }
623         rcu_read_unlock_bh();
624
625         return ret;
626 }
627
628 static int rt6_score_route(struct rt6_info *rt, int oif,
629                            int strict)
630 {
631         int m;
632
633         m = rt6_check_dev(rt, oif);
634         if (!m && (strict & RT6_LOOKUP_F_IFACE))
635                 return RT6_NUD_FAIL_HARD;
636 #ifdef CONFIG_IPV6_ROUTER_PREF
637         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
638 #endif
639         if (strict & RT6_LOOKUP_F_REACHABLE) {
640                 int n = rt6_check_neigh(rt);
641                 if (n < 0)
642                         return n;
643         }
644         return m;
645 }
646
647 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
648                                    int *mpri, struct rt6_info *match,
649                                    bool *do_rr)
650 {
651         int m;
652         bool match_do_rr = false;
653         struct inet6_dev *idev = rt->rt6i_idev;
654         struct net_device *dev = rt->dst.dev;
655
656         if (dev && !netif_carrier_ok(dev) &&
657             idev->cnf.ignore_routes_with_linkdown)
658                 goto out;
659
660         if (rt6_check_expired(rt))
661                 goto out;
662
663         m = rt6_score_route(rt, oif, strict);
664         if (m == RT6_NUD_FAIL_DO_RR) {
665                 match_do_rr = true;
666                 m = 0; /* lowest valid score */
667         } else if (m == RT6_NUD_FAIL_HARD) {
668                 goto out;
669         }
670
671         if (strict & RT6_LOOKUP_F_REACHABLE)
672                 rt6_probe(rt);
673
674         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
675         if (m > *mpri) {
676                 *do_rr = match_do_rr;
677                 *mpri = m;
678                 match = rt;
679         }
680 out:
681         return match;
682 }
683
684 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
685                                      struct rt6_info *rr_head,
686                                      u32 metric, int oif, int strict,
687                                      bool *do_rr)
688 {
689         struct rt6_info *rt, *match, *cont;
690         int mpri = -1;
691
692         match = NULL;
693         cont = NULL;
694         for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
695                 if (rt->rt6i_metric != metric) {
696                         cont = rt;
697                         break;
698                 }
699
700                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
701         }
702
703         for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
704                 if (rt->rt6i_metric != metric) {
705                         cont = rt;
706                         break;
707                 }
708
709                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
710         }
711
712         if (match || !cont)
713                 return match;
714
715         for (rt = cont; rt; rt = rt->dst.rt6_next)
716                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
717
718         return match;
719 }
720
721 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
722 {
723         struct rt6_info *match, *rt0;
724         struct net *net;
725         bool do_rr = false;
726
727         rt0 = fn->rr_ptr;
728         if (!rt0)
729                 fn->rr_ptr = rt0 = fn->leaf;
730
731         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
732                              &do_rr);
733
734         if (do_rr) {
735                 struct rt6_info *next = rt0->dst.rt6_next;
736
737                 /* no entries matched; do round-robin */
738                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
739                         next = fn->leaf;
740
741                 if (next != rt0)
742                         fn->rr_ptr = next;
743         }
744
745         net = dev_net(rt0->dst.dev);
746         return match ? match : net->ipv6.ip6_null_entry;
747 }
748
749 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
750 {
751         return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
752 }
753
754 #ifdef CONFIG_IPV6_ROUTE_INFO
755 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
756                   const struct in6_addr *gwaddr)
757 {
758         struct net *net = dev_net(dev);
759         struct route_info *rinfo = (struct route_info *) opt;
760         struct in6_addr prefix_buf, *prefix;
761         unsigned int pref;
762         unsigned long lifetime;
763         struct rt6_info *rt;
764
765         if (len < sizeof(struct route_info)) {
766                 return -EINVAL;
767         }
768
769         /* Sanity check for prefix_len and length */
770         if (rinfo->length > 3) {
771                 return -EINVAL;
772         } else if (rinfo->prefix_len > 128) {
773                 return -EINVAL;
774         } else if (rinfo->prefix_len > 64) {
775                 if (rinfo->length < 2) {
776                         return -EINVAL;
777                 }
778         } else if (rinfo->prefix_len > 0) {
779                 if (rinfo->length < 1) {
780                         return -EINVAL;
781                 }
782         }
783
784         pref = rinfo->route_pref;
785         if (pref == ICMPV6_ROUTER_PREF_INVALID)
786                 return -EINVAL;
787
788         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
789
790         if (rinfo->length == 3)
791                 prefix = (struct in6_addr *)rinfo->prefix;
792         else {
793                 /* this function is safe */
794                 ipv6_addr_prefix(&prefix_buf,
795                                  (struct in6_addr *)rinfo->prefix,
796                                  rinfo->prefix_len);
797                 prefix = &prefix_buf;
798         }
799
800         if (rinfo->prefix_len == 0)
801                 rt = rt6_get_dflt_router(gwaddr, dev);
802         else
803                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
804                                         gwaddr, dev->ifindex);
805
806         if (rt && !lifetime) {
807                 ip6_del_rt(rt);
808                 rt = NULL;
809         }
810
811         if (!rt && lifetime)
812                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
813                                         pref);
814         else if (rt)
815                 rt->rt6i_flags = RTF_ROUTEINFO |
816                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
817
818         if (rt) {
819                 if (!addrconf_finite_timeout(lifetime))
820                         rt6_clean_expires(rt);
821                 else
822                         rt6_set_expires(rt, jiffies + HZ * lifetime);
823
824                 ip6_rt_put(rt);
825         }
826         return 0;
827 }
828 #endif
829
830 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
831                                         struct in6_addr *saddr)
832 {
833         struct fib6_node *pn;
834         while (1) {
835                 if (fn->fn_flags & RTN_TL_ROOT)
836                         return NULL;
837                 pn = fn->parent;
838                 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
839                         fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
840                 else
841                         fn = pn;
842                 if (fn->fn_flags & RTN_RTINFO)
843                         return fn;
844         }
845 }
846
847 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
848                                              struct fib6_table *table,
849                                              struct flowi6 *fl6, int flags)
850 {
851         struct fib6_node *fn;
852         struct rt6_info *rt;
853
854         read_lock_bh(&table->tb6_lock);
855         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
856 restart:
857         rt = fn->leaf;
858         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
859         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
860                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
861         if (rt == net->ipv6.ip6_null_entry) {
862                 fn = fib6_backtrack(fn, &fl6->saddr);
863                 if (fn)
864                         goto restart;
865         }
866         dst_use(&rt->dst, jiffies);
867         read_unlock_bh(&table->tb6_lock);
868         return rt;
869
870 }
871
872 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
873                                     int flags)
874 {
875         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
876 }
877 EXPORT_SYMBOL_GPL(ip6_route_lookup);
878
879 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
880                             const struct in6_addr *saddr, int oif, int strict)
881 {
882         struct flowi6 fl6 = {
883                 .flowi6_oif = oif,
884                 .daddr = *daddr,
885         };
886         struct dst_entry *dst;
887         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
888
889         if (saddr) {
890                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
891                 flags |= RT6_LOOKUP_F_HAS_SADDR;
892         }
893
894         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
895         if (dst->error == 0)
896                 return (struct rt6_info *) dst;
897
898         dst_release(dst);
899
900         return NULL;
901 }
902 EXPORT_SYMBOL(rt6_lookup);
903
904 /* ip6_ins_rt is called with FREE table->tb6_lock.
905    It takes new route entry, the addition fails by any reason the
906    route is freed. In any case, if caller does not hold it, it may
907    be destroyed.
908  */
909
910 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
911                         struct mx6_config *mxc)
912 {
913         int err;
914         struct fib6_table *table;
915
916         table = rt->rt6i_table;
917         write_lock_bh(&table->tb6_lock);
918         err = fib6_add(&table->tb6_root, rt, info, mxc);
919         write_unlock_bh(&table->tb6_lock);
920
921         return err;
922 }
923
924 int ip6_ins_rt(struct rt6_info *rt)
925 {
926         struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
927         struct mx6_config mxc = { .mx = NULL, };
928
929         return __ip6_ins_rt(rt, &info, &mxc);
930 }
931
932 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
933                                            const struct in6_addr *daddr,
934                                            const struct in6_addr *saddr)
935 {
936         struct rt6_info *rt;
937
938         /*
939          *      Clone the route.
940          */
941
942         if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
943                 ort = (struct rt6_info *)ort->dst.from;
944
945         rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
946
947         if (!rt)
948                 return NULL;
949
950         ip6_rt_copy_init(rt, ort);
951         rt->rt6i_flags |= RTF_CACHE;
952         rt->rt6i_metric = 0;
953         rt->dst.flags |= DST_HOST;
954         rt->rt6i_dst.addr = *daddr;
955         rt->rt6i_dst.plen = 128;
956
957         if (!rt6_is_gw_or_nonexthop(ort)) {
958                 if (ort->rt6i_dst.plen != 128 &&
959                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
960                         rt->rt6i_flags |= RTF_ANYCAST;
961 #ifdef CONFIG_IPV6_SUBTREES
962                 if (rt->rt6i_src.plen && saddr) {
963                         rt->rt6i_src.addr = *saddr;
964                         rt->rt6i_src.plen = 128;
965                 }
966 #endif
967         }
968
969         return rt;
970 }
971
972 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
973 {
974         struct rt6_info *pcpu_rt;
975
976         pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
977                                   rt->dst.dev, rt->dst.flags);
978
979         if (!pcpu_rt)
980                 return NULL;
981         ip6_rt_copy_init(pcpu_rt, rt);
982         pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
983         pcpu_rt->rt6i_flags |= RTF_PCPU;
984         return pcpu_rt;
985 }
986
987 /* It should be called with read_lock_bh(&tb6_lock) acquired */
988 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
989 {
990         struct rt6_info *pcpu_rt, **p;
991
992         p = this_cpu_ptr(rt->rt6i_pcpu);
993         pcpu_rt = *p;
994
995         if (pcpu_rt) {
996                 dst_hold(&pcpu_rt->dst);
997                 rt6_dst_from_metrics_check(pcpu_rt);
998         }
999         return pcpu_rt;
1000 }
1001
1002 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1003 {
1004         struct fib6_table *table = rt->rt6i_table;
1005         struct rt6_info *pcpu_rt, *prev, **p;
1006
1007         pcpu_rt = ip6_rt_pcpu_alloc(rt);
1008         if (!pcpu_rt) {
1009                 struct net *net = dev_net(rt->dst.dev);
1010
1011                 dst_hold(&net->ipv6.ip6_null_entry->dst);
1012                 return net->ipv6.ip6_null_entry;
1013         }
1014
1015         read_lock_bh(&table->tb6_lock);
1016         if (rt->rt6i_pcpu) {
1017                 p = this_cpu_ptr(rt->rt6i_pcpu);
1018                 prev = cmpxchg(p, NULL, pcpu_rt);
1019                 if (prev) {
1020                         /* If someone did it before us, return prev instead */
1021                         dst_destroy(&pcpu_rt->dst);
1022                         pcpu_rt = prev;
1023                 }
1024         } else {
1025                 /* rt has been removed from the fib6 tree
1026                  * before we have a chance to acquire the read_lock.
1027                  * In this case, don't brother to create a pcpu rt
1028                  * since rt is going away anyway.  The next
1029                  * dst_check() will trigger a re-lookup.
1030                  */
1031                 dst_destroy(&pcpu_rt->dst);
1032                 pcpu_rt = rt;
1033         }
1034         dst_hold(&pcpu_rt->dst);
1035         rt6_dst_from_metrics_check(pcpu_rt);
1036         read_unlock_bh(&table->tb6_lock);
1037         return pcpu_rt;
1038 }
1039
1040 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
1041                                       struct flowi6 *fl6, int flags)
1042 {
1043         struct fib6_node *fn, *saved_fn;
1044         struct rt6_info *rt;
1045         int strict = 0;
1046
1047         strict |= flags & RT6_LOOKUP_F_IFACE;
1048         if (net->ipv6.devconf_all->forwarding == 0)
1049                 strict |= RT6_LOOKUP_F_REACHABLE;
1050
1051         read_lock_bh(&table->tb6_lock);
1052
1053         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1054         saved_fn = fn;
1055
1056         if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1057                 oif = 0;
1058
1059 redo_rt6_select:
1060         rt = rt6_select(fn, oif, strict);
1061         if (rt->rt6i_nsiblings)
1062                 rt = rt6_multipath_select(rt, fl6, oif, strict);
1063         if (rt == net->ipv6.ip6_null_entry) {
1064                 fn = fib6_backtrack(fn, &fl6->saddr);
1065                 if (fn)
1066                         goto redo_rt6_select;
1067                 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1068                         /* also consider unreachable route */
1069                         strict &= ~RT6_LOOKUP_F_REACHABLE;
1070                         fn = saved_fn;
1071                         goto redo_rt6_select;
1072                 }
1073         }
1074
1075
1076         if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
1077                 dst_use(&rt->dst, jiffies);
1078                 read_unlock_bh(&table->tb6_lock);
1079
1080                 rt6_dst_from_metrics_check(rt);
1081                 return rt;
1082         } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1083                             !(rt->rt6i_flags & RTF_GATEWAY))) {
1084                 /* Create a RTF_CACHE clone which will not be
1085                  * owned by the fib6 tree.  It is for the special case where
1086                  * the daddr in the skb during the neighbor look-up is different
1087                  * from the fl6->daddr used to look-up route here.
1088                  */
1089
1090                 struct rt6_info *uncached_rt;
1091
1092                 dst_use(&rt->dst, jiffies);
1093                 read_unlock_bh(&table->tb6_lock);
1094
1095                 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1096                 dst_release(&rt->dst);
1097
1098                 if (uncached_rt)
1099                         rt6_uncached_list_add(uncached_rt);
1100                 else
1101                         uncached_rt = net->ipv6.ip6_null_entry;
1102
1103                 dst_hold(&uncached_rt->dst);
1104                 return uncached_rt;
1105
1106         } else {
1107                 /* Get a percpu copy */
1108
1109                 struct rt6_info *pcpu_rt;
1110
1111                 rt->dst.lastuse = jiffies;
1112                 rt->dst.__use++;
1113                 pcpu_rt = rt6_get_pcpu_route(rt);
1114
1115                 if (pcpu_rt) {
1116                         read_unlock_bh(&table->tb6_lock);
1117                 } else {
1118                         /* We have to do the read_unlock first
1119                          * because rt6_make_pcpu_route() may trigger
1120                          * ip6_dst_gc() which will take the write_lock.
1121                          */
1122                         dst_hold(&rt->dst);
1123                         read_unlock_bh(&table->tb6_lock);
1124                         pcpu_rt = rt6_make_pcpu_route(rt);
1125                         dst_release(&rt->dst);
1126                 }
1127
1128                 return pcpu_rt;
1129
1130         }
1131 }
1132
1133 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
1134                                             struct flowi6 *fl6, int flags)
1135 {
1136         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
1137 }
1138
1139 static struct dst_entry *ip6_route_input_lookup(struct net *net,
1140                                                 struct net_device *dev,
1141                                                 struct flowi6 *fl6, int flags)
1142 {
1143         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1144                 flags |= RT6_LOOKUP_F_IFACE;
1145
1146         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1147 }
1148
1149 void ip6_route_input(struct sk_buff *skb)
1150 {
1151         const struct ipv6hdr *iph = ipv6_hdr(skb);
1152         struct net *net = dev_net(skb->dev);
1153         int flags = RT6_LOOKUP_F_HAS_SADDR;
1154         struct ip_tunnel_info *tun_info;
1155         struct flowi6 fl6 = {
1156                 .flowi6_iif = l3mdev_fib_oif(skb->dev),
1157                 .daddr = iph->daddr,
1158                 .saddr = iph->saddr,
1159                 .flowlabel = ip6_flowinfo(iph),
1160                 .flowi6_mark = skb->mark,
1161                 .flowi6_proto = iph->nexthdr,
1162         };
1163
1164         tun_info = skb_tunnel_info(skb);
1165         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
1166                 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
1167         skb_dst_drop(skb);
1168         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1169 }
1170
1171 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1172                                              struct flowi6 *fl6, int flags)
1173 {
1174         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1175 }
1176
1177 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1178                                          struct flowi6 *fl6, int flags)
1179 {
1180         struct dst_entry *dst;
1181         bool any_src;
1182
1183         dst = l3mdev_rt6_dst_by_oif(net, fl6);
1184         if (dst)
1185                 return dst;
1186
1187         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1188
1189         any_src = ipv6_addr_any(&fl6->saddr);
1190         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
1191             (fl6->flowi6_oif && any_src))
1192                 flags |= RT6_LOOKUP_F_IFACE;
1193
1194         if (!any_src)
1195                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1196         else if (sk)
1197                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1198
1199         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1200 }
1201 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1202
1203 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1204 {
1205         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1206         struct dst_entry *new = NULL;
1207
1208         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1209         if (rt) {
1210                 rt6_info_init(rt);
1211
1212                 new = &rt->dst;
1213                 new->__use = 1;
1214                 new->input = dst_discard;
1215                 new->output = dst_discard_out;
1216
1217                 dst_copy_metrics(new, &ort->dst);
1218                 rt->rt6i_idev = ort->rt6i_idev;
1219                 if (rt->rt6i_idev)
1220                         in6_dev_hold(rt->rt6i_idev);
1221
1222                 rt->rt6i_gateway = ort->rt6i_gateway;
1223                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
1224                 rt->rt6i_metric = 0;
1225
1226                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1227 #ifdef CONFIG_IPV6_SUBTREES
1228                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1229 #endif
1230
1231                 dst_free(new);
1232         }
1233
1234         dst_release(dst_orig);
1235         return new ? new : ERR_PTR(-ENOMEM);
1236 }
1237
1238 /*
1239  *      Destination cache support functions
1240  */
1241
1242 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1243 {
1244         if (rt->dst.from &&
1245             dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1246                 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1247 }
1248
1249 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1250 {
1251         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1252                 return NULL;
1253
1254         if (rt6_check_expired(rt))
1255                 return NULL;
1256
1257         return &rt->dst;
1258 }
1259
1260 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1261 {
1262         if (!__rt6_check_expired(rt) &&
1263             rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1264             rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1265                 return &rt->dst;
1266         else
1267                 return NULL;
1268 }
1269
1270 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1271 {
1272         struct rt6_info *rt;
1273
1274         rt = (struct rt6_info *) dst;
1275
1276         /* All IPV6 dsts are created with ->obsolete set to the value
1277          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1278          * into this function always.
1279          */
1280
1281         rt6_dst_from_metrics_check(rt);
1282
1283         if (rt->rt6i_flags & RTF_PCPU ||
1284             (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
1285                 return rt6_dst_from_check(rt, cookie);
1286         else
1287                 return rt6_check(rt, cookie);
1288 }
1289
1290 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1291 {
1292         struct rt6_info *rt = (struct rt6_info *) dst;
1293
1294         if (rt) {
1295                 if (rt->rt6i_flags & RTF_CACHE) {
1296                         if (rt6_check_expired(rt)) {
1297                                 ip6_del_rt(rt);
1298                                 dst = NULL;
1299                         }
1300                 } else {
1301                         dst_release(dst);
1302                         dst = NULL;
1303                 }
1304         }
1305         return dst;
1306 }
1307
1308 static void ip6_link_failure(struct sk_buff *skb)
1309 {
1310         struct rt6_info *rt;
1311
1312         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1313
1314         rt = (struct rt6_info *) skb_dst(skb);
1315         if (rt) {
1316                 if (rt->rt6i_flags & RTF_CACHE) {
1317                         dst_hold(&rt->dst);
1318                         ip6_del_rt(rt);
1319                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1320                         rt->rt6i_node->fn_sernum = -1;
1321                 }
1322         }
1323 }
1324
1325 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1326 {
1327         struct net *net = dev_net(rt->dst.dev);
1328
1329         rt->rt6i_flags |= RTF_MODIFIED;
1330         rt->rt6i_pmtu = mtu;
1331         rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1332 }
1333
1334 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1335 {
1336         return !(rt->rt6i_flags & RTF_CACHE) &&
1337                 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1338 }
1339
1340 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1341                                  const struct ipv6hdr *iph, u32 mtu)
1342 {
1343         struct rt6_info *rt6 = (struct rt6_info *)dst;
1344
1345         if (rt6->rt6i_flags & RTF_LOCAL)
1346                 return;
1347
1348         dst_confirm(dst);
1349         mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1350         if (mtu >= dst_mtu(dst))
1351                 return;
1352
1353         if (!rt6_cache_allowed_for_pmtu(rt6)) {
1354                 rt6_do_update_pmtu(rt6, mtu);
1355         } else {
1356                 const struct in6_addr *daddr, *saddr;
1357                 struct rt6_info *nrt6;
1358
1359                 if (iph) {
1360                         daddr = &iph->daddr;
1361                         saddr = &iph->saddr;
1362                 } else if (sk) {
1363                         daddr = &sk->sk_v6_daddr;
1364                         saddr = &inet6_sk(sk)->saddr;
1365                 } else {
1366                         return;
1367                 }
1368                 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1369                 if (nrt6) {
1370                         rt6_do_update_pmtu(nrt6, mtu);
1371
1372                         /* ip6_ins_rt(nrt6) will bump the
1373                          * rt6->rt6i_node->fn_sernum
1374                          * which will fail the next rt6_check() and
1375                          * invalidate the sk->sk_dst_cache.
1376                          */
1377                         ip6_ins_rt(nrt6);
1378                 }
1379         }
1380 }
1381
1382 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1383                                struct sk_buff *skb, u32 mtu)
1384 {
1385         __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1386 }
1387
1388 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1389                      int oif, u32 mark)
1390 {
1391         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1392         struct dst_entry *dst;
1393         struct flowi6 fl6;
1394
1395         memset(&fl6, 0, sizeof(fl6));
1396         fl6.flowi6_oif = oif;
1397         fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1398         fl6.daddr = iph->daddr;
1399         fl6.saddr = iph->saddr;
1400         fl6.flowlabel = ip6_flowinfo(iph);
1401
1402         dst = ip6_route_output(net, NULL, &fl6);
1403         if (!dst->error)
1404                 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1405         dst_release(dst);
1406 }
1407 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1408
1409 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1410 {
1411         ip6_update_pmtu(skb, sock_net(sk), mtu,
1412                         sk->sk_bound_dev_if, sk->sk_mark);
1413 }
1414 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1415
1416 /* Handle redirects */
1417 struct ip6rd_flowi {
1418         struct flowi6 fl6;
1419         struct in6_addr gateway;
1420 };
1421
1422 static struct rt6_info *__ip6_route_redirect(struct net *net,
1423                                              struct fib6_table *table,
1424                                              struct flowi6 *fl6,
1425                                              int flags)
1426 {
1427         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1428         struct rt6_info *rt;
1429         struct fib6_node *fn;
1430
1431         /* Get the "current" route for this destination and
1432          * check if the redirect has come from approriate router.
1433          *
1434          * RFC 4861 specifies that redirects should only be
1435          * accepted if they come from the nexthop to the target.
1436          * Due to the way the routes are chosen, this notion
1437          * is a bit fuzzy and one might need to check all possible
1438          * routes.
1439          */
1440
1441         read_lock_bh(&table->tb6_lock);
1442         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1443 restart:
1444         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1445                 if (rt6_check_expired(rt))
1446                         continue;
1447                 if (rt->dst.error)
1448                         break;
1449                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1450                         continue;
1451                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1452                         continue;
1453                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1454                         continue;
1455                 break;
1456         }
1457
1458         if (!rt)
1459                 rt = net->ipv6.ip6_null_entry;
1460         else if (rt->dst.error) {
1461                 rt = net->ipv6.ip6_null_entry;
1462                 goto out;
1463         }
1464
1465         if (rt == net->ipv6.ip6_null_entry) {
1466                 fn = fib6_backtrack(fn, &fl6->saddr);
1467                 if (fn)
1468                         goto restart;
1469         }
1470
1471 out:
1472         dst_hold(&rt->dst);
1473
1474         read_unlock_bh(&table->tb6_lock);
1475
1476         return rt;
1477 };
1478
1479 static struct dst_entry *ip6_route_redirect(struct net *net,
1480                                         const struct flowi6 *fl6,
1481                                         const struct in6_addr *gateway)
1482 {
1483         int flags = RT6_LOOKUP_F_HAS_SADDR;
1484         struct ip6rd_flowi rdfl;
1485
1486         rdfl.fl6 = *fl6;
1487         rdfl.gateway = *gateway;
1488
1489         return fib6_rule_lookup(net, &rdfl.fl6,
1490                                 flags, __ip6_route_redirect);
1491 }
1492
1493 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1494 {
1495         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1496         struct dst_entry *dst;
1497         struct flowi6 fl6;
1498
1499         memset(&fl6, 0, sizeof(fl6));
1500         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1501         fl6.flowi6_oif = oif;
1502         fl6.flowi6_mark = mark;
1503         fl6.daddr = iph->daddr;
1504         fl6.saddr = iph->saddr;
1505         fl6.flowlabel = ip6_flowinfo(iph);
1506
1507         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1508         rt6_do_redirect(dst, NULL, skb);
1509         dst_release(dst);
1510 }
1511 EXPORT_SYMBOL_GPL(ip6_redirect);
1512
1513 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1514                             u32 mark)
1515 {
1516         const struct ipv6hdr *iph = ipv6_hdr(skb);
1517         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1518         struct dst_entry *dst;
1519         struct flowi6 fl6;
1520
1521         memset(&fl6, 0, sizeof(fl6));
1522         fl6.flowi6_iif = LOOPBACK_IFINDEX;
1523         fl6.flowi6_oif = oif;
1524         fl6.flowi6_mark = mark;
1525         fl6.daddr = msg->dest;
1526         fl6.saddr = iph->daddr;
1527
1528         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1529         rt6_do_redirect(dst, NULL, skb);
1530         dst_release(dst);
1531 }
1532
1533 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1534 {
1535         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1536 }
1537 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1538
1539 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1540 {
1541         struct net_device *dev = dst->dev;
1542         unsigned int mtu = dst_mtu(dst);
1543         struct net *net = dev_net(dev);
1544
1545         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1546
1547         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1548                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1549
1550         /*
1551          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1552          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1553          * IPV6_MAXPLEN is also valid and means: "any MSS,
1554          * rely only on pmtu discovery"
1555          */
1556         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1557                 mtu = IPV6_MAXPLEN;
1558         return mtu;
1559 }
1560
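     /* Effective MTU for this route: the cached PMTU (rt6i_pmtu) if set,
      * else the RTAX_MTU metric, else the device's mtu6 (or IPV6_MIN_MTU if
      * the device has no inet6_dev), clamped to IP6_MAX_MTU.
      */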
1561 static unsigned int ip6_mtu(const struct dst_entry *dst)
1562 {
1563         const struct rt6_info *rt = (const struct rt6_info *)dst;
1564         unsigned int mtu = rt->rt6i_pmtu;
1565         struct inet6_dev *idev;
1566
1567         if (mtu)
1568                 goto out;
1569
1570         mtu = dst_metric_raw(dst, RTAX_MTU);
1571         if (mtu)
1572                 goto out;
1573
1574         mtu = IPV6_MIN_MTU;
1575
1576         rcu_read_lock();
1577         idev = __in6_dev_get(dst->dev);
1578         if (idev)
1579                 mtu = idev->cnf.mtu6;
1580         rcu_read_unlock();
1581
1582 out:
1583         return min_t(unsigned int, mtu, IP6_MAX_MTU);
1584 }
1585
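     /* dst entries created by icmp6_dst_alloc() are not inserted into the
      * FIB; they are chained on icmp6_dst_gc_list and reclaimed by
      * icmp6_dst_gc() once their refcount drops to zero.
      */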
1586 static struct dst_entry *icmp6_dst_gc_list;
1587 static DEFINE_SPINLOCK(icmp6_dst_lock);
1588
1589 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1590                                   struct flowi6 *fl6)
1591 {
1592         struct dst_entry *dst;
1593         struct rt6_info *rt;
1594         struct inet6_dev *idev = in6_dev_get(dev);
1595         struct net *net = dev_net(dev);
1596
1597         if (unlikely(!idev))
1598                 return ERR_PTR(-ENODEV);
1599
1600         rt = ip6_dst_alloc(net, dev, 0);
1601         if (unlikely(!rt)) {
1602                 in6_dev_put(idev);
1603                 dst = ERR_PTR(-ENOMEM);
1604                 goto out;
1605         }
1606
1607         rt->dst.flags |= DST_HOST;
1608         rt->dst.output  = ip6_output;
1609         atomic_set(&rt->dst.__refcnt, 1);
1610         rt->rt6i_gateway  = fl6->daddr;
1611         rt->rt6i_dst.addr = fl6->daddr;
1612         rt->rt6i_dst.plen = 128;
1613         rt->rt6i_idev     = idev;
1614         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1615
1616         spin_lock_bh(&icmp6_dst_lock);
1617         rt->dst.next = icmp6_dst_gc_list;
1618         icmp6_dst_gc_list = &rt->dst;
1619         spin_unlock_bh(&icmp6_dst_lock);
1620
1621         fib6_force_start_gc(net);
1622
1623         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1624
1625 out:
1626         return dst;
1627 }
1628
1629 int icmp6_dst_gc(void)
1630 {
1631         struct dst_entry *dst, **pprev;
1632         int more = 0;
1633
1634         spin_lock_bh(&icmp6_dst_lock);
1635         pprev = &icmp6_dst_gc_list;
1636
1637         while ((dst = *pprev) != NULL) {
1638                 if (!atomic_read(&dst->__refcnt)) {
1639                         *pprev = dst->next;
1640                         dst_free(dst);
1641                 } else {
1642                         pprev = &dst->next;
1643                         ++more;
1644                 }
1645         }
1646
1647         spin_unlock_bh(&icmp6_dst_lock);
1648
1649         return more;
1650 }
1651
1652 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1653                             void *arg)
1654 {
1655         struct dst_entry *dst, **pprev;
1656
1657         spin_lock_bh(&icmp6_dst_lock);
1658         pprev = &icmp6_dst_gc_list;
1659         while ((dst = *pprev) != NULL) {
1660                 struct rt6_info *rt = (struct rt6_info *) dst;
1661                 if (func(rt, arg)) {
1662                         *pprev = dst->next;
1663                         dst_free(dst);
1664                 } else {
1665                         pprev = &dst->next;
1666                 }
1667         }
1668         spin_unlock_bh(&icmp6_dst_lock);
1669 }
1670
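     /* Per-netns dst garbage collection: skip the scan if the previous run
      * was less than ip6_rt_gc_min_interval ago and the entry count is
      * within ip6_rt_max_size; otherwise run fib6_run_gc() with an expiry
      * that grows under pressure and is decayed using ip6_rt_gc_elasticity.
      */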
1671 static int ip6_dst_gc(struct dst_ops *ops)
1672 {
1673         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1674         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1675         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1676         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1677         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1678         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1679         int entries;
1680
1681         entries = dst_entries_get_fast(ops);
1682         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1683             entries <= rt_max_size)
1684                 goto out;
1685
1686         net->ipv6.ip6_rt_gc_expire++;
1687         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1688         entries = dst_entries_get_slow(ops);
1689         if (entries < ops->gc_thresh)
1690                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1691 out:
1692         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1693         return entries > rt_max_size;
1694 }
1695
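     /* Convert the RTA_METRICS attributes in cfg into an mx6_config: a
      * freshly allocated RTAX_MAX array plus a validity bitmap.  RTAX_CC_ALGO
      * is resolved by name and may set the ECN CA feature bit.  The caller
      * owns (and must kfree) mxc->mx.
      */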
1696 static int ip6_convert_metrics(struct mx6_config *mxc,
1697                                const struct fib6_config *cfg)
1698 {
1699         bool ecn_ca = false;
1700         struct nlattr *nla;
1701         int remaining;
1702         u32 *mp;
1703
1704         if (!cfg->fc_mx)
1705                 return 0;
1706
1707         mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1708         if (unlikely(!mp))
1709                 return -ENOMEM;
1710
1711         nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1712                 int type = nla_type(nla);
1713                 u32 val;
1714
1715                 if (!type)
1716                         continue;
1717                 if (unlikely(type > RTAX_MAX))
1718                         goto err;
1719
1720                 if (type == RTAX_CC_ALGO) {
1721                         char tmp[TCP_CA_NAME_MAX];
1722
1723                         nla_strlcpy(tmp, nla, sizeof(tmp));
1724                         val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1725                         if (val == TCP_CA_UNSPEC)
1726                                 goto err;
1727                 } else {
1728                         val = nla_get_u32(nla);
1729                 }
1730                 if (type == RTAX_HOPLIMIT && val > 255)
1731                         val = 255;
1732                 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1733                         goto err;
1734
1735                 mp[type - 1] = val;
1736                 __set_bit(type - 1, mxc->mx_valid);
1737         }
1738
1739         if (ecn_ca) {
1740                 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1741                 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1742         }
1743
1744         mxc->mx = mp;
1745         return 0;
1746  err:
1747         kfree(mp);
1748         return -EINVAL;
1749 }
1750
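     /* Build and validate an rt6_info from a fib6_config (table, device,
      * gateway, prefix, flags, lwtunnel state).  The route is not inserted
      * into the FIB here; that is done by the caller via __ip6_ins_rt().
      */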
1751 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1752 {
1753         struct net *net = cfg->fc_nlinfo.nl_net;
1754         struct rt6_info *rt = NULL;
1755         struct net_device *dev = NULL;
1756         struct inet6_dev *idev = NULL;
1757         struct fib6_table *table;
1758         int addr_type;
1759         int err = -EINVAL;
1760
1761         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1762                 goto out;
1763 #ifndef CONFIG_IPV6_SUBTREES
1764         if (cfg->fc_src_len)
1765                 goto out;
1766 #endif
1767         if (cfg->fc_ifindex) {
1768                 err = -ENODEV;
1769                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1770                 if (!dev)
1771                         goto out;
1772                 idev = in6_dev_get(dev);
1773                 if (!idev)
1774                         goto out;
1775         }
1776
1777         if (cfg->fc_metric == 0)
1778                 cfg->fc_metric = IP6_RT_PRIO_USER;
1779
1780         err = -ENOBUFS;
1781         if (cfg->fc_nlinfo.nlh &&
1782             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1783                 table = fib6_get_table(net, cfg->fc_table);
1784                 if (!table) {
1785                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1786                         table = fib6_new_table(net, cfg->fc_table);
1787                 }
1788         } else {
1789                 table = fib6_new_table(net, cfg->fc_table);
1790         }
1791
1792         if (!table)
1793                 goto out;
1794
1795         rt = ip6_dst_alloc(net, NULL,
1796                            (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1797
1798         if (!rt) {
1799                 err = -ENOMEM;
1800                 goto out;
1801         }
1802
1803         if (cfg->fc_flags & RTF_EXPIRES)
1804                 rt6_set_expires(rt, jiffies +
1805                                 clock_t_to_jiffies(cfg->fc_expires));
1806         else
1807                 rt6_clean_expires(rt);
1808
1809         if (cfg->fc_protocol == RTPROT_UNSPEC)
1810                 cfg->fc_protocol = RTPROT_BOOT;
1811         rt->rt6i_protocol = cfg->fc_protocol;
1812
1813         addr_type = ipv6_addr_type(&cfg->fc_dst);
1814
1815         if (addr_type & IPV6_ADDR_MULTICAST)
1816                 rt->dst.input = ip6_mc_input;
1817         else if (cfg->fc_flags & RTF_LOCAL)
1818                 rt->dst.input = ip6_input;
1819         else
1820                 rt->dst.input = ip6_forward;
1821
1822         rt->dst.output = ip6_output;
1823
1824         if (cfg->fc_encap) {
1825                 struct lwtunnel_state *lwtstate;
1826
1827                 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1828                                            cfg->fc_encap, AF_INET6, cfg,
1829                                            &lwtstate);
1830                 if (err)
1831                         goto out;
1832                 rt->dst.lwtstate = lwtstate_get(lwtstate);
1833                 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1834                         rt->dst.lwtstate->orig_output = rt->dst.output;
1835                         rt->dst.output = lwtunnel_output;
1836                 }
1837                 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1838                         rt->dst.lwtstate->orig_input = rt->dst.input;
1839                         rt->dst.input = lwtunnel_input;
1840                 }
1841         }
1842
1843         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1844         rt->rt6i_dst.plen = cfg->fc_dst_len;
1845         if (rt->rt6i_dst.plen == 128)
1846                 rt->dst.flags |= DST_HOST;
1847
1848 #ifdef CONFIG_IPV6_SUBTREES
1849         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1850         rt->rt6i_src.plen = cfg->fc_src_len;
1851 #endif
1852
1853         rt->rt6i_metric = cfg->fc_metric;
1854
1855         /* We cannot add true routes via loopback here; they would
1856            result in kernel looping.  Promote them to reject routes instead.
1857          */
1858         if ((cfg->fc_flags & RTF_REJECT) ||
1859             (dev && (dev->flags & IFF_LOOPBACK) &&
1860              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1861              !(cfg->fc_flags & RTF_LOCAL))) {
1862                 /* hold loopback dev/idev if we haven't done so. */
1863                 if (dev != net->loopback_dev) {
1864                         if (dev) {
1865                                 dev_put(dev);
1866                                 in6_dev_put(idev);
1867                         }
1868                         dev = net->loopback_dev;
1869                         dev_hold(dev);
1870                         idev = in6_dev_get(dev);
1871                         if (!idev) {
1872                                 err = -ENODEV;
1873                                 goto out;
1874                         }
1875                 }
1876                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1877                 switch (cfg->fc_type) {
1878                 case RTN_BLACKHOLE:
1879                         rt->dst.error = -EINVAL;
1880                         rt->dst.output = dst_discard_out;
1881                         rt->dst.input = dst_discard;
1882                         break;
1883                 case RTN_PROHIBIT:
1884                         rt->dst.error = -EACCES;
1885                         rt->dst.output = ip6_pkt_prohibit_out;
1886                         rt->dst.input = ip6_pkt_prohibit;
1887                         break;
1888                 case RTN_THROW:
1889                 case RTN_UNREACHABLE:
1890                 default:
1891                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1892                                         : (cfg->fc_type == RTN_UNREACHABLE)
1893                                         ? -EHOSTUNREACH : -ENETUNREACH;
1894                         rt->dst.output = ip6_pkt_discard_out;
1895                         rt->dst.input = ip6_pkt_discard;
1896                         break;
1897                 }
1898                 goto install_route;
1899         }
1900
1901         if (cfg->fc_flags & RTF_GATEWAY) {
1902                 const struct in6_addr *gw_addr;
1903                 int gwa_type;
1904
1905                 gw_addr = &cfg->fc_gateway;
1906                 gwa_type = ipv6_addr_type(gw_addr);
1907
1908                 /* If gw_addr is a local address, we may fail to detect that while
1909                  * the address is still TENTATIVE (DAD in progress): rt6_lookup()
1910                  * will return the already-added prefix route via the interface
1911                  * the prefix route was assigned to, which might be non-loopback.
1912                  */
1913                 err = -EINVAL;
1914                 if (ipv6_chk_addr_and_flags(net, gw_addr,
1915                                             gwa_type & IPV6_ADDR_LINKLOCAL ?
1916                                             dev : NULL, 0, 0))
1917                         goto out;
1918
1919                 rt->rt6i_gateway = *gw_addr;
1920
1921                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1922                         struct rt6_info *grt;
1923
1924                         /* IPv6 strictly forbids using non-link-local
1925                            addresses as the nexthop address; otherwise
1926                            the router would not be able to send redirects.
1927                            That is a good rule, but in some (rare!)
1928                            circumstances (SIT, PtP, NBMA NOARP links) it
1929                            is handy to allow some exceptions. --ANK
1930                          */
1931                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1932                                 goto out;
1933
1934                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1935
1936                         err = -EHOSTUNREACH;
1937                         if (!grt)
1938                                 goto out;
1939                         if (dev) {
1940                                 if (dev != grt->dst.dev) {
1941                                         ip6_rt_put(grt);
1942                                         goto out;
1943                                 }
1944                         } else {
1945                                 dev = grt->dst.dev;
1946                                 idev = grt->rt6i_idev;
1947                                 dev_hold(dev);
1948                                 in6_dev_hold(grt->rt6i_idev);
1949                         }
1950                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1951                                 err = 0;
1952                         ip6_rt_put(grt);
1953
1954                         if (err)
1955                                 goto out;
1956                 }
1957                 err = -EINVAL;
1958                 if (!dev || (dev->flags & IFF_LOOPBACK))
1959                         goto out;
1960         }
1961
1962         err = -ENODEV;
1963         if (!dev)
1964                 goto out;
1965
1966         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1967                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1968                         err = -EINVAL;
1969                         goto out;
1970                 }
1971                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1972                 rt->rt6i_prefsrc.plen = 128;
1973         } else
1974                 rt->rt6i_prefsrc.plen = 0;
1975
1976         rt->rt6i_flags = cfg->fc_flags;
1977
1978 install_route:
1979         rt->dst.dev = dev;
1980         rt->rt6i_idev = idev;
1981         rt->rt6i_table = table;
1982
1983         cfg->fc_nlinfo.nl_net = dev_net(dev);
1984
1985         return rt;
1986 out:
1987         if (dev)
1988                 dev_put(dev);
1989         if (idev)
1990                 in6_dev_put(idev);
1991         if (rt)
1992                 dst_free(&rt->dst);
1993
1994         return ERR_PTR(err);
1995 }
1996
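     /* Create a route from cfg, convert its metrics and insert it into the
      * FIB.
      */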
1997 int ip6_route_add(struct fib6_config *cfg)
1998 {
1999         struct mx6_config mxc = { .mx = NULL, };
2000         struct rt6_info *rt;
2001         int err;
2002
2003         rt = ip6_route_info_create(cfg);
2004         if (IS_ERR(rt)) {
2005                 err = PTR_ERR(rt);
2006                 rt = NULL;
2007                 goto out;
2008         }
2009
2010         err = ip6_convert_metrics(&mxc, cfg);
2011         if (err)
2012                 goto out;
2013
2014         err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2015
2016         kfree(mxc.mx);
2017
2018         return err;
2019 out:
2020         if (rt)
2021                 dst_free(&rt->dst);
2022
2023         return err;
2024 }
2025
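     /* Unlink rt from its FIB table under the table write lock.  The null
      * entry and DST_NOCACHE routes are refused; the caller's reference is
      * always dropped.
      */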
2026 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
2027 {
2028         int err;
2029         struct fib6_table *table;
2030         struct net *net = dev_net(rt->dst.dev);
2031
2032         if (rt == net->ipv6.ip6_null_entry ||
2033             rt->dst.flags & DST_NOCACHE) {
2034                 err = -ENOENT;
2035                 goto out;
2036         }
2037
2038         table = rt->rt6i_table;
2039         write_lock_bh(&table->tb6_lock);
2040         err = fib6_del(rt, info);
2041         write_unlock_bh(&table->tb6_lock);
2042
2043 out:
2044         ip6_rt_put(rt);
2045         return err;
2046 }
2047
2048 int ip6_del_rt(struct rt6_info *rt)
2049 {
2050         struct nl_info info = {
2051                 .nl_net = dev_net(rt->dst.dev),
2052         };
2053         return __ip6_del_rt(rt, &info);
2054 }
2055
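     /* Delete the first route in cfg->fc_table matching the destination
      * prefix and, where given, the ifindex, gateway and metric.  RTF_CACHE
      * entries are only considered when RTF_CACHE is requested.
      */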
2056 static int ip6_route_del(struct fib6_config *cfg)
2057 {
2058         struct fib6_table *table;
2059         struct fib6_node *fn;
2060         struct rt6_info *rt;
2061         int err = -ESRCH;
2062
2063         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
2064         if (!table)
2065                 return err;
2066
2067         read_lock_bh(&table->tb6_lock);
2068
2069         fn = fib6_locate(&table->tb6_root,
2070                          &cfg->fc_dst, cfg->fc_dst_len,
2071                          &cfg->fc_src, cfg->fc_src_len);
2072
2073         if (fn) {
2074                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2075                         if ((rt->rt6i_flags & RTF_CACHE) &&
2076                             !(cfg->fc_flags & RTF_CACHE))
2077                                 continue;
2078                         if (cfg->fc_ifindex &&
2079                             (!rt->dst.dev ||
2080                              rt->dst.dev->ifindex != cfg->fc_ifindex))
2081                                 continue;
2082                         if (cfg->fc_flags & RTF_GATEWAY &&
2083                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
2084                                 continue;
2085                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
2086                                 continue;
2087                         dst_hold(&rt->dst);
2088                         read_unlock_bh(&table->tb6_lock);
2089
2090                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2091                 }
2092         }
2093         read_unlock_bh(&table->tb6_lock);
2094
2095         return err;
2096 }
2097
2098 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2099 {
2100         struct netevent_redirect netevent;
2101         struct rt6_info *rt, *nrt = NULL;
2102         struct ndisc_options ndopts;
2103         struct inet6_dev *in6_dev;
2104         struct neighbour *neigh;
2105         struct rd_msg *msg;
2106         int optlen, on_link;
2107         u8 *lladdr;
2108
2109         optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
2110         optlen -= sizeof(*msg);
2111
2112         if (optlen < 0) {
2113                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2114                 return;
2115         }
2116
2117         msg = (struct rd_msg *)icmp6_hdr(skb);
2118
2119         if (ipv6_addr_is_multicast(&msg->dest)) {
2120                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2121                 return;
2122         }
2123
2124         on_link = 0;
2125         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2126                 on_link = 1;
2127         } else if (ipv6_addr_type(&msg->target) !=
2128                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
2129                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2130                 return;
2131         }
2132
2133         in6_dev = __in6_dev_get(skb->dev);
2134         if (!in6_dev)
2135                 return;
2136         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2137                 return;
2138
2139         /* RFC2461 8.1:
2140          *      The IP source address of the Redirect MUST be the same as the current
2141          *      first-hop router for the specified ICMP Destination Address.
2142          */
2143
2144         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2145                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2146                 return;
2147         }
2148
2149         lladdr = NULL;
2150         if (ndopts.nd_opts_tgt_lladdr) {
2151                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2152                                              skb->dev);
2153                 if (!lladdr) {
2154                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2155                         return;
2156                 }
2157         }
2158
2159         rt = (struct rt6_info *) dst;
2160         if (rt->rt6i_flags & RTF_REJECT) {
2161                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2162                 return;
2163         }
2164
2165         /* Redirect received -> path was valid.
2166          * Redirects are only sent in response to data packets,
2167          * so this nexthop is apparently reachable. --ANK
2168          */
2169         dst_confirm(&rt->dst);
2170
2171         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2172         if (!neigh)
2173                 return;
2174
2175         /*
2176          *      We have finally decided to accept it.
2177          */
2178
2179         neigh_update(neigh, lladdr, NUD_STALE,
2180                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
2181                      NEIGH_UPDATE_F_OVERRIDE|
2182                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2183                                      NEIGH_UPDATE_F_ISROUTER))
2184                      );
2185
2186         nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
2187         if (!nrt)
2188                 goto out;
2189
2190         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2191         if (on_link)
2192                 nrt->rt6i_flags &= ~RTF_GATEWAY;
2193
2194         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
2195
2196         if (ip6_ins_rt(nrt))
2197                 goto out;
2198
2199         netevent.old = &rt->dst;
2200         netevent.new = &nrt->dst;
2201         netevent.daddr = &msg->dest;
2202         netevent.neigh = neigh;
2203         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2204
2205         if (rt->rt6i_flags & RTF_CACHE) {
2206                 rt = (struct rt6_info *) dst_clone(&rt->dst);
2207                 ip6_del_rt(rt);
2208         }
2209
2210 out:
2211         neigh_release(neigh);
2212 }
2213
2214 /*
2215  *      Misc support functions
2216  */
2217
2218 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2219 {
2220         BUG_ON(from->dst.from);
2221
2222         rt->rt6i_flags &= ~RTF_EXPIRES;
2223         dst_hold(&from->dst);
2224         rt->dst.from = &from->dst;
2225         dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2226 }
2227
2228 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2229 {
2230         rt->dst.input = ort->dst.input;
2231         rt->dst.output = ort->dst.output;
2232         rt->rt6i_dst = ort->rt6i_dst;
2233         rt->dst.error = ort->dst.error;
2234         rt->rt6i_idev = ort->rt6i_idev;
2235         if (rt->rt6i_idev)
2236                 in6_dev_hold(rt->rt6i_idev);
2237         rt->dst.lastuse = jiffies;
2238         rt->rt6i_gateway = ort->rt6i_gateway;
2239         rt->rt6i_flags = ort->rt6i_flags;
2240         rt6_set_from(rt, ort);
2241         rt->rt6i_metric = ort->rt6i_metric;
2242 #ifdef CONFIG_IPV6_SUBTREES
2243         rt->rt6i_src = ort->rt6i_src;
2244 #endif
2245         rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2246         rt->rt6i_table = ort->rt6i_table;
2247         rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
2248 }
2249
2250 #ifdef CONFIG_IPV6_ROUTE_INFO
2251 static struct rt6_info *rt6_get_route_info(struct net *net,
2252                                            const struct in6_addr *prefix, int prefixlen,
2253                                            const struct in6_addr *gwaddr, int ifindex)
2254 {
2255         struct fib6_node *fn;
2256         struct rt6_info *rt = NULL;
2257         struct fib6_table *table;
2258
2259         table = fib6_get_table(net, RT6_TABLE_INFO);
2260         if (!table)
2261                 return NULL;
2262
2263         read_lock_bh(&table->tb6_lock);
2264         fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
2265         if (!fn)
2266                 goto out;
2267
2268         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2269                 if (rt->dst.dev->ifindex != ifindex)
2270                         continue;
2271                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2272                         continue;
2273                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2274                         continue;
2275                 dst_hold(&rt->dst);
2276                 break;
2277         }
2278 out:
2279         read_unlock_bh(&table->tb6_lock);
2280         return rt;
2281 }
2282
2283 static struct rt6_info *rt6_add_route_info(struct net *net,
2284                                            const struct in6_addr *prefix, int prefixlen,
2285                                            const struct in6_addr *gwaddr, int ifindex,
2286                                            unsigned int pref)
2287 {
2288         struct fib6_config cfg = {
2289                 .fc_metric      = IP6_RT_PRIO_USER,
2290                 .fc_ifindex     = ifindex,
2291                 .fc_dst_len     = prefixlen,
2292                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2293                                   RTF_UP | RTF_PREF(pref),
2294                 .fc_nlinfo.portid = 0,
2295                 .fc_nlinfo.nlh = NULL,
2296                 .fc_nlinfo.nl_net = net,
2297         };
2298
2299         cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
2300         cfg.fc_dst = *prefix;
2301         cfg.fc_gateway = *gwaddr;
2302
2303         /* We should treat it as a default route if prefix length is 0. */
2304         if (!prefixlen)
2305                 cfg.fc_flags |= RTF_DEFAULT;
2306
2307         ip6_route_add(&cfg);
2308
2309         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
2310 }
2311 #endif
2312
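     /* Find the RA-learned (RTF_ADDRCONF | RTF_DEFAULT) default route via
      * addr on dev in RT6_TABLE_DFLT.  Returns the route with a reference
      * held, or NULL.
      */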
2313 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2314 {
2315         struct rt6_info *rt;
2316         struct fib6_table *table;
2317
2318         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2319         if (!table)
2320                 return NULL;
2321
2322         read_lock_bh(&table->tb6_lock);
2323         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2324                 if (dev == rt->dst.dev &&
2325                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2326                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
2327                         break;
2328         }
2329         if (rt)
2330                 dst_hold(&rt->dst);
2331         read_unlock_bh(&table->tb6_lock);
2332         return rt;
2333 }
2334
2335 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2336                                      struct net_device *dev,
2337                                      unsigned int pref)
2338 {
2339         struct fib6_config cfg = {
2340                 .fc_table       = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2341                 .fc_metric      = IP6_RT_PRIO_USER,
2342                 .fc_ifindex     = dev->ifindex,
2343                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2344                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2345                 .fc_nlinfo.portid = 0,
2346                 .fc_nlinfo.nlh = NULL,
2347                 .fc_nlinfo.nl_net = dev_net(dev),
2348         };
2349
2350         cfg.fc_gateway = *gwaddr;
2351
2352         ip6_route_add(&cfg);
2353
2354         return rt6_get_dflt_router(gwaddr, dev);
2355 }
2356
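     /* Flush default routes learned from router advertisements, except on
      * interfaces whose accept_ra sysctl is 2 (accept RAs even when
      * forwarding is enabled).
      */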
2357 void rt6_purge_dflt_routers(struct net *net)
2358 {
2359         struct rt6_info *rt;
2360         struct fib6_table *table;
2361
2362         /* NOTE: Keep consistent with rt6_get_dflt_router */
2363         table = fib6_get_table(net, RT6_TABLE_DFLT);
2364         if (!table)
2365                 return;
2366
2367 restart:
2368         read_lock_bh(&table->tb6_lock);
2369         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2370                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2371                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2372                         dst_hold(&rt->dst);
2373                         read_unlock_bh(&table->tb6_lock);
2374                         ip6_del_rt(rt);
2375                         goto restart;
2376                 }
2377         }
2378         read_unlock_bh(&table->tb6_lock);
2379 }
2380
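     /* Translate the legacy SIOCADDRT/SIOCDELRT in6_rtmsg into a
      * fib6_config.  The table is taken from the interface's l3mdev,
      * falling back to RT6_TABLE_MAIN.
      */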
2381 static void rtmsg_to_fib6_config(struct net *net,
2382                                  struct in6_rtmsg *rtmsg,
2383                                  struct fib6_config *cfg)
2384 {
2385         memset(cfg, 0, sizeof(*cfg));
2386
2387         cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2388                          : RT6_TABLE_MAIN;
2389         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2390         cfg->fc_metric = rtmsg->rtmsg_metric;
2391         cfg->fc_expires = rtmsg->rtmsg_info;
2392         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2393         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2394         cfg->fc_flags = rtmsg->rtmsg_flags;
2395
2396         cfg->fc_nlinfo.nl_net = net;
2397
2398         cfg->fc_dst = rtmsg->rtmsg_dst;
2399         cfg->fc_src = rtmsg->rtmsg_src;
2400         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2401 }
2402
2403 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2404 {
2405         struct fib6_config cfg;
2406         struct in6_rtmsg rtmsg;
2407         int err;
2408
2409         switch (cmd) {
2410         case SIOCADDRT:         /* Add a route */
2411         case SIOCDELRT:         /* Delete a route */
2412                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2413                         return -EPERM;
2414                 err = copy_from_user(&rtmsg, arg,
2415                                      sizeof(struct in6_rtmsg));
2416                 if (err)
2417                         return -EFAULT;
2418
2419                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2420
2421                 rtnl_lock();
2422                 switch (cmd) {
2423                 case SIOCADDRT:
2424                         err = ip6_route_add(&cfg);
2425                         break;
2426                 case SIOCDELRT:
2427                         err = ip6_route_del(&cfg);
2428                         break;
2429                 default:
2430                         err = -EINVAL;
2431                 }
2432                 rtnl_unlock();
2433
2434                 return err;
2435         }
2436
2437         return -EINVAL;
2438 }
2439
2440 /*
2441  *      Drop the packet on the floor
2442  */
2443
2444 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2445 {
2446         int type;
2447         struct dst_entry *dst = skb_dst(skb);
2448         switch (ipstats_mib_noroutes) {
2449         case IPSTATS_MIB_INNOROUTES:
2450                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2451                 if (type == IPV6_ADDR_ANY) {
2452                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2453                                       IPSTATS_MIB_INADDRERRORS);
2454                         break;
2455                 }
2456                 /* FALLTHROUGH */
2457         case IPSTATS_MIB_OUTNOROUTES:
2458                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2459                               ipstats_mib_noroutes);
2460                 break;
2461         }
2462         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2463         kfree_skb(skb);
2464         return 0;
2465 }
2466
2467 static int ip6_pkt_discard(struct sk_buff *skb)
2468 {
2469         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2470 }
2471
2472 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2473 {
2474         skb->dev = skb_dst(skb)->dev;
2475         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2476 }
2477
2478 static int ip6_pkt_prohibit(struct sk_buff *skb)
2479 {
2480         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2481 }
2482
2483 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
2484 {
2485         skb->dev = skb_dst(skb)->dev;
2486         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2487 }
2488
2489 /*
2490  *      Allocate a dst for local (unicast / anycast) address.
2491  */
2492
2493 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2494                                     const struct in6_addr *addr,
2495                                     bool anycast)
2496 {
2497         u32 tb_id;
2498         struct net *net = dev_net(idev->dev);
2499         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2500                                             DST_NOCOUNT);
2501         if (!rt)
2502                 return ERR_PTR(-ENOMEM);
2503
2504         in6_dev_hold(idev);
2505
2506         rt->dst.flags |= DST_HOST;
2507         rt->dst.input = ip6_input;
2508         rt->dst.output = ip6_output;
2509         rt->rt6i_idev = idev;
2510
2511         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2512         if (anycast)
2513                 rt->rt6i_flags |= RTF_ANYCAST;
2514         else
2515                 rt->rt6i_flags |= RTF_LOCAL;
2516
2517         rt->rt6i_gateway  = *addr;
2518         rt->rt6i_dst.addr = *addr;
2519         rt->rt6i_dst.plen = 128;
2520         tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2521         rt->rt6i_table = fib6_get_table(net, tb_id);
2522         rt->dst.flags |= DST_NOCACHE;
2523
2524         atomic_set(&rt->dst.__refcnt, 1);
2525
2526         return rt;
2527 }
2528
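     /* Choose a source address for daddr: prefer the route's prefsrc if it
      * is set, otherwise fall back to ipv6_dev_get_saddr() on the route's
      * device.
      */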
2529 int ip6_route_get_saddr(struct net *net,
2530                         struct rt6_info *rt,
2531                         const struct in6_addr *daddr,
2532                         unsigned int prefs,
2533                         struct in6_addr *saddr)
2534 {
2535         struct inet6_dev *idev =
2536                 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2537         int err = 0;
2538         if (rt && rt->rt6i_prefsrc.plen)
2539                 *saddr = rt->rt6i_prefsrc.addr;
2540         else
2541                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2542                                          daddr, prefs, saddr);
2543         return err;
2544 }
2545
2546 /* Remove a deleted IP address from prefsrc entries. */
2547 struct arg_dev_net_ip {
2548         struct net_device *dev;
2549         struct net *net;
2550         struct in6_addr *addr;
2551 };
2552
2553 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2554 {
2555         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2556         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2557         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2558
2559         if (((void *)rt->dst.dev == dev || !dev) &&
2560             rt != net->ipv6.ip6_null_entry &&
2561             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2562                 /* remove prefsrc entry */
2563                 rt->rt6i_prefsrc.plen = 0;
2564         }
2565         return 0;
2566 }
2567
2568 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2569 {
2570         struct net *net = dev_net(ifp->idev->dev);
2571         struct arg_dev_net_ip adni = {
2572                 .dev = ifp->idev->dev,
2573                 .net = net,
2574                 .addr = &ifp->addr,
2575         };
2576         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2577 }
2578
2579 #define RTF_RA_ROUTER           (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2580 #define RTF_CACHE_GATEWAY       (RTF_GATEWAY | RTF_CACHE)
2581
2582 /* Remove routers and update dst entries when a gateway turns into a host. */
2583 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2584 {
2585         struct in6_addr *gateway = (struct in6_addr *)arg;
2586
2587         if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2588              ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2589              ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2590                 return -1;
2591         }
2592         return 0;
2593 }
2594
2595 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2596 {
2597         fib6_clean_all(net, fib6_clean_tohost, gateway);
2598 }
2599
2600 struct arg_dev_net {
2601         struct net_device *dev;
2602         struct net *net;
2603 };
2604
2605 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2606 {
2607         const struct arg_dev_net *adn = arg;
2608         const struct net_device *dev = adn->dev;
2609
2610         if ((rt->dst.dev == dev || !dev) &&
2611             rt != adn->net->ipv6.ip6_null_entry)
2612                 return -1;
2613
2614         return 0;
2615 }
2616
2617 void rt6_ifdown(struct net *net, struct net_device *dev)
2618 {
2619         struct arg_dev_net adn = {
2620                 .dev = dev,
2621                 .net = net,
2622         };
2623
2624         fib6_clean_all(net, fib6_ifdown, &adn);
2625         icmp6_clean_all(fib6_ifdown, &adn);
2626         if (dev)
2627                 rt6_uncached_list_flush_dev(net, dev);
2628 }
2629
2630 struct rt6_mtu_change_arg {
2631         struct net_device *dev;
2632         unsigned int mtu;
2633 };
2634
2635 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2636 {
2637         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2638         struct inet6_dev *idev;
2639
2640         /* In IPv6, PMTU discovery is not optional,
2641            so the RTAX_MTU lock cannot disable it.
2642            We still use this lock to block changes
2643            caused by addrconf/ndisc.
2644         */
2645
2646         idev = __in6_dev_get(arg->dev);
2647         if (!idev)
2648                 return 0;
2649
2650         /* For an administrative MTU increase there is no way to discover
2651            an IPv6 PMTU increase, so the PMTU increase must be applied here.
2652            RFC 1981 does not cover administrative MTU increases (e.g. for
2653            jumbo frames), so updating the PMTU on an increase is a MUST.
2654          */
2655         /*
2656            If the new MTU is less than the route PMTU, the new MTU will be
2657            the lowest MTU in the path; update the route PMTU to reflect the
2658            decrease.  If the new MTU is greater than the route PMTU, and the
2659            old MTU was the lowest MTU in the path, update the route PMTU to
2660            reflect the increase.  In that case, if the other nodes' MTU is
2661            still the lowest in the path, a Packet Too Big message will
2662            trigger PMTU discovery again.
2663          */
2664         if (rt->dst.dev == arg->dev &&
2665             !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2666                 if (rt->rt6i_flags & RTF_CACHE) {
2667                         /* For RTF_CACHE with rt6i_pmtu == 0
2668                          * (i.e. a redirected route),
2669                          * the metrics of its rt->dst.from have already
2670                          * been updated.
2671                          */
2672                         if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2673                                 rt->rt6i_pmtu = arg->mtu;
2674                 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2675                            (dst_mtu(&rt->dst) < arg->mtu &&
2676                             dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2677                         dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2678                 }
2679         }
2680         return 0;
2681 }
2682
2683 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2684 {
2685         struct rt6_mtu_change_arg arg = {
2686                 .dev = dev,
2687                 .mtu = mtu,
2688         };
2689
2690         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2691 }
2692
2693 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2694         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2695         [RTA_OIF]               = { .type = NLA_U32 },
2696         [RTA_IIF]               = { .type = NLA_U32 },
2697         [RTA_PRIORITY]          = { .type = NLA_U32 },
2698         [RTA_METRICS]           = { .type = NLA_NESTED },
2699         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2700         [RTA_PREF]              = { .type = NLA_U8 },
2701         [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
2702         [RTA_ENCAP]             = { .type = NLA_NESTED },
2703 };
2704
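     /* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink message into a
      * fib6_config, validating attributes against rtm_ipv6_policy.
      */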
2705 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2706                               struct fib6_config *cfg)
2707 {
2708         struct rtmsg *rtm;
2709         struct nlattr *tb[RTA_MAX+1];
2710         unsigned int pref;
2711         int err;
2712
2713         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2714         if (err < 0)
2715                 goto errout;
2716
2717         err = -EINVAL;
2718         rtm = nlmsg_data(nlh);
2719         memset(cfg, 0, sizeof(*cfg));
2720
2721         cfg->fc_table = rtm->rtm_table;
2722         cfg->fc_dst_len = rtm->rtm_dst_len;
2723         cfg->fc_src_len = rtm->rtm_src_len;
2724         cfg->fc_flags = RTF_UP;
2725         cfg->fc_protocol = rtm->rtm_protocol;
2726         cfg->fc_type = rtm->rtm_type;
2727
2728         if (rtm->rtm_type == RTN_UNREACHABLE ||
2729             rtm->rtm_type == RTN_BLACKHOLE ||
2730             rtm->rtm_type == RTN_PROHIBIT ||
2731             rtm->rtm_type == RTN_THROW)
2732                 cfg->fc_flags |= RTF_REJECT;
2733
2734         if (rtm->rtm_type == RTN_LOCAL)
2735                 cfg->fc_flags |= RTF_LOCAL;
2736
2737         if (rtm->rtm_flags & RTM_F_CLONED)
2738                 cfg->fc_flags |= RTF_CACHE;
2739
2740         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2741         cfg->fc_nlinfo.nlh = nlh;
2742         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2743
2744         if (tb[RTA_GATEWAY]) {
2745                 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
2746                 cfg->fc_flags |= RTF_GATEWAY;
2747         }
2748
2749         if (tb[RTA_DST]) {
2750                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2751
2752                 if (nla_len(tb[RTA_DST]) < plen)
2753                         goto errout;
2754
2755                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2756         }
2757
2758         if (tb[RTA_SRC]) {
2759                 int plen = (rtm->rtm_src_len + 7) >> 3;
2760
2761                 if (nla_len(tb[RTA_SRC]) < plen)
2762                         goto errout;
2763
2764                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2765         }
2766
2767         if (tb[RTA_PREFSRC])
2768                 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2769
2770         if (tb[RTA_OIF])
2771                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2772
2773         if (tb[RTA_PRIORITY])
2774                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2775
2776         if (tb[RTA_METRICS]) {
2777                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2778                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2779         }
2780
2781         if (tb[RTA_TABLE])
2782                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2783
2784         if (tb[RTA_MULTIPATH]) {
2785                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2786                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2787         }
2788
2789         if (tb[RTA_PREF]) {
2790                 pref = nla_get_u8(tb[RTA_PREF]);
2791                 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2792                     pref != ICMPV6_ROUTER_PREF_HIGH)
2793                         pref = ICMPV6_ROUTER_PREF_MEDIUM;
2794                 cfg->fc_flags |= RTF_PREF(pref);
2795         }
2796
2797         if (tb[RTA_ENCAP])
2798                 cfg->fc_encap = tb[RTA_ENCAP];
2799
2800         if (tb[RTA_ENCAP_TYPE])
2801                 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2802
2803         err = 0;
2804 errout:
2805         return err;
2806 }
2807
2808 struct rt6_nh {
2809         struct rt6_info *rt6_info;
2810         struct fib6_config r_cfg;
2811         struct mx6_config mxc;
2812         struct list_head next;
2813 };
2814
2815 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2816 {
2817         struct rt6_nh *nh;
2818
2819         list_for_each_entry(nh, rt6_nh_list, next) {
2820                 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2821                         &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2822                         nh->r_cfg.fc_ifindex);
2823         }
2824 }
2825
2826 static int ip6_route_info_append(struct list_head *rt6_nh_list,
2827                                  struct rt6_info *rt, struct fib6_config *r_cfg)
2828 {
2829         struct rt6_nh *nh;
2830         struct rt6_info *rtnh;
2831         int err = -EEXIST;
2832
2833         list_for_each_entry(nh, rt6_nh_list, next) {
2834                 /* check if rt6_info already exists */
2835                 rtnh = nh->rt6_info;
2836
2837                 if (rtnh->dst.dev == rt->dst.dev &&
2838                     rtnh->rt6i_idev == rt->rt6i_idev &&
2839                     ipv6_addr_equal(&rtnh->rt6i_gateway,
2840                                     &rt->rt6i_gateway))
2841                         return err;
2842         }
2843
2844         nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2845         if (!nh)
2846                 return -ENOMEM;
2847         nh->rt6_info = rt;
2848         err = ip6_convert_metrics(&nh->mxc, r_cfg);
2849         if (err) {
2850                 kfree(nh);
2851                 return err;
2852         }
2853         memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2854         list_add_tail(&nh->next, rt6_nh_list);
2855
2856         return 0;
2857 }
2858
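     /* Add an RTA_MULTIPATH route: build one rt6_info (plus metrics) per
      * nexthop, insert them one by one and, on failure, roll back the
      * nexthops that were already inserted.
      */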
2859 static int ip6_route_multipath_add(struct fib6_config *cfg)
2860 {
2861         struct fib6_config r_cfg;
2862         struct rtnexthop *rtnh;
2863         struct rt6_info *rt;
2864         struct rt6_nh *err_nh;
2865         struct rt6_nh *nh, *nh_safe;
2866         int remaining;
2867         int attrlen;
2868         int err = 1;
2869         int nhn = 0;
2870         int replace = (cfg->fc_nlinfo.nlh &&
2871                        (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2872         LIST_HEAD(rt6_nh_list);
2873
2874         remaining = cfg->fc_mp_len;
2875         rtnh = (struct rtnexthop *)cfg->fc_mp;
2876
2877         /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2878          * rt6_info structs per nexthop
2879          */
2880         while (rtnh_ok(rtnh, remaining)) {
2881                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2882                 if (rtnh->rtnh_ifindex)
2883                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2884
2885                 attrlen = rtnh_attrlen(rtnh);
2886                 if (attrlen > 0) {
2887                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2888
2889                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2890                         if (nla) {
2891                                 r_cfg.fc_gateway = nla_get_in6_addr(nla);
2892                                 r_cfg.fc_flags |= RTF_GATEWAY;
2893                         }
2894                         r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2895                         nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2896                         if (nla)
2897                                 r_cfg.fc_encap_type = nla_get_u16(nla);
2898                 }
2899
2900                 rt = ip6_route_info_create(&r_cfg);
2901                 if (IS_ERR(rt)) {
2902                         err = PTR_ERR(rt);
2903                         rt = NULL;
2904                         goto cleanup;
2905                 }
2906
2907                 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
2908                 if (err) {
2909                         dst_free(&rt->dst);
2910                         goto cleanup;
2911                 }
2912
2913                 rtnh = rtnh_next(rtnh, &remaining);
2914         }
2915
2916         err_nh = NULL;
2917         list_for_each_entry(nh, &rt6_nh_list, next) {
2918                 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2919                 /* nh->rt6_info has been used or freed at this point; reset to NULL */
2920                 nh->rt6_info = NULL;
2921                 if (err) {
2922                         if (replace && nhn)
2923                                 ip6_print_replace_route_err(&rt6_nh_list);
2924                         err_nh = nh;
2925                         goto add_errout;
2926                 }
2927
2928                 /* Because each route is added as a single route, we clear
2929                  * these flags after the first nexthop: if there is a collision,
2930                  * we have already failed to add the first nexthop
2931                  * (fib6_add_rt2node() has rejected it); when replacing, the old
2932                  * nexthops have been replaced by the first new one, and the
2933                  * remaining nexthops should be appended to it.
2934                  */
2935                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2936                                                      NLM_F_REPLACE);
2937                 nhn++;
2938         }
2939
2940         goto cleanup;
2941
2942 add_errout:
2943         /* Delete routes that were already added */
2944         list_for_each_entry(nh, &rt6_nh_list, next) {
2945                 if (err_nh == nh)
2946                         break;
2947                 ip6_route_del(&nh->r_cfg);
2948         }
2949
2950 cleanup:
2951         list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2952                 if (nh->rt6_info)
2953                         dst_free(&nh->rt6_info->dst);
2954                 kfree(nh->mxc.mx);
2955                 list_del(&nh->next);
2956                 kfree(nh);
2957         }
2958
2959         return err;
2960 }
2961
2962 static int ip6_route_multipath_del(struct fib6_config *cfg)
2963 {
2964         struct fib6_config r_cfg;
2965         struct rtnexthop *rtnh;
2966         int remaining;
2967         int attrlen;
2968         int err = 1, last_err = 0;
2969
2970         remaining = cfg->fc_mp_len;
2971         rtnh = (struct rtnexthop *)cfg->fc_mp;
2972
2973         /* Parse a Multipath Entry */
2974         while (rtnh_ok(rtnh, remaining)) {
2975                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2976                 if (rtnh->rtnh_ifindex)
2977                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2978
2979                 attrlen = rtnh_attrlen(rtnh);
2980                 if (attrlen > 0) {
2981                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2982
2983                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2984                         if (nla) {
2985                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2986                                 r_cfg.fc_flags |= RTF_GATEWAY;
2987                         }
2988                 }
2989                 err = ip6_route_del(&r_cfg);
2990                 if (err)
2991                         last_err = err;
2992
2993                 rtnh = rtnh_next(rtnh, &remaining);
2994         }
2995
2996         return last_err;
2997 }
2998
2999 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3000 {
3001         struct fib6_config cfg;
3002         int err;
3003
3004         err = rtm_to_fib6_config(skb, nlh, &cfg);
3005         if (err < 0)
3006                 return err;
3007
3008         if (cfg.fc_mp)
3009                 return ip6_route_multipath_del(&cfg);
3010         else
3011                 return ip6_route_del(&cfg);
3012 }
3013
3014 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3015 {
3016         struct fib6_config cfg;
3017         int err;
3018
3019         err = rtm_to_fib6_config(skb, nlh, &cfg);
3020         if (err < 0)
3021                 return err;
3022
3023         if (cfg.fc_mp)
3024                 return ip6_route_multipath_add(&cfg);
3025         else
3026                 return ip6_route_add(&cfg);
3027 }
3028
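     /* Worst-case netlink message size for a route dumped by
      * rt6_fill_node().
      */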
3029 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3030 {
3031         return NLMSG_ALIGN(sizeof(struct rtmsg))
3032                + nla_total_size(16) /* RTA_SRC */
3033                + nla_total_size(16) /* RTA_DST */
3034                + nla_total_size(16) /* RTA_GATEWAY */
3035                + nla_total_size(16) /* RTA_PREFSRC */
3036                + nla_total_size(4) /* RTA_TABLE */
3037                + nla_total_size(4) /* RTA_IIF */
3038                + nla_total_size(4) /* RTA_OIF */
3039                + nla_total_size(4) /* RTA_PRIORITY */
3040                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3041                + nla_total_size(sizeof(struct rta_cacheinfo))
3042                + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3043                + nla_total_size(1) /* RTA_PREF */
3044                + lwtunnel_get_encap_size(rt->dst.lwtstate);
3045 }
3046
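     /* Fill a netlink route message describing rt.  Returns 1 (skip) when a
      * prefix-only dump was requested and rt is not a prefix route.
      */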
3047 static int rt6_fill_node(struct net *net,
3048                          struct sk_buff *skb, struct rt6_info *rt,
3049                          struct in6_addr *dst, struct in6_addr *src,
3050                          int iif, int type, u32 portid, u32 seq,
3051                          int prefix, int nowait, unsigned int flags)
3052 {
3053         u32 metrics[RTAX_MAX];
3054         struct rtmsg *rtm;
3055         struct nlmsghdr *nlh;
3056         long expires;
3057         u32 table;
3058
3059         if (prefix) {   /* user wants prefix routes only */
3060                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3061                         /* success since this is not a prefix route */
3062                         return 1;
3063                 }
3064         }
3065
3066         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
3067         if (!nlh)
3068                 return -EMSGSIZE;
3069
3070         rtm = nlmsg_data(nlh);
3071         rtm->rtm_family = AF_INET6;
3072         rtm->rtm_dst_len = rt->rt6i_dst.plen;
3073         rtm->rtm_src_len = rt->rt6i_src.plen;
3074         rtm->rtm_tos = 0;
3075         if (rt->rt6i_table)
3076                 table = rt->rt6i_table->tb6_id;
3077         else
3078                 table = RT6_TABLE_UNSPEC;
3079         rtm->rtm_table = table;
3080         if (nla_put_u32(skb, RTA_TABLE, table))
3081                 goto nla_put_failure;
3082         if (rt->rt6i_flags & RTF_REJECT) {
3083                 switch (rt->dst.error) {
3084                 case -EINVAL:
3085                         rtm->rtm_type = RTN_BLACKHOLE;
3086                         break;
3087                 case -EACCES:
3088                         rtm->rtm_type = RTN_PROHIBIT;
3089                         break;
3090                 case -EAGAIN:
3091                         rtm->rtm_type = RTN_THROW;
3092                         break;
3093                 default:
3094                         rtm->rtm_type = RTN_UNREACHABLE;
3095                         break;
3096                 }
3097         } else if (rt->rt6i_flags & RTF_LOCAL)
3099                 rtm->rtm_type = RTN_LOCAL;
3100         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
3101                 rtm->rtm_type = RTN_LOCAL;
3102         else
3103                 rtm->rtm_type = RTN_UNICAST;
3104         rtm->rtm_flags = 0;
3105         if (!netif_carrier_ok(rt->dst.dev)) {
3106                 rtm->rtm_flags |= RTNH_F_LINKDOWN;
3107                 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3108                         rtm->rtm_flags |= RTNH_F_DEAD;
3109         }
3110         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3111         rtm->rtm_protocol = rt->rt6i_protocol;
3112         if (rt->rt6i_flags & RTF_DYNAMIC)
3113                 rtm->rtm_protocol = RTPROT_REDIRECT;
3114         else if (rt->rt6i_flags & RTF_ADDRCONF) {
3115                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3116                         rtm->rtm_protocol = RTPROT_RA;
3117                 else
3118                         rtm->rtm_protocol = RTPROT_KERNEL;
3119         }
3120
3121         if (rt->rt6i_flags & RTF_CACHE)
3122                 rtm->rtm_flags |= RTM_F_CLONED;
3123
3124         if (dst) {
3125                 if (nla_put_in6_addr(skb, RTA_DST, dst))
3126                         goto nla_put_failure;
3127                 rtm->rtm_dst_len = 128;
3128         } else if (rtm->rtm_dst_len)
3129                 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
3130                         goto nla_put_failure;
3131 #ifdef CONFIG_IPV6_SUBTREES
3132         if (src) {
3133                 if (nla_put_in6_addr(skb, RTA_SRC, src))
3134                         goto nla_put_failure;
3135                 rtm->rtm_src_len = 128;
3136         } else if (rtm->rtm_src_len &&
3137                    nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
3138                 goto nla_put_failure;
3139 #endif
3140         if (iif) {
3141 #ifdef CONFIG_IPV6_MROUTE
3142                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
3143                         int err = ip6mr_get_route(net, skb, rtm, nowait,
3144                                                   portid);
3145
3146                         if (err <= 0) {
3147                                 if (!nowait) {
3148                                         if (err == 0)
3149                                                 return 0;
3150                                         goto nla_put_failure;
3151                                 } else {
3152                                         if (err == -EMSGSIZE)
3153                                                 goto nla_put_failure;
3154                                 }
3155                         }
3156                 } else
3157 #endif
3158                         if (nla_put_u32(skb, RTA_IIF, iif))
3159                                 goto nla_put_failure;
3160         } else if (dst) {
3161                 struct in6_addr saddr_buf;
3162                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
3163                     nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3164                         goto nla_put_failure;
3165         }
3166
3167         if (rt->rt6i_prefsrc.plen) {
3168                 struct in6_addr saddr_buf;
3169                 saddr_buf = rt->rt6i_prefsrc.addr;
3170                 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
3171                         goto nla_put_failure;
3172         }
3173
3174         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3175         if (rt->rt6i_pmtu)
3176                 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3177         if (rtnetlink_put_metrics(skb, metrics) < 0)
3178                 goto nla_put_failure;
3179
3180         if (rt->rt6i_flags & RTF_GATEWAY) {
3181                 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3182                         goto nla_put_failure;
3183         }
3184
3185         if (rt->dst.dev &&
3186             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3187                 goto nla_put_failure;
3188         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3189                 goto nla_put_failure;
3190
3191         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
3192
3193         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
3194                 goto nla_put_failure;
3195
3196         if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3197                 goto nla_put_failure;
3198
3199         if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3200                 goto nla_put_failure;
3201
3202         nlmsg_end(skb, nlh);
3203         return 0;
3204
3205 nla_put_failure:
3206         nlmsg_cancel(skb, nlh);
3207         return -EMSGSIZE;
3208 }
3209
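     /* fib6 dump callback: emit one RTM_NEWROUTE message per route,
      * honouring the RTM_F_PREFIX filter supplied with the dump request.
      */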
3210 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3211 {
3212         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3213         int prefix;
3214
3215         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3216                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3217                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3218         } else
3219                 prefix = 0;
3220
3221         return rt6_fill_node(arg->net,
3222                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3223                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3224                      prefix, 0, NLM_F_MULTI);
3225 }
3226
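     /* RTM_GETROUTE handler: resolve the requested source/destination
      * (input lookup when RTA_IIF is given, output lookup otherwise) and
      * unicast the resulting route back to the requester.
      */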
3227 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
3228 {
3229         struct net *net = sock_net(in_skb->sk);
3230         struct nlattr *tb[RTA_MAX+1];
3231         struct rt6_info *rt;
3232         struct sk_buff *skb;
3233         struct rtmsg *rtm;
3234         struct flowi6 fl6;
3235         int err, iif = 0, oif = 0;
3236
3237         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3238         if (err < 0)
3239                 goto errout;
3240
3241         err = -EINVAL;
3242         memset(&fl6, 0, sizeof(fl6));
3243
3244         if (tb[RTA_SRC]) {
3245                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3246                         goto errout;
3247
3248                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3249         }
3250
3251         if (tb[RTA_DST]) {
3252                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3253                         goto errout;
3254
3255                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3256         }
3257
3258         if (tb[RTA_IIF])
3259                 iif = nla_get_u32(tb[RTA_IIF]);
3260
3261         if (tb[RTA_OIF])
3262                 oif = nla_get_u32(tb[RTA_OIF]);
3263
3264         if (tb[RTA_MARK])
3265                 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3266
3267         if (iif) {
3268                 struct net_device *dev;
3269                 int flags = 0;
3270
3271                 dev = __dev_get_by_index(net, iif);
3272                 if (!dev) {
3273                         err = -ENODEV;
3274                         goto errout;
3275                 }
3276
3277                 fl6.flowi6_iif = iif;
3278
3279                 if (!ipv6_addr_any(&fl6.saddr))
3280                         flags |= RT6_LOOKUP_F_HAS_SADDR;
3281
3282                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3283                                                                flags);
3284         } else {
3285                 fl6.flowi6_oif = oif;
3286
3287                 if (netif_index_is_l3_master(net, oif)) {
3288                         fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3289                                            FLOWI_FLAG_SKIP_NH_OIF;
3290                 }
3291
3292                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3293         }
3294
3295         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
3296         if (!skb) {
3297                 ip6_rt_put(rt);
3298                 err = -ENOBUFS;
3299                 goto errout;
3300         }
3301
3302         /* Reserve room for dummy headers; this skb can pass
3303          * through a good chunk of the routing engine.
3304          */
3305         skb_reset_mac_header(skb);
3306         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
3307
3308         skb_dst_set(skb, &rt->dst);
3309
3310         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3311                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3312                             nlh->nlmsg_seq, 0, 0, 0);
3313         if (err < 0) {
3314                 kfree_skb(skb);
3315                 goto errout;
3316         }
3317
3318         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3319 errout:
3320         return err;
3321 }
3322
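     /* Notify RTNLGRP_IPV6_ROUTE listeners about a route change; @event is
      * RTM_NEWROUTE or RTM_DELROUTE and the skb is sized via
      * rt6_nlmsg_size().
      */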
3323 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3324                      unsigned int nlm_flags)
3325 {
3326         struct sk_buff *skb;
3327         struct net *net = info->nl_net;
3328         u32 seq;
3329         int err;
3330
3331         err = -ENOBUFS;
3332         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3333
3334         skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3335         if (!skb)
3336                 goto errout;
3337
3338         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
3339                                 event, info->portid, seq, 0, 0, nlm_flags);
3340         if (err < 0) {
3341                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3342                 WARN_ON(err == -EMSGSIZE);
3343                 kfree_skb(skb);
3344                 goto errout;
3345         }
3346         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
3347                     info->nlh, gfp_any());
3348         return;
3349 errout:
3350         if (err < 0)
3351                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
3352 }
3353
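     /* When the loopback device registers, attach the per-netns null (and,
      * with multiple tables, prohibit/blackhole) template routes to it.
      */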
3354 static int ip6_route_dev_notify(struct notifier_block *this,
3355                                 unsigned long event, void *ptr)
3356 {
3357         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3358         struct net *net = dev_net(dev);
3359
3360         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3361                 net->ipv6.ip6_null_entry->dst.dev = dev;
3362                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3363 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3364                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
3365                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3366                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
3367                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3368 #endif
3369         }
3370
3371         return NOTIFY_OK;
3372 }
3373
3374 /*
3375  *      /proc
3376  */
3377
3378 #ifdef CONFIG_PROC_FS
3379
3380 static const struct file_operations ipv6_route_proc_fops = {
3381         .owner          = THIS_MODULE,
3382         .open           = ipv6_route_open,
3383         .read           = seq_read,
3384         .llseek         = seq_lseek,
3385         .release        = seq_release_net,
3386 };
3387
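     /* /proc/net/rt6_stats: one line of hex counters describing the fib6
      * tree and the dst cache of this netns.
      */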
3388 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3389 {
3390         struct net *net = (struct net *)seq->private;
3391         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
3392                    net->ipv6.rt6_stats->fib_nodes,
3393                    net->ipv6.rt6_stats->fib_route_nodes,
3394                    net->ipv6.rt6_stats->fib_rt_alloc,
3395                    net->ipv6.rt6_stats->fib_rt_entries,
3396                    net->ipv6.rt6_stats->fib_rt_cache,
3397                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
3398                    net->ipv6.rt6_stats->fib_discarded_routes);
3399
3400         return 0;
3401 }
3402
3403 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3404 {
3405         return single_open_net(inode, file, rt6_stats_seq_show);
3406 }
3407
3408 static const struct file_operations rt6_stats_seq_fops = {
3409         .owner   = THIS_MODULE,
3410         .open    = rt6_stats_seq_open,
3411         .read    = seq_read,
3412         .llseek  = seq_lseek,
3413         .release = single_release_net,
3414 };
3415 #endif  /* CONFIG_PROC_FS */
3416
3417 #ifdef CONFIG_SYSCTL
3418
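     /* net.ipv6.route.flush is write-only: any write triggers fib6 garbage
      * collection for this netns.
      */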
3419 static
3420 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3421                               void __user *buffer, size_t *lenp, loff_t *ppos)
3422 {
3423         struct net *net;
3424         int delay;
3425         if (!write)
3426                 return -EINVAL;
3427
3428         net = (struct net *)ctl->extra1;
3429         delay = net->ipv6.sysctl.flush_delay;
3430         proc_dointvec(ctl, write, buffer, lenp, ppos);
3431         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3432         return 0;
3433 }
3434
3435 struct ctl_table ipv6_route_table_template[] = {
3436         {
3437                 .procname       =       "flush",
3438                 .data           =       &init_net.ipv6.sysctl.flush_delay,
3439                 .maxlen         =       sizeof(int),
3440                 .mode           =       0200,
3441                 .proc_handler   =       ipv6_sysctl_rtcache_flush
3442         },
3443         {
3444                 .procname       =       "gc_thresh",
3445                 .data           =       &ip6_dst_ops_template.gc_thresh,
3446                 .maxlen         =       sizeof(int),
3447                 .mode           =       0644,
3448                 .proc_handler   =       proc_dointvec,
3449         },
3450         {
3451                 .procname       =       "max_size",
3452                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
3453                 .maxlen         =       sizeof(int),
3454                 .mode           =       0644,
3455                 .proc_handler   =       proc_dointvec,
3456         },
3457         {
3458                 .procname       =       "gc_min_interval",
3459                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3460                 .maxlen         =       sizeof(int),
3461                 .mode           =       0644,
3462                 .proc_handler   =       proc_dointvec_jiffies,
3463         },
3464         {
3465                 .procname       =       "gc_timeout",
3466                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
3467                 .maxlen         =       sizeof(int),
3468                 .mode           =       0644,
3469                 .proc_handler   =       proc_dointvec_jiffies,
3470         },
3471         {
3472                 .procname       =       "gc_interval",
3473                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
3474                 .maxlen         =       sizeof(int),
3475                 .mode           =       0644,
3476                 .proc_handler   =       proc_dointvec_jiffies,
3477         },
3478         {
3479                 .procname       =       "gc_elasticity",
3480                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
3481                 .maxlen         =       sizeof(int),
3482                 .mode           =       0644,
3483                 .proc_handler   =       proc_dointvec,
3484         },
3485         {
3486                 .procname       =       "mtu_expires",
3487                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
3488                 .maxlen         =       sizeof(int),
3489                 .mode           =       0644,
3490                 .proc_handler   =       proc_dointvec_jiffies,
3491         },
3492         {
3493                 .procname       =       "min_adv_mss",
3494                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
3495                 .maxlen         =       sizeof(int),
3496                 .mode           =       0644,
3497                 .proc_handler   =       proc_dointvec,
3498         },
3499         {
3500                 .procname       =       "gc_min_interval_ms",
3501                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
3502                 .maxlen         =       sizeof(int),
3503                 .mode           =       0644,
3504                 .proc_handler   =       proc_dointvec_ms_jiffies,
3505         },
3506         { }
3507 };
3508
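     /* Duplicate the sysctl template for a new netns and point each entry
      * at that netns' data; the indices below must match the template
      * order above.
      */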
3509 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3510 {
3511         struct ctl_table *table;
3512
3513         table = kmemdup(ipv6_route_table_template,
3514                         sizeof(ipv6_route_table_template),
3515                         GFP_KERNEL);
3516
3517         if (table) {
3518                 table[0].data = &net->ipv6.sysctl.flush_delay;
3519                 table[0].extra1 = net;
3520                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3521                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3522                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3523                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3524                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3525                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3526                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3527                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3528                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3529
3530                 /* Don't export sysctls to unprivileged users */
3531                 if (net->user_ns != &init_user_ns)
3532                         table[0].procname = NULL;
3533         }
3534
3535         return table;
3536 }
3537 #endif
3538
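     /* Per-netns setup: clone the dst_ops template, allocate the
      * null/prohibit/blackhole template routes and initialise the routing
      * sysctl defaults.
      */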
3539 static int __net_init ip6_route_net_init(struct net *net)
3540 {
3541         int ret = -ENOMEM;
3542
3543         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3544                sizeof(net->ipv6.ip6_dst_ops));
3545
3546         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3547                 goto out_ip6_dst_ops;
3548
3549         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3550                                            sizeof(*net->ipv6.ip6_null_entry),
3551                                            GFP_KERNEL);
3552         if (!net->ipv6.ip6_null_entry)
3553                 goto out_ip6_dst_entries;
3554         net->ipv6.ip6_null_entry->dst.path =
3555                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3556         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3557         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3558                          ip6_template_metrics, true);
3559
3560 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3561         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3562                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3563                                                GFP_KERNEL);
3564         if (!net->ipv6.ip6_prohibit_entry)
3565                 goto out_ip6_null_entry;
3566         net->ipv6.ip6_prohibit_entry->dst.path =
3567                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3568         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3569         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3570                          ip6_template_metrics, true);
3571
3572         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3573                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3574                                                GFP_KERNEL);
3575         if (!net->ipv6.ip6_blk_hole_entry)
3576                 goto out_ip6_prohibit_entry;
3577         net->ipv6.ip6_blk_hole_entry->dst.path =
3578                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3579         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3580         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3581                          ip6_template_metrics, true);
3582 #endif
3583
3584         net->ipv6.sysctl.flush_delay = 0;
3585         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3586         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3587         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3588         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3589         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3590         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3591         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3592
3593         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3594
3595         ret = 0;
3596 out:
3597         return ret;
3598
3599 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3600 out_ip6_prohibit_entry:
3601         kfree(net->ipv6.ip6_prohibit_entry);
3602 out_ip6_null_entry:
3603         kfree(net->ipv6.ip6_null_entry);
3604 #endif
3605 out_ip6_dst_entries:
3606         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3607 out_ip6_dst_ops:
3608         goto out;
3609 }
3610
3611 static void __net_exit ip6_route_net_exit(struct net *net)
3612 {
3613         kfree(net->ipv6.ip6_null_entry);
3614 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3615         kfree(net->ipv6.ip6_prohibit_entry);
3616         kfree(net->ipv6.ip6_blk_hole_entry);
3617 #endif
3618         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3619 }
3620
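     /* Late per-netns init/exit: create and remove the /proc/net entries
      * ipv6_route and rt6_stats.
      */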
3621 static int __net_init ip6_route_net_init_late(struct net *net)
3622 {
3623 #ifdef CONFIG_PROC_FS
3624         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3625         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3626 #endif
3627         return 0;
3628 }
3629
3630 static void __net_exit ip6_route_net_exit_late(struct net *net)
3631 {
3632 #ifdef CONFIG_PROC_FS
3633         remove_proc_entry("ipv6_route", net->proc_net);
3634         remove_proc_entry("rt6_stats", net->proc_net);
3635 #endif
3636 }
3637
3638 static struct pernet_operations ip6_route_net_ops = {
3639         .init = ip6_route_net_init,
3640         .exit = ip6_route_net_exit,
3641 };
3642
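     /* Per-netns inetpeer base for IPv6 (used e.g. for ICMPv6 rate
      * limiting state).
      */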
3643 static int __net_init ipv6_inetpeer_init(struct net *net)
3644 {
3645         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3646
3647         if (!bp)
3648                 return -ENOMEM;
3649         inet_peer_base_init(bp);
3650         net->ipv6.peers = bp;
3651         return 0;
3652 }
3653
3654 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3655 {
3656         struct inet_peer_base *bp = net->ipv6.peers;
3657
3658         net->ipv6.peers = NULL;
3659         inetpeer_invalidate_tree(bp);
3660         kfree(bp);
3661 }
3662
3663 static struct pernet_operations ipv6_inetpeer_ops = {
3664         .init   =       ipv6_inetpeer_init,
3665         .exit   =       ipv6_inetpeer_exit,
3666 };
3667
3668 static struct pernet_operations ip6_route_net_late_ops = {
3669         .init = ip6_route_net_init_late,
3670         .exit = ip6_route_net_exit_late,
3671 };
3672
3673 static struct notifier_block ip6_route_dev_notifier = {
3674         .notifier_call = ip6_route_dev_notify,
3675         .priority = 0,
3676 };
3677
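     /* Subsystem init: set up the rt6_info slab cache, the per-netns
      * state, fib6, xfrm6 and policy rules, the RTM_*ROUTE handlers, the
      * netdevice notifier and the per-cpu uncached route lists.
      */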
3678 int __init ip6_route_init(void)
3679 {
3680         int ret;
3681         int cpu;
3682
3683         ret = -ENOMEM;
3684         ip6_dst_ops_template.kmem_cachep =
3685                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3686                                   SLAB_HWCACHE_ALIGN, NULL);
3687         if (!ip6_dst_ops_template.kmem_cachep)
3688                 goto out;
3689
3690         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3691         if (ret)
3692                 goto out_kmem_cache;
3693
3694         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3695         if (ret)
3696                 goto out_dst_entries;
3697
3698         ret = register_pernet_subsys(&ip6_route_net_ops);
3699         if (ret)
3700                 goto out_register_inetpeer;
3701
3702         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3703
3704         /* The loopback device is registered before this portion of code runs,
3705          * so the loopback reference in rt6_info is not taken automatically;
3706          * do it manually for init_net. */
3707         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3708         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3709 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3710         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3711         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3712         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3713         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3714 #endif
3715         ret = fib6_init();
3716         if (ret)
3717                 goto out_register_subsys;
3718
3719         ret = xfrm6_init();
3720         if (ret)
3721                 goto out_fib6_init;
3722
3723         ret = fib6_rules_init();
3724         if (ret)
3725                 goto xfrm6_init;
3726
3727         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3728         if (ret)
3729                 goto fib6_rules_init;
3730
3731         ret = -ENOBUFS;
3732         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3733             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3734             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3735                 goto out_register_late_subsys;
3736
3737         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3738         if (ret)
3739                 goto out_register_late_subsys;
3740
3741         for_each_possible_cpu(cpu) {
3742                 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3743
3744                 INIT_LIST_HEAD(&ul->head);
3745                 spin_lock_init(&ul->lock);
3746         }
3747
3748 out:
3749         return ret;
3750
3751 out_register_late_subsys:
3752         unregister_pernet_subsys(&ip6_route_net_late_ops);
3753 fib6_rules_init:
3754         fib6_rules_cleanup();
3755 xfrm6_init:
3756         xfrm6_fini();
3757 out_fib6_init:
3758         fib6_gc_cleanup();
3759 out_register_subsys:
3760         unregister_pernet_subsys(&ip6_route_net_ops);
3761 out_register_inetpeer:
3762         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3763 out_dst_entries:
3764         dst_entries_destroy(&ip6_dst_blackhole_ops);
3765 out_kmem_cache:
3766         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3767         goto out;
3768 }
3769
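     /* Tear down everything set up by ip6_route_init(). */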
3770 void ip6_route_cleanup(void)
3771 {
3772         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3773         unregister_pernet_subsys(&ip6_route_net_late_ops);
3774         fib6_rules_cleanup();
3775         xfrm6_fini();
3776         fib6_gc_cleanup();
3777         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3778         unregister_pernet_subsys(&ip6_route_net_ops);
3779         dst_entries_destroy(&ip6_dst_blackhole_ops);
3780         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3781 }