2 * Copyright (c) 2013 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
60 #if IS_ENABLED(CONFIG_IPV6)
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
/* Hash a tunnel key together with a remote address: both values are
 * reinterpreted as u32, XORed and fed to hash_32().
 * NOTE(review): the second argument of hash_32() (the hash width) is on a
 * line not visible in this excerpt.
 */
66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
68 return hash_32((__force u32)key ^ (__force u32)remote,
/* Install @dst in the cache slot @idst, atomically swapping out the
 * previously cached entry with xchg().
 * NOTE(review): the handling of @saddr and of the old entry returned by
 * xchg() is not visible in this excerpt -- presumably @saddr is stored in
 * the slot and the old dst is released; confirm against the full source.
 */
72 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
73 struct dst_entry *dst, __be32 saddr)
75 struct dst_entry *old_dst;
78 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
/* Store @dst and @saddr in the tunnel's dst cache slot selected by
 * raw_cpu_ptr(t->dst_cache).
 */
83 static noinline void tunnel_dst_set(struct ip_tunnel *t,
84 struct dst_entry *dst, __be32 saddr)
86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
/* Clear the dst cache slot used by tunnel_dst_set() by storing a NULL dst
 * and a zero source address.
 */
89 static void tunnel_dst_reset(struct ip_tunnel *t)
91 tunnel_dst_set(t, NULL, 0);
/* Clear the tunnel's dst cache slot of every possible CPU by storing a NULL
 * dst and a zero source address in each of them.  Exported symbol.
 */
94 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
98 for_each_possible_cpu(i)
99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
101 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
/* Fetch the route cached in the tunnel's dst cache slot selected by
 * raw_cpu_ptr(t->dst_cache).
 *
 * The cached dst is read with rcu_dereference() and a reference is attempted
 * with atomic_inc_not_zero() on its refcount.  The entry is accepted when
 * dst->obsolete is zero or when dst->ops->check(dst, @cookie) evaluates
 * true; the cached source address is then copied to *@saddr.  The dst is
 * returned cast to struct rtable *.
 *
 * NOTE(review): the failure branches are not visible in this excerpt --
 * presumably dst becomes NULL when the reference cannot be taken or the
 * validity check fails; confirm against the full source.
 */
103 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
104 u32 cookie, __be32 *saddr)
106 struct ip_tunnel_dst *idst;
107 struct dst_entry *dst;
110 idst = raw_cpu_ptr(t->dst_cache);
111 dst = rcu_dereference(idst->dst);
112 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
115 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
116 *saddr = idst->saddr;
124 return (struct rtable *)dst;
/* Decide whether a packet's key information matches tunnel parameters @p.
 *
 * When @p has TUNNEL_KEY set in i_flags and the packet @flags also carry
 * TUNNEL_KEY, the result is key == p->i_key.  The final visible statement
 * returns true only when the packet carries no TUNNEL_KEY.
 *
 * NOTE(review): the result of the "key expected, none present" case and the
 * branch structure around the last return are on lines not visible here --
 * presumably false and an else branch for keyless tunnels; confirm.
 */
127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128 __be16 flags, __be32 key)
130 if (p->i_flags & TUNNEL_KEY) {
131 if (flags & TUNNEL_KEY)
132 return key == p->i_key;
134 /* key expected, none present */
137 return !(flags & TUNNEL_KEY);
140 /* Fallback tunnel: no source, no destination, no key, no options
143 We require exact key match i.e. if a key is present in packet
144 it will match only tunnel with the same key; if it is not present,
145 it will match only keyless tunnel.
147 All keyless packets, if they do not match a configured keyless tunnel,
148 will match the fallback tunnel.
149 Given src, dst and key, find the appropriate tunnel for input.
/* Find the tunnel that should receive a packet described by @link, @flags,
 * @remote, @local and a key (the key parameter is declared on a line not
 * visible in this excerpt).  Buckets are walked with
 * hlist_for_each_entry_rcu().
 *
 * Visible lookup order:
 *  1. bucket ip_tunnel_hash(key, remote): tunnels with saddr == local and
 *     daddr == remote, device IFF_UP, key accepted by ip_tunnel_key_match();
 *  2. same bucket: tunnels with daddr == remote and saddr == 0, device
 *     IFF_UP, key accepted by ip_tunnel_key_match();
 *  3. bucket ip_tunnel_hash(key, 0): tunnels with saddr == local and
 *     daddr == 0, or with daddr == local when local is multicast, device
 *     IFF_UP, key accepted by ip_tunnel_key_match();
 *  4. unless TUNNEL_NO_KEY is set in @flags: tunnels in that bucket with
 *     i_key == key, both addresses zero and device IFF_UP;
 *  5. the tunnel published in itn->collect_md_tun;
 *  6. the fallback tunnel device, when it exists and is IFF_UP.
 *
 * In every pass t->parms.link is compared with @link.
 * NOTE(review): the statements following each visible condition are missing
 * from this excerpt -- presumably non-matching entries are skipped, a link
 * match returns immediately and a link mismatch is remembered in 'cand';
 * confirm against the full source.
 */
151 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
152 int link, __be16 flags,
153 __be32 remote, __be32 local,
157 struct ip_tunnel *t, *cand = NULL;
158 struct hlist_head *head;
160 hash = ip_tunnel_hash(key, remote);
161 head = &itn->tunnels[hash];
163 hlist_for_each_entry_rcu(t, head, hash_node) {
164 if (local != t->parms.iph.saddr ||
165 remote != t->parms.iph.daddr ||
166 !(t->dev->flags & IFF_UP))
169 if (!ip_tunnel_key_match(&t->parms, flags, key))
172 if (t->parms.link == link)
178 hlist_for_each_entry_rcu(t, head, hash_node) {
179 if (remote != t->parms.iph.daddr ||
180 t->parms.iph.saddr != 0 ||
181 !(t->dev->flags & IFF_UP))
184 if (!ip_tunnel_key_match(&t->parms, flags, key))
187 if (t->parms.link == link)
193 hash = ip_tunnel_hash(key, 0);
194 head = &itn->tunnels[hash];
196 hlist_for_each_entry_rcu(t, head, hash_node) {
197 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
198 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
201 if (!(t->dev->flags & IFF_UP))
204 if (!ip_tunnel_key_match(&t->parms, flags, key))
207 if (t->parms.link == link)
213 if (flags & TUNNEL_NO_KEY)
214 goto skip_key_lookup;
216 hlist_for_each_entry_rcu(t, head, hash_node) {
217 if (t->parms.i_key != key ||
218 t->parms.iph.saddr != 0 ||
219 t->parms.iph.daddr != 0 ||
220 !(t->dev->flags & IFF_UP))
223 if (t->parms.link == link)
233 t = rcu_dereference(itn->collect_md_tun);
237 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
238 return netdev_priv(itn->fb_tunnel_dev);
242 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
/* Return the hash bucket of @itn that tunnel parameters @parms belong to.
 *
 * The remote address used for hashing is parms->iph.daddr when it is
 * non-zero and not multicast; the key is parms->i_key.  The bucket index is
 * ip_tunnel_hash(i_key, remote).
 *
 * NOTE(review): the value of 'remote' in the other case and the adjustment
 * made for VTI parameters without TUNNEL_KEY are on lines not visible here
 * -- presumably remote = 0 and i_key = 0 respectively; confirm.
 */
244 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
245 struct ip_tunnel_parm *parms)
249 __be32 i_key = parms->i_key;
251 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
252 remote = parms->iph.daddr;
256 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
259 h = ip_tunnel_hash(i_key, remote);
260 return &itn->tunnels[h];
/* Insert tunnel @t at the head of its hash bucket (chosen by ip_bucket()
 * from t->parms) using hlist_add_head_rcu().
 * NOTE(review): the condition guarding the itn->collect_md_tun assignment
 * is not visible here -- presumably it applies only to collect_md tunnels;
 * confirm.
 */
263 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
265 struct hlist_head *head = ip_bucket(itn, &t->parms);
268 rcu_assign_pointer(itn->collect_md_tun, t);
269 hlist_add_head_rcu(&t->hash_node, head);
/* Unlink tunnel @t from its hash bucket with hlist_del_init_rcu().
 * NOTE(review): the condition guarding the reset of itn->collect_md_tun to
 * NULL is not visible here -- presumably it applies only to collect_md
 * tunnels; confirm.
 */
272 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
275 rcu_assign_pointer(itn->collect_md_tun, NULL);
276 hlist_del_init_rcu(&t->hash_node);
/* Search the bucket selected by ip_bucket(@itn, @parms) for a tunnel whose
 * saddr, daddr and link equal those in @parms, whose device type equals
 * 'type' (a parameter declared on a line not visible in this excerpt) and
 * whose key is accepted by ip_tunnel_key_match().
 * NOTE(review): the statements executed on a match and the final return are
 * not visible here -- presumably the loop breaks and the matching tunnel (or
 * NULL) is returned; confirm.
 */
279 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
280 struct ip_tunnel_parm *parms,
283 __be32 remote = parms->iph.daddr;
284 __be32 local = parms->iph.saddr;
285 __be32 key = parms->i_key;
286 __be16 flags = parms->i_flags;
287 int link = parms->link;
288 struct ip_tunnel *t = NULL;
289 struct hlist_head *head = ip_bucket(itn, parms);
291 hlist_for_each_entry_rcu(t, head, hash_node) {
292 if (local == t->parms.iph.saddr &&
293 remote == t->parms.iph.daddr &&
294 link == t->parms.link &&
295 type == t->dev->type &&
296 ip_tunnel_key_match(&t->parms, flags, key))
/* Allocate and register a tunnel net_device in @net for link type @ops.
 *
 * Visible steps: the device name is copied from parms->name or built from
 * ops->kind with a "%d" suffix (after checking that ops->kind leaves room
 * for it), the device is allocated with alloc_netdev() using ops->priv_size
 * and ops->setup, bound to @net, given @ops as rtnl_link_ops, has *@parms
 * copied into its private ip_tunnel and is passed to register_netdevice().
 *
 * NOTE(review): the condition choosing between the two naming paths, all
 * error handling and the return statements are on lines not visible in this
 * excerpt; callers in this file test the result with IS_ERR()/ERR_CAST(), so
 * it presumably returns the device or an ERR_PTR -- confirm.
 */
302 static struct net_device *__ip_tunnel_create(struct net *net,
303 const struct rtnl_link_ops *ops,
304 struct ip_tunnel_parm *parms)
307 struct ip_tunnel *tunnel;
308 struct net_device *dev;
312 strlcpy(name, parms->name, IFNAMSIZ);
314 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
318 strlcpy(name, ops->kind, IFNAMSIZ);
319 strncat(name, "%d", 2);
323 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
328 dev_net_set(dev, net);
330 dev->rtnl_link_ops = ops;
332 tunnel = netdev_priv(dev);
333 tunnel->parms = *parms;
336 err = register_netdevice(dev);
/* Initialise the IPv4 flow key @fl4 for a tunnel route lookup: the structure
 * is zeroed, then the output interface, TOS, protocol and GRE key fields are
 * filled in.
 * NOTE(review): the 'proto' parameter declaration and the use of @daddr and
 * @saddr are on lines not visible in this excerpt -- presumably they are
 * stored in fl4->daddr / fl4->saddr; confirm.
 */
348 static inline void init_tunnel_flow(struct flowi4 *fl4,
350 __be32 daddr, __be32 saddr,
351 __be32 key, __u8 tos, int oif)
353 memset(fl4, 0, sizeof(*fl4));
354 fl4->flowi4_oif = oif;
357 fl4->flowi4_tos = tos;
358 fl4->flowi4_proto = proto;
359 fl4->fl4_gre_key = key;
/* Derive needed_headroom (and an MTU value) for tunnel device @dev from the
 * device the tunnel is expected to transmit through.
 *
 * Visible steps: a route lookup built from the tunnel's outer IP header
 * parameters, whose route is stored in the dst cache; IFF_POINTOPOINT set on
 * devices that are not ARPHRD_ETHER; a lookup of the underlying device by
 * tunnel->parms.link when none was found yet; hlen taken from that device's
 * hard_header_len + needed_headroom; dev->needed_headroom set to
 * t_hlen + hlen; and the MTU reduced by dev->hard_header_len + t_hlen.
 *
 * NOTE(review): the conditions guarding several of these steps and the
 * return statement are not visible in this excerpt; callers assign the
 * return value to dev->mtu, so it is presumably the computed mtu -- confirm.
 */
362 static int ip_tunnel_bind_dev(struct net_device *dev)
364 struct net_device *tdev = NULL;
365 struct ip_tunnel *tunnel = netdev_priv(dev);
366 const struct iphdr *iph;
367 int hlen = LL_MAX_HEADER;
368 int mtu = ETH_DATA_LEN;
369 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
371 iph = &tunnel->parms.iph;
373 /* Guess output device to choose reasonable mtu and needed_headroom */
378 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
379 iph->saddr, tunnel->parms.o_key,
380 RT_TOS(iph->tos), tunnel->parms.link);
381 rt = ip_route_output_key(tunnel->net, &fl4);
385 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
388 if (dev->type != ARPHRD_ETHER)
389 dev->flags |= IFF_POINTOPOINT;
392 if (!tdev && tunnel->parms.link)
393 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
396 hlen = tdev->hard_header_len + tdev->needed_headroom;
400 dev->needed_headroom = t_hlen + hlen;
401 mtu -= (dev->hard_header_len + t_hlen);
/* Create a new tunnel in @net from @parms, using the rtnl_link_ops of the
 * fallback tunnel device (which must exist, enforced by BUG_ON()).
 *
 * The device is created by __ip_tunnel_create(); its MTU is set from
 * ip_tunnel_bind_dev() and its private ip_tunnel is inserted into @itn with
 * ip_tunnel_add().  A creation error is propagated with ERR_CAST().
 * NOTE(review): the error test and the final return are on lines not
 * visible here -- presumably IS_ERR(dev) and 'return nt'; confirm.
 */
409 static struct ip_tunnel *ip_tunnel_create(struct net *net,
410 struct ip_tunnel_net *itn,
411 struct ip_tunnel_parm *parms)
413 struct ip_tunnel *nt;
414 struct net_device *dev;
416 BUG_ON(!itn->fb_tunnel_dev);
417 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
419 return ERR_CAST(dev);
421 dev->mtu = ip_tunnel_bind_dev(dev);
423 nt = netdev_priv(dev);
424 ip_tunnel_add(itn, nt);
/* Receive path for a decapsulated packet @skb on @tunnel.
 *
 * Visible steps, in order:
 *  - with CONFIG_NET_IPGRE_BROADCAST, a multicast outer destination bumps
 *    the multicast counter and marks the skb PACKET_BROADCAST;
 *  - a mismatch between the packet's TUNNEL_CSUM flag (@tpi) and the
 *    tunnel's i_flags is counted as rx_crc_errors/rx_errors;
 *  - when the tunnel expects TUNNEL_SEQ, a packet without a sequence number
 *    or with one older than tunnel->i_seqno is counted as
 *    rx_fifo_errors/rx_errors; otherwise i_seqno advances to seq + 1;
 *  - the network header is reset and IP_ECN_decapsulate() is applied, with a
 *    rate-limited log message and rx_frame_errors/rx_errors counters on the
 *    visible error path;
 *  - per-CPU rx_packets/rx_bytes are updated under the u64_stats sync;
 *  - the skb is scrubbed (fully when the tunnel's netns differs from the
 *    device's), its protocol/device are set (eth_type_trans() for
 *    ARPHRD_ETHER devices), @tun_dst is attached as its dst, and it is
 *    handed to gro_cells_receive().
 *
 * NOTE(review): the last parameter, the drop label, the exact conditions
 * around the ECN handling and the return values are on lines not visible in
 * this excerpt; confirm against the full source.
 */
428 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
429 const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
432 struct pcpu_sw_netstats *tstats;
433 const struct iphdr *iph = ip_hdr(skb);
436 #ifdef CONFIG_NET_IPGRE_BROADCAST
437 if (ipv4_is_multicast(iph->daddr)) {
438 tunnel->dev->stats.multicast++;
439 skb->pkt_type = PACKET_BROADCAST;
443 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
444 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
445 tunnel->dev->stats.rx_crc_errors++;
446 tunnel->dev->stats.rx_errors++;
450 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
451 if (!(tpi->flags&TUNNEL_SEQ) ||
452 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
453 tunnel->dev->stats.rx_fifo_errors++;
454 tunnel->dev->stats.rx_errors++;
457 tunnel->i_seqno = ntohl(tpi->seq) + 1;
460 skb_reset_network_header(skb);
462 err = IP_ECN_decapsulate(iph, skb);
465 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
466 &iph->saddr, iph->tos);
468 ++tunnel->dev->stats.rx_frame_errors;
469 ++tunnel->dev->stats.rx_errors;
474 tstats = this_cpu_ptr(tunnel->dev->tstats);
475 u64_stats_update_begin(&tstats->syncp);
476 tstats->rx_packets++;
477 tstats->rx_bytes += skb->len;
478 u64_stats_update_end(&tstats->syncp);
480 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
482 if (tunnel->dev->type == ARPHRD_ETHER) {
483 skb->protocol = eth_type_trans(skb, tunnel->dev);
484 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
486 skb->dev = tunnel->dev;
490 skb_dst_set(skb, (struct dst_entry *)tun_dst);
492 gro_cells_receive(&tunnel->gro_cells, skb);
499 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
/* Compute the header length contributed by encapsulation @e.
 *
 * e->type is first compared with TUNNEL_ENCAP_NONE and range-checked against
 * MAX_IPTUN_ENCAP_OPS; otherwise the handler registered in
 * iptun_encaps[e->type] is read with rcu_dereference() and, when it provides
 * encap_hlen(), that callback supplies the length.
 * NOTE(review): the values returned for the "none" and out-of-range cases,
 * the RCU locking and the final return are on lines not visible here --
 * presumably 0, -EINVAL and 'return hlen'; confirm.
 */
501 static int ip_encap_hlen(struct ip_tunnel_encap *e)
503 const struct ip_tunnel_encap_ops *ops;
506 if (e->type == TUNNEL_ENCAP_NONE)
509 if (e->type >= MAX_IPTUN_ENCAP_OPS)
513 ops = rcu_dereference(iptun_encaps[e->type]);
514 if (likely(ops && ops->encap_hlen))
515 hlen = ops->encap_hlen(e);
/* Table of encapsulation handlers with MAX_IPTUN_ENCAP_OPS slots.  Other
 * code in this file indexes it by encapsulation type and reads entries with
 * rcu_dereference().
 */
521 const struct ip_tunnel_encap_ops __rcu *
522 iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
/* Register encapsulation handler @ops.  The slot number 'num' (a parameter
 * declared on a line not visible in this excerpt) is range-checked against
 * MAX_IPTUN_ENCAP_OPS and the update is performed with cmpxchg().
 * NOTE(review): the cmpxchg() arguments and the mapping of its result to the
 * return value are only partly visible -- presumably the slot in
 * iptun_encaps[] is claimed only if it was NULL; confirm.
 */
524 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
527 if (num >= MAX_IPTUN_ENCAP_OPS)
530 return !cmpxchg((const struct ip_tunnel_encap_ops **)
534 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
/* Unregister encapsulation handler @ops.  The slot number 'num' (a parameter
 * declared on a line not visible in this excerpt) is range-checked against
 * MAX_IPTUN_ENCAP_OPS; cmpxchg() then replaces @ops with NULL, and 'ret' is
 * 0 when the slot held @ops, -1 otherwise.
 * NOTE(review): the slot expression, any RCU synchronisation after the swap
 * and the return statement are on lines not visible here; confirm.
 */
536 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
541 if (num >= MAX_IPTUN_ENCAP_OPS)
544 ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
546 ops, NULL) == ops) ? 0 : -1;
552 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
/* Configure tunnel @t to use the encapsulation described by @ipencap.
 *
 * t->encap is zeroed, the encapsulation header length is obtained from
 * ip_encap_hlen(), the type, ports and flags are copied from @ipencap, and
 * the tunnel's encap_hlen and total hlen (encap_hlen + tun_hlen) are
 * updated.
 * NOTE(review): the handling of a failing ip_encap_hlen() and the return
 * value are on lines not visible here -- presumably a negative hlen is
 * returned as the error and 0 on success; confirm.
 */
554 int ip_tunnel_encap_setup(struct ip_tunnel *t,
555 struct ip_tunnel_encap *ipencap)
559 memset(&t->encap, 0, sizeof(t->encap));
561 hlen = ip_encap_hlen(ipencap);
565 t->encap.type = ipencap->type;
566 t->encap.sport = ipencap->sport;
567 t->encap.dport = ipencap->dport;
568 t->encap.flags = ipencap->flags;
570 t->encap_hlen = hlen;
571 t->hlen = t->encap_hlen + t->tun_hlen;
/* Let the tunnel's encapsulation handler build its header on @skb.
 *
 * t->encap.type is compared with TUNNEL_ENCAP_NONE and range-checked against
 * MAX_IPTUN_ENCAP_OPS; otherwise the handler in iptun_encaps[type] is read
 * with rcu_dereference() and, when it provides build_header(), called with
 * the skb, the tunnel's encap settings, @protocol and @fl4.
 * NOTE(review): the values returned for the "none" and out-of-range cases,
 * the RCU locking and the final return are on lines not visible here;
 * ip_tunnel_xmit() treats a negative result as failure -- confirm.
 */
577 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
578 u8 *protocol, struct flowi4 *fl4)
580 const struct ip_tunnel_encap_ops *ops;
583 if (t->encap.type == TUNNEL_ENCAP_NONE)
586 if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
590 ops = rcu_dereference(iptun_encaps[t->encap.type]);
591 if (likely(ops && ops->build_header))
592 ret = ops->build_header(skb, &t->encap, protocol, fl4);
597 EXPORT_SYMBOL(ip_tunnel_encap);
/* Path-MTU handling for a packet about to be sent through tunnel @dev over
 * route @rt.
 *
 * Visible steps: the inner packet size is skb->len minus the tunnel header
 * and the device hard header; the MTU is either the route MTU minus the
 * device hard header, the outer IPv4 header and the tunnel header, or the
 * MTU of the skb's own dst (dev->mtu when the skb has none); update_pmtu()
 * is invoked on the skb's dst; for an inner IPv4 packet that is not GSO and
 * has DF set, an ICMP_FRAG_NEEDED error carrying the MTU is sent; for an
 * inner IPv6 packet (CONFIG_IPV6) the route's RTAX_MTU metric may be lowered
 * and an ICMPV6_PKT_TOOBIG carrying the MTU is sent.
 *
 * NOTE(review): the condition choosing between the two MTU computations
 * (presumably @df), the packet-size comparisons and the return values are on
 * lines not visible in this excerpt; ip_tunnel_xmit() treats a non-zero
 * result as a failure -- confirm.
 */
599 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
600 struct rtable *rt, __be16 df,
601 const struct iphdr *inner_iph)
603 struct ip_tunnel *tunnel = netdev_priv(dev);
604 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
608 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
609 - sizeof(struct iphdr) - tunnel->hlen;
611 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
614 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
616 if (skb->protocol == htons(ETH_P_IP)) {
617 if (!skb_is_gso(skb) &&
618 (inner_iph->frag_off & htons(IP_DF)) &&
620 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
621 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
625 #if IS_ENABLED(CONFIG_IPV6)
626 else if (skb->protocol == htons(ETH_P_IPV6)) {
627 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
629 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
630 mtu >= IPV6_MIN_MTU) {
631 if ((tunnel->parms.iph.daddr &&
632 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
633 rt6->rt6i_dst.plen == 128) {
634 rt6->rt6i_flags |= RTF_MODIFIED;
635 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
639 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
641 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
/* Transmit @skb through tunnel device @dev using outer header template
 * @tnl_params and outer protocol @protocol.
 *
 * Visible steps, in order:
 *  - the outer destination starts as tnl_params->daddr; 'connected' records
 *    whether the tunnel has a fixed remote address.  On the visible
 *    alternative path the destination is derived from the packet: the IPv4
 *    next hop of the skb's route, or (CONFIG_IPV6) the last 32 bits of an
 *    IPv4-compatible IPv6 neighbour/destination address;
 *  - the TOS starts as tnl_params->tos and may be taken from the inner IPv4
 *    or IPv6 header;
 *  - a flow key is built with init_tunnel_flow() and the encapsulation
 *    header is added with ip_tunnel_encap();
 *  - the route comes from the dst cache for connected tunnels, else from
 *    ip_route_output_key(); a route obtained by lookup may be cached;
 *  - a route pointing back at @dev is counted in stats.collisions;
 *  - tnl_update_pmtu() is consulted, recent tunnel errors (err_count within
 *    IPTUNNEL_ERR_TIMEO) trigger dst_link_failure(), ECN is encapsulated
 *    into the TOS, and TTL and DF are derived from @tnl_params or the inner
 *    header;
 *  - dev->needed_headroom is raised when required, skb_cow_head() ensures
 *    the headroom, and the packet is sent with iptunnel_xmit(), whose result
 *    feeds iptunnel_xmit_stats().
 *  Error paths visible here bump tx_fifo_errors, tx_carrier_errors,
 *  tx_dropped or tx_errors.
 *
 * NOTE(review): many conditions, labels and cleanup statements (freeing the
 * skb, releasing the route) are on lines not visible in this excerpt;
 * confirm the exact control flow against the full source.
 */
649 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
650 const struct iphdr *tnl_params, u8 protocol)
652 struct ip_tunnel *tunnel = netdev_priv(dev);
653 const struct iphdr *inner_iph;
657 struct rtable *rt; /* Route to the other host */
658 unsigned int max_headroom; /* The extra header space needed */
663 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
664 connected = (tunnel->parms.iph.daddr != 0);
666 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
668 dst = tnl_params->daddr;
673 dev->stats.tx_fifo_errors++;
677 if (skb->protocol == htons(ETH_P_IP)) {
678 rt = skb_rtable(skb);
679 dst = rt_nexthop(rt, inner_iph->daddr);
681 #if IS_ENABLED(CONFIG_IPV6)
682 else if (skb->protocol == htons(ETH_P_IPV6)) {
683 const struct in6_addr *addr6;
684 struct neighbour *neigh;
685 bool do_tx_error_icmp;
688 neigh = dst_neigh_lookup(skb_dst(skb),
689 &ipv6_hdr(skb)->daddr);
693 addr6 = (const struct in6_addr *)&neigh->primary_key;
694 addr_type = ipv6_addr_type(addr6);
696 if (addr_type == IPV6_ADDR_ANY) {
697 addr6 = &ipv6_hdr(skb)->daddr;
698 addr_type = ipv6_addr_type(addr6);
701 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
702 do_tx_error_icmp = true;
704 do_tx_error_icmp = false;
705 dst = addr6->s6_addr32[3];
707 neigh_release(neigh);
708 if (do_tx_error_icmp)
718 tos = tnl_params->tos;
721 if (skb->protocol == htons(ETH_P_IP)) {
722 tos = inner_iph->tos;
724 } else if (skb->protocol == htons(ETH_P_IPV6)) {
725 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
730 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
731 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
733 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
736 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
739 rt = ip_route_output_key(tunnel->net, &fl4);
742 dev->stats.tx_carrier_errors++;
746 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
749 if (rt->dst.dev == dev) {
751 dev->stats.collisions++;
755 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
760 if (tunnel->err_count > 0) {
761 if (time_before(jiffies,
762 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
765 dst_link_failure(skb);
767 tunnel->err_count = 0;
770 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
771 ttl = tnl_params->ttl;
773 if (skb->protocol == htons(ETH_P_IP))
774 ttl = inner_iph->ttl;
775 #if IS_ENABLED(CONFIG_IPV6)
776 else if (skb->protocol == htons(ETH_P_IPV6))
777 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
780 ttl = ip4_dst_hoplimit(&rt->dst);
783 df = tnl_params->frag_off;
784 if (skb->protocol == htons(ETH_P_IP))
785 df |= (inner_iph->frag_off&htons(IP_DF));
787 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
788 + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
789 if (max_headroom > dev->needed_headroom)
790 dev->needed_headroom = max_headroom;
792 if (skb_cow_head(skb, dev->needed_headroom)) {
794 dev->stats.tx_dropped++;
799 err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
800 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
801 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
805 #if IS_ENABLED(CONFIG_IPV6)
807 dst_link_failure(skb);
810 dev->stats.tx_errors++;
813 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
/* Apply new parameters @p to existing tunnel @t / device @dev.
 *
 * The tunnel is removed from the hash table, its addresses and keys are
 * replaced (a change of these can move it to another bucket), the device
 * address and broadcast address are refreshed from the new saddr/daddr for
 * non-ARPHRD_ETHER devices, and the tunnel is re-inserted.  TTL, TOS and
 * frag_off are then copied; when the link index changed, the device is
 * re-bound with ip_tunnel_bind_dev().  Finally the dst cache of every CPU is
 * reset and netdev_state_change() is called.
 *
 * NOTE(review): the tunnel and last parameters are declared on lines not
 * visible here, as is the use made of the recomputed mtu -- callers pass a
 * boolean as the last argument, presumably deciding whether dev->mtu is
 * overwritten; confirm.
 */
815 static void ip_tunnel_update(struct ip_tunnel_net *itn,
817 struct net_device *dev,
818 struct ip_tunnel_parm *p,
821 ip_tunnel_del(itn, t);
822 t->parms.iph.saddr = p->iph.saddr;
823 t->parms.iph.daddr = p->iph.daddr;
824 t->parms.i_key = p->i_key;
825 t->parms.o_key = p->o_key;
826 if (dev->type != ARPHRD_ETHER) {
827 memcpy(dev->dev_addr, &p->iph.saddr, 4);
828 memcpy(dev->broadcast, &p->iph.daddr, 4);
830 ip_tunnel_add(itn, t);
832 t->parms.iph.ttl = p->iph.ttl;
833 t->parms.iph.tos = p->iph.tos;
834 t->parms.iph.frag_off = p->iph.frag_off;
836 if (t->parms.link != p->link) {
839 t->parms.link = p->link;
840 mtu = ip_tunnel_bind_dev(dev);
844 ip_tunnel_dst_reset_all(t);
845 netdev_state_change(dev);
/* Tunnel ioctl handler for @dev; @p carries the parameters in and out and
 * @cmd selects the operation.  The fallback tunnel device must exist
 * (enforced by BUG_ON()).
 *
 * Visible behaviour:
 *  - query: when issued on the fallback device the tunnel is looked up with
 *    ip_tunnel_find(), with netdev_priv(dev) as the alternative, and its
 *    parameters are copied into @p;
 *  - add/change (SIOCADDTUNNEL / SIOCCHGTUNNEL): requires CAP_NET_ADMIN in
 *    the netns' user namespace; IP_DF is forced into p->iph.frag_off; key
 *    flags are examined for non-VTI tunnels; an existing tunnel is looked
 *    up; SIOCADDTUNNEL creates one with ip_tunnel_create(); SIOCCHGTUNNEL on
 *    a non-fallback device checks that the point-to-point/broadcast nature
 *    implied by the new daddr matches dev->flags before
 *    ip_tunnel_update() is applied;
 *  - delete: requires CAP_NET_ADMIN; on the fallback device the target is
 *    looked up with ip_tunnel_find() and the fallback tunnel itself is
 *    singled out by a comparison; the device is removed with
 *    unregister_netdevice().
 *
 * NOTE(review): the switch/case labels, error codes and return statement
 * are on lines not visible in this excerpt; confirm the exact control flow
 * against the full source.
 */
848 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
851 struct ip_tunnel *t = netdev_priv(dev);
852 struct net *net = t->net;
853 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
855 BUG_ON(!itn->fb_tunnel_dev);
858 if (dev == itn->fb_tunnel_dev) {
859 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
861 t = netdev_priv(dev);
863 memcpy(p, &t->parms, sizeof(*p));
869 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
872 p->iph.frag_off |= htons(IP_DF);
873 if (!(p->i_flags & VTI_ISVTI)) {
874 if (!(p->i_flags & TUNNEL_KEY))
876 if (!(p->o_flags & TUNNEL_KEY))
880 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
882 if (cmd == SIOCADDTUNNEL) {
884 t = ip_tunnel_create(net, itn, p);
885 err = PTR_ERR_OR_ZERO(t);
892 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
899 unsigned int nflags = 0;
901 if (ipv4_is_multicast(p->iph.daddr))
902 nflags = IFF_BROADCAST;
903 else if (p->iph.daddr)
904 nflags = IFF_POINTOPOINT;
906 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
911 t = netdev_priv(dev);
917 ip_tunnel_update(itn, t, dev, p, true);
925 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
928 if (dev == itn->fb_tunnel_dev) {
930 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
934 if (t == netdev_priv(itn->fb_tunnel_dev))
938 unregister_netdevice(dev);
949 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
/* Change the MTU of tunnel device @dev to @new_mtu.
 *
 * The upper bound is 0xFFF8 minus the device hard header length and the
 * tunnel overhead (tunnel->hlen plus an IPv4 header); @new_mtu is compared
 * against that bound.
 * NOTE(review): the lower-bound check, the effect of @strict when the bound
 * is exceeded and the assignment/return are on lines not visible here --
 * presumably strict mode rejects the value and non-strict mode clamps it;
 * confirm.
 */
951 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
953 struct ip_tunnel *tunnel = netdev_priv(dev);
954 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
955 int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
960 if (new_mtu > max_mtu) {
970 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
/* Change the MTU of tunnel device @dev; a wrapper that calls
 * __ip_tunnel_change_mtu() with strict set to true and returns its result.
 */
972 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
974 return __ip_tunnel_change_mtu(dev, new_mtu, true);
976 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
/* Device destructor (installed as dev->destructor by ip_tunnel_init()):
 * destroys the tunnel's GRO cells and frees the per-CPU dst cache and the
 * per-CPU statistics.
 * NOTE(review): any statement after the last free_percpu() (such as freeing
 * the net_device itself) is on a line not visible here; confirm.
 */
978 static void ip_tunnel_dev_free(struct net_device *dev)
980 struct ip_tunnel *tunnel = netdev_priv(dev);
982 gro_cells_destroy(&tunnel->gro_cells);
983 free_percpu(tunnel->dst_cache);
984 free_percpu(dev->tstats);
/* Delete tunnel device @dev: unless it is the netns' fallback tunnel
 * device, the tunnel is removed from the hash table and the device is queued
 * on @head for unregistration.
 */
988 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
990 struct ip_tunnel *tunnel = netdev_priv(dev);
991 struct ip_tunnel_net *itn;
993 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
995 if (itn->fb_tunnel_dev != dev) {
996 ip_tunnel_del(itn, netdev_priv(dev));
997 unregister_netdevice_queue(dev, head);
1000 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
/* Return a network namespace associated with tunnel device @dev.
 * NOTE(review): the return statement is on a line not visible in this
 * excerpt -- presumably it returns tunnel->net; confirm.
 */
1002 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
1004 struct ip_tunnel *tunnel = netdev_priv(dev);
1008 EXPORT_SYMBOL(ip_tunnel_get_link_net);
/* Return the link index stored in the tunnel parameters of @dev
 * (tunnel->parms.link).
 */
1010 int ip_tunnel_get_iflink(const struct net_device *dev)
1012 struct ip_tunnel *tunnel = netdev_priv(dev);
1014 return tunnel->parms.link;
1016 EXPORT_SYMBOL(ip_tunnel_get_iflink);
/* Per-netns initialisation of the tunnel state identified by
 * @ip_tnl_net_id.
 *
 * All IP_TNL_HASH_SIZE hash buckets are initialised and fb_tunnel_dev is
 * cleared.  A fallback tunnel device is then created from zeroed parameters
 * carrying @devname; on success it is marked NETIF_F_NETNS_LOCAL, gets its
 * MTU from ip_tunnel_bind_dev() and is inserted into the hash table.
 * Returns PTR_ERR_OR_ZERO() of the fallback device pointer.
 *
 * NOTE(review): the conditions under which the function stops before
 * creating the fallback device, and any locking around the creation, are on
 * lines not visible in this excerpt; confirm.
 */
1018 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
1019 struct rtnl_link_ops *ops, char *devname)
1021 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1022 struct ip_tunnel_parm parms;
1025 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1026 INIT_HLIST_HEAD(&itn->tunnels[i]);
1029 itn->fb_tunnel_dev = NULL;
1033 memset(&parms, 0, sizeof(parms));
1035 strlcpy(parms.name, devname, IFNAMSIZ);
1038 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1039 /* FB netdevice is special: we have one, and only one per netns.
1040 * Allowing to move it to another netns is clearly unsafe.
1042 if (!IS_ERR(itn->fb_tunnel_dev)) {
1043 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1044 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1045 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1049 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1051 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
/* Queue every tunnel device belonging to @itn on @head for unregistration.
 *
 * First pass: every device in the fallback device's netns whose
 * rtnl_link_ops equals @ops.  Second pass: every tunnel still linked in the
 * hash table whose device lives in a different netns (devices in the same
 * netns were already queued by the first pass).
 */
1053 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1054 struct rtnl_link_ops *ops)
1056 struct net *net = dev_net(itn->fb_tunnel_dev);
1057 struct net_device *dev, *aux;
1060 for_each_netdev_safe(net, dev, aux)
1061 if (dev->rtnl_link_ops == ops)
1062 unregister_netdevice_queue(dev, head);
1064 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1065 struct ip_tunnel *t;
1066 struct hlist_node *n;
1067 struct hlist_head *thead = &itn->tunnels[h];
1069 hlist_for_each_entry_safe(t, n, thead, hash_node)
1070 /* If dev is in the same netns, it has already
1071 * been added to the list by the previous loop.
1073 if (!net_eq(dev_net(t->dev), net))
1074 unregister_netdevice_queue(t->dev, head);
/* Per-netns teardown: collect all tunnel devices of @itn with
 * ip_tunnel_destroy() on a local list and unregister them in one batch with
 * unregister_netdevice_many().
 * NOTE(review): the declaration of the list and any locking around these
 * calls are on lines not visible in this excerpt; confirm.
 */
1078 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1083 ip_tunnel_destroy(itn, &list, ops);
1084 unregister_netdevice_many(&list);
1087 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
/* rtnetlink "new link" helper for tunnel device @dev with parameters @p.
 *
 * A collect_md tunnel is checked against an already published
 * itn->collect_md_tun; the other visible check looks for an existing tunnel
 * with the same parameters via ip_tunnel_find().  The device is then
 * registered, given a random Ethernet address when it is ARPHRD_ETHER and
 * no IFLA_ADDRESS attribute was supplied, bound with ip_tunnel_bind_dev()
 * and inserted into the hash table.
 *
 * NOTE(review): the error codes returned by the duplicate checks, the
 * copying of @p into the tunnel, the use of the computed mtu and the return
 * statement are on lines not visible in this excerpt; confirm.
 */
1089 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1090 struct ip_tunnel_parm *p)
1092 struct ip_tunnel *nt;
1093 struct net *net = dev_net(dev);
1094 struct ip_tunnel_net *itn;
1098 nt = netdev_priv(dev);
1099 itn = net_generic(net, nt->ip_tnl_net_id);
1101 if (nt->collect_md) {
1102 if (rtnl_dereference(itn->collect_md_tun))
1105 if (ip_tunnel_find(itn, p, dev->type))
1111 err = register_netdevice(dev);
1115 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1116 eth_hw_addr_random(dev);
1118 mtu = ip_tunnel_bind_dev(dev);
1122 ip_tunnel_add(itn, nt);
1126 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
/* rtnetlink "change link" helper for tunnel device @dev with new parameters
 * @p.
 *
 * The fallback tunnel device gets special treatment (first visible check).
 * An existing tunnel matching @p is looked up with ip_tunnel_find().  For
 * devices that are not ARPHRD_ETHER, the point-to-point/broadcast nature
 * implied by the new daddr is compared with dev->flags.  The change is
 * applied with ip_tunnel_update(); its last argument is true when no
 * IFLA_MTU attribute was supplied.
 *
 * NOTE(review): the return values of the checks and the handling of the
 * ip_tunnel_find() result are on lines not visible in this excerpt --
 * presumably the fallback device and conflicting tunnels are rejected;
 * confirm.
 */
1128 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1129 struct ip_tunnel_parm *p)
1131 struct ip_tunnel *t;
1132 struct ip_tunnel *tunnel = netdev_priv(dev);
1133 struct net *net = tunnel->net;
1134 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1136 if (dev == itn->fb_tunnel_dev)
1139 t = ip_tunnel_find(itn, p, dev->type);
1147 if (dev->type != ARPHRD_ETHER) {
1148 unsigned int nflags = 0;
1150 if (ipv4_is_multicast(p->iph.daddr))
1151 nflags = IFF_BROADCAST;
1152 else if (p->iph.daddr)
1153 nflags = IFF_POINTOPOINT;
1155 if ((dev->flags ^ nflags) &
1156 (IFF_POINTOPOINT | IFF_BROADCAST))
1161 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1164 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
/* Device init helper for tunnel device @dev.
 *
 * Installs ip_tunnel_dev_free() as the destructor and allocates the per-CPU
 * statistics, the per-CPU dst cache and the GRO cells; a failure of a later
 * allocation frees the earlier ones.  The tunnel's netns is set to the
 * device's netns and the device name is copied into tunnel->parms.name.
 * collect_md tunnels are additionally marked NETIF_F_NETNS_LOCAL and have
 * netif_keep_dst() applied.
 *
 * NOTE(review): the error codes, the initialisation of the outer IP header
 * template (iph) and the return statement are on lines not visible in this
 * excerpt; confirm.
 */
1166 int ip_tunnel_init(struct net_device *dev)
1168 struct ip_tunnel *tunnel = netdev_priv(dev);
1169 struct iphdr *iph = &tunnel->parms.iph;
1172 dev->destructor = ip_tunnel_dev_free;
1173 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1177 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1178 if (!tunnel->dst_cache) {
1179 free_percpu(dev->tstats);
1183 err = gro_cells_init(&tunnel->gro_cells, dev);
1185 free_percpu(tunnel->dst_cache);
1186 free_percpu(dev->tstats);
1191 tunnel->net = dev_net(dev);
1192 strcpy(tunnel->parms.name, dev->name);
1196 if (tunnel->collect_md) {
1197 dev->features |= NETIF_F_NETNS_LOCAL;
1198 netif_keep_dst(dev);
1202 EXPORT_SYMBOL_GPL(ip_tunnel_init);
/* Device uninit helper: removes the tunnel of @dev from the hash table
 * (except for the fallback tunnel device) and resets its dst cache on all
 * CPUs.
 */
1204 void ip_tunnel_uninit(struct net_device *dev)
1206 struct ip_tunnel *tunnel = netdev_priv(dev);
1207 struct net *net = tunnel->net;
1208 struct ip_tunnel_net *itn;
1210 itn = net_generic(net, tunnel->ip_tnl_net_id);
1211 /* fb_tunnel_dev will be unregistered in the net-exit call. */
1212 if (itn->fb_tunnel_dev != dev)
1213 ip_tunnel_del(itn, netdev_priv(dev));
1215 ip_tunnel_dst_reset_all(tunnel);
1217 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
/* Record @net_id as the tunnel's per-netns generic id (ip_tnl_net_id) in the
 * private data of @dev.
 */
1219 /* Do least required initialization, rest of init is done in tunnel_init call */
1220 void ip_tunnel_setup(struct net_device *dev, int net_id)
1222 struct ip_tunnel *tunnel = netdev_priv(dev);
1223 tunnel->ip_tnl_net_id = net_id;
1225 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
/* Declare the module licence as GPL. */
1227 MODULE_LICENSE("GPL");