These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / net / ipv4 / fib_frontend.c
index 872494e..4734475 100644 (file)
@@ -45,6 +45,8 @@
 #include <net/ip_fib.h>
 #include <net/rtnetlink.h>
 #include <net/xfrm.h>
+#include <net/l3mdev.h>
+#include <trace/events/fib.h>
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
 
@@ -211,12 +213,12 @@ void fib_flush_external(struct net *net)
  */
 static inline unsigned int __inet_dev_addr_type(struct net *net,
                                                const struct net_device *dev,
-                                               __be32 addr)
+                                               __be32 addr, u32 tb_id)
 {
        struct flowi4           fl4 = { .daddr = addr };
        struct fib_result       res;
        unsigned int ret = RTN_BROADCAST;
-       struct fib_table *local_table;
+       struct fib_table *table;
 
        if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
                return RTN_BROADCAST;
@@ -225,10 +227,10 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
 
        rcu_read_lock();
 
-       local_table = fib_get_table(net, RT_TABLE_LOCAL);
-       if (local_table) {
+       table = fib_get_table(net, tb_id);
+       if (table) {
                ret = RTN_UNICAST;
-               if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
+               if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
                        if (!dev || dev == res.fi->fib_dev)
                                ret = res.type;
                }
@@ -238,19 +240,40 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
        return ret;
 }
 
+unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id)
+{
+       return __inet_dev_addr_type(net, NULL, addr, tb_id);
+}
+EXPORT_SYMBOL(inet_addr_type_table);
+
 unsigned int inet_addr_type(struct net *net, __be32 addr)
 {
-       return __inet_dev_addr_type(net, NULL, addr);
+       return __inet_dev_addr_type(net, NULL, addr, RT_TABLE_LOCAL);
 }
 EXPORT_SYMBOL(inet_addr_type);
 
 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
                                __be32 addr)
 {
-       return __inet_dev_addr_type(net, dev, addr);
+       u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+
+       return __inet_dev_addr_type(net, dev, addr, rt_table);
 }
 EXPORT_SYMBOL(inet_dev_addr_type);
 
+/* inet_addr_type with dev == NULL but using the table from a dev
+ * if one is associated
+ */
+unsigned int inet_addr_type_dev_table(struct net *net,
+                                     const struct net_device *dev,
+                                     __be32 addr)
+{
+       u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+
+       return __inet_dev_addr_type(net, NULL, addr, rt_table);
+}
+EXPORT_SYMBOL(inet_addr_type_dev_table);
+
 __be32 fib_compute_spec_dst(struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
@@ -280,7 +303,8 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
                fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
                fl4.flowi4_scope = scope;
                fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
-               if (!fib_lookup(net, &fl4, &res))
+               fl4.flowi4_tun_key.tun_id = 0;
+               if (!fib_lookup(net, &fl4, &res, 0))
                        return FIB_RES_PREFSRC(net, res);
        } else {
                scope = RT_SCOPE_LINK;
@@ -308,18 +332,24 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
        bool dev_match;
 
        fl4.flowi4_oif = 0;
-       fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
+       fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
+       if (!fl4.flowi4_iif)
+               fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
        fl4.daddr = src;
        fl4.saddr = dst;
        fl4.flowi4_tos = tos;
        fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+       fl4.flowi4_tun_key.tun_id = 0;
+       fl4.flowi4_flags = 0;
 
        no_addr = idev->ifa_list == NULL;
 
        fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
 
+       trace_fib_validate_source(dev, &fl4);
+
        net = dev_net(dev);
-       if (fib_lookup(net, &fl4, &res))
+       if (fib_lookup(net, &fl4, &res, 0))
                goto last_resort;
        if (res.type != RTN_UNICAST &&
            (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
@@ -337,6 +367,9 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
                if (nh->nh_dev == dev) {
                        dev_match = true;
                        break;
+               } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+                       dev_match = true;
+                       break;
                }
        }
 #else
@@ -354,7 +387,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
        fl4.flowi4_oif = dev->ifindex;
 
        ret = 0;
-       if (fib_lookup(net, &fl4, &res) == 0) {
+       if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
                if (res.type == RTN_UNICAST)
                        ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
        }
@@ -494,9 +527,12 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
 
        addr = sk_extract_addr(&rt->rt_gateway);
        if (rt->rt_gateway.sa_family == AF_INET && addr) {
+               unsigned int addr_type;
+
                cfg->fc_gw = addr;
+               addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
                if (rt->rt_flags & RTF_GATEWAY &&
-                   inet_addr_type(net, addr) == RTN_UNICAST)
+                   addr_type == RTN_UNICAST)
                        cfg->fc_scope = RT_SCOPE_UNIVERSE;
        }
 
@@ -591,6 +627,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
        [RTA_METRICS]           = { .type = NLA_NESTED },
        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
        [RTA_FLOW]              = { .type = NLA_U32 },
+       [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
+       [RTA_ENCAP]             = { .type = NLA_NESTED },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -656,6 +694,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
                case RTA_TABLE:
                        cfg->fc_table = nla_get_u32(attr);
                        break;
+               case RTA_ENCAP:
+                       cfg->fc_encap = attr;
+                       break;
+               case RTA_ENCAP_TYPE:
+                       cfg->fc_encap_type = nla_get_u16(attr);
+                       break;
                }
        }
 
@@ -760,6 +804,7 @@ out:
 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
 {
        struct net *net = dev_net(ifa->ifa_dev->dev);
+       u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
        struct fib_table *tb;
        struct fib_config cfg = {
                .fc_protocol = RTPROT_KERNEL,
@@ -774,11 +819,10 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
                },
        };
 
-       if (type == RTN_UNICAST)
-               tb = fib_new_table(net, RT_TABLE_MAIN);
-       else
-               tb = fib_new_table(net, RT_TABLE_LOCAL);
+       if (!tb_id)
+               tb_id = (type == RTN_UNICAST) ? RT_TABLE_MAIN : RT_TABLE_LOCAL;
 
+       tb = fib_new_table(net, tb_id);
        if (!tb)
                return;
 
@@ -823,9 +867,10 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 
        if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
            (prefix != addr || ifa->ifa_prefixlen < 32)) {
-               fib_magic(RTM_NEWROUTE,
-                         dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
-                         prefix, ifa->ifa_prefixlen, prim);
+               if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
+                       fib_magic(RTM_NEWROUTE,
+                                 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+                                 prefix, ifa->ifa_prefixlen, prim);
 
                /* Add network specific broadcasts, when it takes a sense */
                if (ifa->ifa_prefixlen < 31) {
@@ -870,9 +915,10 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
                }
        } else if (!ipv4_is_zeronet(any) &&
                   (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) {
-               fib_magic(RTM_DELROUTE,
-                         dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
-                         any, ifa->ifa_prefixlen, prim);
+               if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
+                       fib_magic(RTM_DELROUTE,
+                                 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+                                 any, ifa->ifa_prefixlen, prim);
                subnet = 1;
        }
 
@@ -960,11 +1006,14 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
                        fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
        }
        if (!(ok & LOCAL_OK)) {
+               unsigned int addr_type;
+
                fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
 
                /* Check, that this local address finally disappeared. */
-               if (gone &&
-                   inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
+               addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
+                                                    ifa->ifa_local);
+               if (gone && addr_type != RTN_LOCAL) {
                        /* And the last, but not the least thing.
                         * We must flush stray FIB entries.
                         *
@@ -1063,9 +1112,10 @@ static void nl_fib_lookup_exit(struct net *net)
        net->ipv4.fibnl = NULL;
 }
 
-static void fib_disable_ip(struct net_device *dev, int force)
+static void fib_disable_ip(struct net_device *dev, unsigned long event,
+                          bool force)
 {
-       if (fib_sync_down_dev(dev, force))
+       if (fib_sync_down_dev(dev, event, force))
                fib_flush(dev_net(dev));
        rt_cache_flush(dev_net(dev));
        arp_ifdown(dev);
@@ -1081,7 +1131,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
        case NETDEV_UP:
                fib_add_ifaddr(ifa);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-               fib_sync_up(dev);
+               fib_sync_up(dev, RTNH_F_DEAD);
 #endif
                atomic_inc(&net->ipv4.dev_addr_genid);
                rt_cache_flush(dev_net(dev));
@@ -1093,7 +1143,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
                        /* Last address was deleted from this interface.
                         * Disable IP.
                         */
-                       fib_disable_ip(dev, 1);
+                       fib_disable_ip(dev, event, true);
                } else {
                        rt_cache_flush(dev_net(dev));
                }
@@ -1105,11 +1155,13 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+       struct netdev_notifier_changeupper_info *info;
        struct in_device *in_dev;
        struct net *net = dev_net(dev);
+       unsigned int flags;
 
        if (event == NETDEV_UNREGISTER) {
-               fib_disable_ip(dev, 2);
+               fib_disable_ip(dev, event, true);
                rt_flush_dev(dev);
                return NOTIFY_DONE;
        }
@@ -1124,18 +1176,32 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
                        fib_add_ifaddr(ifa);
                } endfor_ifa(in_dev);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-               fib_sync_up(dev);
+               fib_sync_up(dev, RTNH_F_DEAD);
 #endif
                atomic_inc(&net->ipv4.dev_addr_genid);
                rt_cache_flush(net);
                break;
        case NETDEV_DOWN:
-               fib_disable_ip(dev, 0);
+               fib_disable_ip(dev, event, false);
                break;
-       case NETDEV_CHANGEMTU:
        case NETDEV_CHANGE:
+               flags = dev_get_flags(dev);
+               if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+                       fib_sync_up(dev, RTNH_F_LINKDOWN);
+               else
+                       fib_sync_down_dev(dev, event, false);
+               /* fall through */
+       case NETDEV_CHANGEMTU:
                rt_cache_flush(net);
                break;
+       case NETDEV_CHANGEUPPER:
+               info = ptr;
+               /* flush all routes if dev is linked to or unlinked from
+                * an L3 master device (e.g., VRF)
+                */
+               if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+                       fib_disable_ip(dev, NETDEV_DOWN, true);
+               break;
        }
        return NOTIFY_DONE;
 }