These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / net / ipv4 / fib_trie.c
index 0ca933d..744e593 100644 (file)
@@ -72,6 +72,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/vmalloc.h>
 #include <net/net_namespace.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -80,6 +81,7 @@
 #include <net/sock.h>
 #include <net/ip_fib.h>
 #include <net/switchdev.h>
+#include <trace/events/fib.h>
 #include "fib_lookup.h"
 
 #define MAX_STAT_DEPTH 32
@@ -324,13 +326,15 @@ static inline void empty_child_dec(struct key_vector *n)
 
 static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
 {
-       struct tnode *kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
-       struct key_vector *l = kv->kv;
+       struct key_vector *l;
+       struct tnode *kv;
 
+       kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
        if (!kv)
                return NULL;
 
        /* initialize key vector */
+       l = kv->kv;
        l->key = key;
        l->pos = 0;
        l->bits = 0;
@@ -345,24 +349,26 @@ static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
 
 static struct key_vector *tnode_new(t_key key, int pos, int bits)
 {
-       struct tnode *tnode = tnode_alloc(bits);
        unsigned int shift = pos + bits;
-       struct key_vector *tn = tnode->kv;
+       struct key_vector *tn;
+       struct tnode *tnode;
 
        /* verify bits and pos their msb bits clear and values are valid */
        BUG_ON(!bits || (shift > KEYLENGTH));
 
-       pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
-                sizeof(struct key_vector *) << bits);
-
+       tnode = tnode_alloc(bits);
        if (!tnode)
                return NULL;
 
+       pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
+                sizeof(struct key_vector *) << bits);
+
        if (bits == KEYLENGTH)
                tnode->full_children = 1;
        else
                tnode->empty_children = 1ul << bits;
 
+       tn = tnode->kv;
        tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
        tn->pos = pos;
        tn->bits = bits;
@@ -1077,6 +1083,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
        struct trie *t = (struct trie *)tb->tb_data;
        struct fib_alias *fa, *new_fa;
        struct key_vector *l, *tp;
+       unsigned int nlflags = 0;
        struct fib_info *fi;
        u8 plen = cfg->fc_dst_len;
        u8 slen = KEYLENGTH - plen;
@@ -1165,14 +1172,15 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
                        new_fa->fa_state = state & ~FA_S_ACCESSED;
                        new_fa->fa_slen = fa->fa_slen;
                        new_fa->tb_id = tb->tb_id;
+                       new_fa->fa_default = -1;
 
-                       err = netdev_switch_fib_ipv4_add(key, plen, fi,
-                                                        new_fa->fa_tos,
-                                                        cfg->fc_type,
-                                                        cfg->fc_nlflags,
-                                                        tb->tb_id);
+                       err = switchdev_fib_ipv4_add(key, plen, fi,
+                                                    new_fa->fa_tos,
+                                                    cfg->fc_type,
+                                                    cfg->fc_nlflags,
+                                                    tb->tb_id);
                        if (err) {
-                               netdev_switch_fib_ipv4_abort(fi);
+                               switchdev_fib_ipv4_abort(fi);
                                kmem_cache_free(fn_alias_kmem, new_fa);
                                goto out;
                        }
@@ -1196,7 +1204,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
                if (fa_match)
                        goto out;
 
-               if (!(cfg->fc_nlflags & NLM_F_APPEND))
+               if (cfg->fc_nlflags & NLM_F_APPEND)
+                       nlflags = NLM_F_APPEND;
+               else
                        fa = fa_first;
        }
        err = -ENOENT;
@@ -1214,14 +1224,13 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
        new_fa->fa_state = 0;
        new_fa->fa_slen = slen;
        new_fa->tb_id = tb->tb_id;
+       new_fa->fa_default = -1;
 
        /* (Optionally) offload fib entry to switch hardware. */
-       err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
-                                        cfg->fc_type,
-                                        cfg->fc_nlflags,
-                                        tb->tb_id);
+       err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
+                                    cfg->fc_nlflags, tb->tb_id);
        if (err) {
-               netdev_switch_fib_ipv4_abort(fi);
+               switchdev_fib_ipv4_abort(fi);
                goto out_free_new_fa;
        }
 
@@ -1235,12 +1244,12 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 
        rt_cache_flush(cfg->fc_nlinfo.nl_net);
        rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
-                 &cfg->fc_nlinfo, 0);
+                 &cfg->fc_nlinfo, nlflags);
 succeeded:
        return 0;
 
 out_sw_fib_del:
-       netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
+       switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
 out_free_new_fa:
        kmem_cache_free(fn_alias_kmem, new_fa);
 out:
@@ -1270,6 +1279,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
        unsigned long index;
        t_key cindex;
 
+       trace_fib_table_lookup(tb->tb_id, flp);
+
        pn = t->kv;
        cindex = 0;
 
@@ -1406,11 +1417,20 @@ found:
                        continue;
                for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
                        const struct fib_nh *nh = &fi->fib_nh[nhsel];
+                       struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
 
                        if (nh->nh_flags & RTNH_F_DEAD)
                                continue;
-                       if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
+                       if (in_dev &&
+                           IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+                           nh->nh_flags & RTNH_F_LINKDOWN &&
+                           !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
                                continue;
+                       if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
+                               if (flp->flowi4_oif &&
+                                   flp->flowi4_oif != nh->nh_oif)
+                                       continue;
+                       }
 
                        if (!(fib_flags & FIB_LOOKUP_NOREF))
                                atomic_inc(&fi->fib_clntref);
@@ -1425,6 +1445,8 @@ found:
 #ifdef CONFIG_IP_FIB_TRIE_STATS
                        this_cpu_inc(stats->semantic_match_passed);
 #endif
+                       trace_fib_table_lookup_nh(nh);
+
                        return err;
                }
        }
@@ -1518,8 +1540,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
        if (!fa_to_delete)
                return -ESRCH;
 
-       netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
-                                  cfg->fc_type, tb->tb_id);
+       switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+                              cfg->fc_type, tb->tb_id);
 
        rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
                  &cfg->fc_nlinfo, 0);
@@ -1547,7 +1569,7 @@ static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
        do {
                /* record parent and next child index */
                pn = n;
-               cindex = key ? get_index(key, pn) : 0;
+               cindex = (key > pn->key) ? get_index(key, pn) : 0;
 
                if (cindex >> pn->bits)
                        break;
@@ -1768,10 +1790,9 @@ void fib_table_flush_external(struct fib_table *tb)
                        if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
                                continue;
 
-                       netdev_switch_fib_ipv4_del(n->key,
-                                                  KEYLENGTH - fa->fa_slen,
-                                                  fi, fa->fa_tos,
-                                                  fa->fa_type, tb->tb_id);
+                       switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
+                                              fi, fa->fa_tos, fa->fa_type,
+                                              tb->tb_id);
                }
 
                /* update leaf slen */
@@ -1834,10 +1855,9 @@ int fib_table_flush(struct fib_table *tb)
                                continue;
                        }
 
-                       netdev_switch_fib_ipv4_del(n->key,
-                                                  KEYLENGTH - fa->fa_slen,
-                                                  fi, fa->fa_tos,
-                                                  fa->fa_type, tb->tb_id);
+                       switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
+                                              fi, fa->fa_tos, fa->fa_type,
+                                              tb->tb_id);
                        hlist_del_rcu(&fa->fa_list);
                        fib_release_info(fa->fa_info);
                        alias_free_mem_rcu(fa);
@@ -1976,7 +1996,6 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
                return NULL;
 
        tb->tb_id = id;
-       tb->tb_default = -1;
        tb->tb_num_default = 0;
        tb->tb_data = (alias ? alias->__data : tb->__data);
 
@@ -2053,11 +2072,12 @@ static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
 static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
                                             struct trie *t)
 {
-       struct key_vector *n, *pn = t->kv;
+       struct key_vector *n, *pn;
 
        if (!t)
                return NULL;
 
+       pn = t->kv;
        n = rcu_dereference(pn->tnode[0]);
        if (!n)
                return NULL;