These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / net / xfrm / xfrm_policy.c
index 638af06..b5e665b 100644 (file)
@@ -115,7 +115,8 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
        rcu_read_unlock();
 }
 
-static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
+static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
+                                                 int tos, int oif,
                                                  const xfrm_address_t *saddr,
                                                  const xfrm_address_t *daddr,
                                                  int family)
@@ -127,14 +128,15 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
        if (unlikely(afinfo == NULL))
                return ERR_PTR(-EAFNOSUPPORT);
 
-       dst = afinfo->dst_lookup(net, tos, saddr, daddr);
+       dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
 
        xfrm_policy_put_afinfo(afinfo);
 
        return dst;
 }
 
-static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
+static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
+                                               int tos, int oif,
                                                xfrm_address_t *prev_saddr,
                                                xfrm_address_t *prev_daddr,
                                                int family)
@@ -153,7 +155,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
                daddr = x->coaddr;
        }
 
-       dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);
+       dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
 
        if (!IS_ERR(dst)) {
                if (prev_saddr != saddr)
@@ -301,6 +303,14 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 }
 EXPORT_SYMBOL(xfrm_policy_alloc);
 
+static void xfrm_policy_destroy_rcu(struct rcu_head *head)
+{
+       struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);
+
+       security_xfrm_policy_free(policy->security);
+       kfree(policy);
+}
+
 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
 
 void xfrm_policy_destroy(struct xfrm_policy *policy)
@@ -310,19 +320,10 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
        if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
                BUG();
 
-       security_xfrm_policy_free(policy->security);
-       kfree(policy);
+       call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
 }
 EXPORT_SYMBOL(xfrm_policy_destroy);
 
-static void xfrm_queue_purge(struct sk_buff_head *list)
-{
-       struct sk_buff *skb;
-
-       while ((skb = skb_dequeue(list)) != NULL)
-               kfree_skb(skb);
-}
-
 /* Rule must be locked. Release descentant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
  */
@@ -335,7 +336,7 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 
        if (del_timer(&policy->polq.hold_timer))
                xfrm_pol_put(policy);
-       xfrm_queue_purge(&policy->polq.hold_queue);
+       skb_queue_purge(&policy->polq.hold_queue);
 
        if (del_timer(&policy->timer))
                xfrm_pol_put(policy);
@@ -708,6 +709,9 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
        struct xfrm_policy_queue *pq = &old->polq;
        struct sk_buff_head list;
 
+       if (skb_queue_empty(&pq->hold_queue))
+               return;
+
        __skb_queue_head_init(&list);
 
        spin_lock_bh(&pq->hold_queue.lock);
@@ -716,9 +720,6 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
                xfrm_pol_put(old);
        spin_unlock_bh(&pq->hold_queue.lock);
 
-       if (skb_queue_empty(&list))
-               return;
-
        pq = &new->polq;
 
        spin_lock_bh(&pq->hold_queue.lock);
@@ -1012,7 +1013,9 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
        if (list_empty(&walk->walk.all))
                x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
        else
-               x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
+               x = list_first_entry(&walk->walk.all,
+                                    struct xfrm_policy_walk_entry, all);
+
        list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
                if (x->dead)
                        continue;
@@ -1120,6 +1123,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
        }
        chain = &net->xfrm.policy_inexact[dir];
        hlist_for_each_entry(pol, chain, bydst) {
+               if ((pol->priority >= priority) && ret)
+                       break;
+
                err = xfrm_policy_match(pol, fl, type, family, dir);
                if (err) {
                        if (err == -ESRCH)
@@ -1128,13 +1134,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
                                ret = ERR_PTR(err);
                                goto fail;
                        }
-               } else if (pol->priority < priority) {
+               } else {
                        ret = pol;
                        break;
                }
        }
-       if (ret)
-               xfrm_pol_hold(ret);
+
+       xfrm_pol_hold(ret);
 fail:
        read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
@@ -1209,14 +1215,16 @@ static inline int policy_to_flow_dir(int dir)
        }
 }
 
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
+static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
                                                 const struct flowi *fl)
 {
        struct xfrm_policy *pol;
        struct net *net = sock_net(sk);
 
+       rcu_read_lock();
        read_lock_bh(&net->xfrm.xfrm_policy_lock);
-       if ((pol = sk->sk_policy[dir]) != NULL) {
+       pol = rcu_dereference(sk->sk_policy[dir]);
+       if (pol != NULL) {
                bool match = xfrm_selector_match(&pol->selector, fl,
                                                 sk->sk_family);
                int err = 0;
@@ -1240,6 +1248,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
        }
 out:
        read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+       rcu_read_unlock();
        return pol;
 }
 
@@ -1308,13 +1317,14 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 #endif
 
        write_lock_bh(&net->xfrm.xfrm_policy_lock);
-       old_pol = sk->sk_policy[dir];
-       sk->sk_policy[dir] = pol;
+       old_pol = rcu_dereference_protected(sk->sk_policy[dir],
+                               lockdep_is_held(&net->xfrm.xfrm_policy_lock));
        if (pol) {
                pol->curlft.add_time = get_seconds();
                pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
                xfrm_sk_policy_link(pol, dir);
        }
+       rcu_assign_pointer(sk->sk_policy[dir], pol);
        if (old_pol) {
                if (pol)
                        xfrm_policy_requeue(old_pol, pol);
@@ -1362,29 +1372,38 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
        return newp;
 }
 
-int __xfrm_sk_clone_policy(struct sock *sk)
+int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
 {
-       struct xfrm_policy *p0 = sk->sk_policy[0],
-                          *p1 = sk->sk_policy[1];
+       const struct xfrm_policy *p;
+       struct xfrm_policy *np;
+       int i, ret = 0;
 
-       sk->sk_policy[0] = sk->sk_policy[1] = NULL;
-       if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
-               return -ENOMEM;
-       if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
-               return -ENOMEM;
-       return 0;
+       rcu_read_lock();
+       for (i = 0; i < 2; i++) {
+               p = rcu_dereference(osk->sk_policy[i]);
+               if (p) {
+                       np = clone_policy(p, i);
+                       if (unlikely(!np)) {
+                               ret = -ENOMEM;
+                               break;
+                       }
+                       rcu_assign_pointer(sk->sk_policy[i], np);
+               }
+       }
+       rcu_read_unlock();
+       return ret;
 }
 
 static int
-xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
-              unsigned short family)
+xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
+              xfrm_address_t *remote, unsigned short family)
 {
        int err;
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
        if (unlikely(afinfo == NULL))
                return -EINVAL;
-       err = afinfo->get_saddr(net, local, remote);
+       err = afinfo->get_saddr(net, oif, local, remote);
        xfrm_policy_put_afinfo(afinfo);
        return err;
 }
@@ -1413,7 +1432,9 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
                        remote = &tmpl->id.daddr;
                        local = &tmpl->saddr;
                        if (xfrm_addr_any(local, tmpl->encap_family)) {
-                               error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
+                               error = xfrm_get_saddr(net, fl->flowi_oif,
+                                                      &tmp, remote,
+                                                      tmpl->encap_family);
                                if (error)
                                        goto fail;
                                local = &tmp;
@@ -1582,8 +1603,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 
                memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
                xdst->flo.ops = &xfrm_bundle_fc_ops;
-               if (afinfo->init_dst)
-                       afinfo->init_dst(net, xdst);
        } else
                xdst = ERR_PTR(-ENOBUFS);
 
@@ -1693,8 +1712,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 
                if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
                        family = xfrm[i]->props.family;
-                       dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
-                                             family);
+                       dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
+                                             &saddr, &daddr, family);
                        err = PTR_ERR(dst);
                        if (IS_ERR(dst))
                                goto put_states;
@@ -1888,6 +1907,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
        struct sock *sk;
        struct dst_entry *dst;
        struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+       struct net *net = xp_net(pol);
        struct xfrm_policy_queue *pq = &pol->polq;
        struct flowi fl;
        struct sk_buff_head list;
@@ -1904,8 +1924,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
        spin_unlock(&pq->hold_queue.lock);
 
        dst_hold(dst->path);
-       dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
-                         sk, 0);
+       dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
        if (IS_ERR(dst))
                goto purge_queue;
 
@@ -1935,8 +1954,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
 
                xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
                dst_hold(skb_dst(skb)->path);
-               dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
-                                 &fl, skb->sk, 0);
+               dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
                if (IS_ERR(dst)) {
                        kfree_skb(skb);
                        continue;
@@ -1946,7 +1964,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
                skb_dst_drop(skb);
                skb_dst_set(skb, dst);
 
-               dst_output(skb);
+               dst_output(net, skb->sk, skb);
        }
 
 out:
@@ -1955,11 +1973,11 @@ out:
 
 purge_queue:
        pq->timeout = 0;
-       xfrm_queue_purge(&pq->hold_queue);
+       skb_queue_purge(&pq->hold_queue);
        xfrm_pol_put(pol);
 }
 
-static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
+static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        unsigned long sched_next;
        struct dst_entry *dst = skb_dst(skb);
@@ -2186,7 +2204,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family,
  */
 struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                              const struct flowi *fl,
-                             struct sock *sk, int flags)
+                             const struct sock *sk, int flags)
 {
        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
        struct flow_cache_object *flo;
@@ -2200,6 +2218,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
        xdst = NULL;
        route = NULL;
 
+       sk = sk_const_to_full_sk(sk);
        if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
                num_pols = 1;
                pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
@@ -2334,7 +2353,7 @@ EXPORT_SYMBOL(xfrm_lookup);
  */
 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
                                    const struct flowi *fl,
-                                   struct sock *sk, int flags)
+                                   const struct sock *sk, int flags)
 {
        struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
                                            flags | XFRM_LOOKUP_QUEUE |
@@ -2479,6 +2498,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        }
 
        pol = NULL;
+       sk = sk_to_full_sk(sk);
        if (sk && sk->sk_policy[dir]) {
                pol = xfrm_sk_policy_lookup(sk, dir, &fl);
                if (IS_ERR(pol)) {
@@ -2806,7 +2826,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
 
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
-       struct net *net;
        int err = 0;
        if (unlikely(afinfo == NULL))
                return -EINVAL;
@@ -2814,7 +2833,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
                return -EAFNOSUPPORT;
        spin_lock(&xfrm_policy_afinfo_lock);
        if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
-               err = -ENOBUFS;
+               err = -EEXIST;
        else {
                struct dst_ops *dst_ops = afinfo->dst_ops;
                if (likely(dst_ops->kmem_cachep == NULL))
@@ -2837,26 +2856,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
        }
        spin_unlock(&xfrm_policy_afinfo_lock);
 
-       rtnl_lock();
-       for_each_net(net) {
-               struct dst_ops *xfrm_dst_ops;
-
-               switch (afinfo->family) {
-               case AF_INET:
-                       xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
-                       break;
-#if IS_ENABLED(CONFIG_IPV6)
-               case AF_INET6:
-                       xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
-                       break;
-#endif
-               default:
-                       BUG();
-               }
-               *xfrm_dst_ops = *afinfo->dst_ops;
-       }
-       rtnl_unlock();
-
        return err;
 }
 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -2892,22 +2891,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
 }
 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
 
-static void __net_init xfrm_dst_ops_init(struct net *net)
-{
-       struct xfrm_policy_afinfo *afinfo;
-
-       rcu_read_lock();
-       afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
-       if (afinfo)
-               net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
-#if IS_ENABLED(CONFIG_IPV6)
-       afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
-       if (afinfo)
-               net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
-#endif
-       rcu_read_unlock();
-}
-
 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -3056,7 +3039,6 @@ static int __net_init xfrm_net_init(struct net *net)
        rv = xfrm_policy_init(net);
        if (rv < 0)
                goto out_policy;
-       xfrm_dst_ops_init(net);
        rv = xfrm_sysctl_init(net);
        if (rv < 0)
                goto out_sysctl;
@@ -3209,16 +3191,17 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
        }
        chain = &net->xfrm.policy_inexact[dir];
        hlist_for_each_entry(pol, chain, bydst) {
+               if ((pol->priority >= priority) && ret)
+                       break;
+
                if (xfrm_migrate_selector_match(sel, &pol->selector) &&
-                   pol->type == type &&
-                   pol->priority < priority) {
+                   pol->type == type) {
                        ret = pol;
                        break;
                }
        }
 
-       if (ret)
-               xfrm_pol_hold(ret);
+       xfrm_pol_hold(ret);
 
        read_unlock_bh(&net->xfrm.xfrm_policy_lock);