These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / net / ipv4 / xfrm4_policy.c
index bff6974..7b0edb3 100644 (file)
 #include <net/dst.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
+#include <net/l3mdev.h>
 
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
 
 static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
-                                           int tos,
+                                           int tos, int oif,
                                            const xfrm_address_t *saddr,
                                            const xfrm_address_t *daddr)
 {
@@ -28,9 +29,12 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
        memset(fl4, 0, sizeof(*fl4));
        fl4->daddr = daddr->a4;
        fl4->flowi4_tos = tos;
+       fl4->flowi4_oif = oif;
        if (saddr)
                fl4->saddr = saddr->a4;
 
+       fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF;
+
        rt = __ip_route_output_key(net, fl4);
        if (!IS_ERR(rt))
                return &rt->dst;
@@ -38,22 +42,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
        return ERR_CAST(rt);
 }
 
-static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
+static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
                                          const xfrm_address_t *saddr,
                                          const xfrm_address_t *daddr)
 {
        struct flowi4 fl4;
 
-       return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr);
+       return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
 }
 
-static int xfrm4_get_saddr(struct net *net,
+static int xfrm4_get_saddr(struct net *net, int oif,
                           xfrm_address_t *saddr, xfrm_address_t *daddr)
 {
        struct dst_entry *dst;
        struct flowi4 fl4;
 
-       dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr);
+       dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
        if (IS_ERR(dst))
                return -EHOSTUNREACH;
 
@@ -93,6 +97,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
        xdst->u.rt.rt_gateway = rt->rt_gateway;
        xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
        xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+       xdst->u.rt.rt_table_id = rt->rt_table_id;
        INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 
        return 0;
@@ -107,7 +112,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
        int oif = 0;
 
        if (skb_dst(skb))
-               oif = skb_dst(skb)->dev->ifindex;
+               oif = l3mdev_fib_oif(skb_dst(skb)->dev);
 
        memset(fl4, 0, sizeof(struct flowi4));
        fl4->flowi4_mark = skb->mark;
@@ -122,7 +127,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
                case IPPROTO_DCCP:
                        if (xprth + 4 < skb->data ||
                            pskb_may_pull(skb, xprth + 4 - skb->data)) {
-                               __be16 *ports = (__be16 *)xprth;
+                               __be16 *ports;
+
+                               xprth = skb_network_header(skb) + iph->ihl * 4;
+                               ports = (__be16 *)xprth;
 
                                fl4->fl4_sport = ports[!!reverse];
                                fl4->fl4_dport = ports[!reverse];
@@ -130,8 +138,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
                        break;
 
                case IPPROTO_ICMP:
-                       if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
-                               u8 *icmp = xprth;
+                       if (xprth + 2 < skb->data ||
+                           pskb_may_pull(skb, xprth + 2 - skb->data)) {
+                               u8 *icmp;
+
+                               xprth = skb_network_header(skb) + iph->ihl * 4;
+                               icmp = xprth;
 
                                fl4->fl4_icmp_type = icmp[0];
                                fl4->fl4_icmp_code = icmp[1];
@@ -139,33 +151,50 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
                        break;
 
                case IPPROTO_ESP:
-                       if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
-                               __be32 *ehdr = (__be32 *)xprth;
+                       if (xprth + 4 < skb->data ||
+                           pskb_may_pull(skb, xprth + 4 - skb->data)) {
+                               __be32 *ehdr;
+
+                               xprth = skb_network_header(skb) + iph->ihl * 4;
+                               ehdr = (__be32 *)xprth;
 
                                fl4->fl4_ipsec_spi = ehdr[0];
                        }
                        break;
 
                case IPPROTO_AH:
-                       if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
-                               __be32 *ah_hdr = (__be32 *)xprth;
+                       if (xprth + 8 < skb->data ||
+                           pskb_may_pull(skb, xprth + 8 - skb->data)) {
+                               __be32 *ah_hdr;
+
+                               xprth = skb_network_header(skb) + iph->ihl * 4;
+                               ah_hdr = (__be32 *)xprth;
 
                                fl4->fl4_ipsec_spi = ah_hdr[1];
                        }
                        break;
 
                case IPPROTO_COMP:
-                       if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
-                               __be16 *ipcomp_hdr = (__be16 *)xprth;
+                       if (xprth + 4 < skb->data ||
+                           pskb_may_pull(skb, xprth + 4 - skb->data)) {
+                               __be16 *ipcomp_hdr;
+
+                               xprth = skb_network_header(skb) + iph->ihl * 4;
+                               ipcomp_hdr = (__be16 *)xprth;
 
                                fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
                        }
                        break;
 
                case IPPROTO_GRE:
-                       if (pskb_may_pull(skb, xprth + 12 - skb->data)) {
-                               __be16 *greflags = (__be16 *)xprth;
-                               __be32 *gre_hdr = (__be32 *)xprth;
+                       if (xprth + 12 < skb->data ||
+                           pskb_may_pull(skb, xprth + 12 - skb->data)) {
+                               __be16 *greflags;
+                               __be32 *gre_hdr;
+
+                               xprth = skb_network_header(skb) + iph->ihl * 4;
+                               greflags = (__be16 *)xprth;
+                               gre_hdr = (__be32 *)xprth;
 
                                if (greflags[0] & GRE_KEY) {
                                        if (greflags[0] & GRE_CSUM)
@@ -230,7 +259,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
        xfrm_dst_ifdown(dst, dev);
 }
 
-static struct dst_ops xfrm4_dst_ops = {
+static struct dst_ops xfrm4_dst_ops_template = {
        .family =               AF_INET,
        .gc =                   xfrm4_garbage_collect,
        .update_pmtu =          xfrm4_update_pmtu,
@@ -239,12 +268,12 @@ static struct dst_ops xfrm4_dst_ops = {
        .destroy =              xfrm4_dst_destroy,
        .ifdown =               xfrm4_dst_ifdown,
        .local_out =            __ip_local_out,
-       .gc_thresh =            32768,
+       .gc_thresh =            INT_MAX,
 };
 
 static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
        .family =               AF_INET,
-       .dst_ops =              &xfrm4_dst_ops,
+       .dst_ops =              &xfrm4_dst_ops_template,
        .dst_lookup =           xfrm4_dst_lookup,
        .get_saddr =            xfrm4_get_saddr,
        .decode_session =       _decode_session4,
@@ -266,7 +295,7 @@ static struct ctl_table xfrm4_policy_table[] = {
        { }
 };
 
-static int __net_init xfrm4_net_init(struct net *net)
+static int __net_init xfrm4_net_sysctl_init(struct net *net)
 {
        struct ctl_table *table;
        struct ctl_table_header *hdr;
@@ -294,7 +323,7 @@ err_alloc:
        return -ENOMEM;
 }
 
-static void __net_exit xfrm4_net_exit(struct net *net)
+static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
 {
        struct ctl_table *table;
 
@@ -306,12 +335,44 @@ static void __net_exit xfrm4_net_exit(struct net *net)
        if (!net_eq(net, &init_net))
                kfree(table);
 }
+#else /* CONFIG_SYSCTL */
+static inline int xfrm4_net_sysctl_init(struct net *net)
+{
+       return 0;       /* !CONFIG_SYSCTL: nothing to register */
+}
+
+static inline void xfrm4_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm4_net_init(struct net *net)
+{
+       struct dst_ops *ops = &net->xfrm.xfrm4_dst_ops;
+       int err;
+
+       /* Each netns starts from its own copy of the template. */
+       memcpy(ops, &xfrm4_dst_ops_template, sizeof(xfrm4_dst_ops_template));
+       err = dst_entries_init(ops);
+       if (err)
+               return err;
+
+       err = xfrm4_net_sysctl_init(net);
+       if (err)        /* sysctl init failed: release the dst entries again */
+               dst_entries_destroy(ops);
+       return err;
+}
+
+static void __net_exit xfrm4_net_exit(struct net *net)
+{
+       xfrm4_net_sysctl_exit(net);     /* reverse order of xfrm4_net_init() */
+       dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+}
 
 static struct pernet_operations __net_initdata xfrm4_net_ops = {
        .init   = xfrm4_net_init,
        .exit   = xfrm4_net_exit,
 };
-#endif
 
 static void __init xfrm4_policy_init(void)
 {
@@ -320,13 +381,9 @@ static void __init xfrm4_policy_init(void)
 
 void __init xfrm4_init(void)
 {
-       dst_entries_init(&xfrm4_dst_ops);
-
        xfrm4_state_init();
        xfrm4_policy_init();
        xfrm4_protocol_init();
-#ifdef CONFIG_SYSCTL
        register_pernet_subsys(&xfrm4_net_ops);
-#endif
 }