These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / drivers / net / ipvlan / ipvlan_core.c
index c30b5c3..8c48bb2 100644 (file)
@@ -85,11 +85,9 @@ void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
                hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
 }
 
-void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync)
+void ipvlan_ht_addr_del(struct ipvl_addr *addr)
 {
        hlist_del_init_rcu(&addr->hlnode);
-       if (sync)
-               synchronize_rcu();
 }
 
 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
@@ -189,67 +187,74 @@ unsigned int ipvlan_mac_hash(const unsigned char *addr)
        return hash & IPVLAN_MAC_FILTER_MASK;
 }
 
-static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb,
-                                  const struct ipvl_dev *in_dev, bool local)
+void ipvlan_process_multicast(struct work_struct *work)
 {
-       struct ethhdr *eth = eth_hdr(skb);
+       struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
+       struct ethhdr *ethh;
        struct ipvl_dev *ipvlan;
-       struct sk_buff *nskb;
+       struct sk_buff *skb, *nskb;
+       struct sk_buff_head list;
        unsigned int len;
        unsigned int mac_hash;
        int ret;
+       u8 pkt_type;
+       bool hlocal, dlocal;
 
-       if (skb->protocol == htons(ETH_P_PAUSE))
-               return;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
-               if (local && (ipvlan == in_dev))
-                       continue;
+       __skb_queue_head_init(&list);
 
-               mac_hash = ipvlan_mac_hash(eth->h_dest);
-               if (!test_bit(mac_hash, ipvlan->mac_filters))
-                       continue;
+       spin_lock_bh(&port->backlog.lock);
+       skb_queue_splice_tail_init(&port->backlog, &list);
+       spin_unlock_bh(&port->backlog.lock);
 
-               ret = NET_RX_DROP;
-               len = skb->len + ETH_HLEN;
-               nskb = skb_clone(skb, GFP_ATOMIC);
-               if (!nskb)
-                       goto mcast_acct;
+       while ((skb = __skb_dequeue(&list)) != NULL) {
+               ethh = eth_hdr(skb);
+               hlocal = ether_addr_equal(ethh->h_source, port->dev->dev_addr);
+               mac_hash = ipvlan_mac_hash(ethh->h_dest);
 
-               if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast))
-                       nskb->pkt_type = PACKET_BROADCAST;
+               if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
+                       pkt_type = PACKET_BROADCAST;
                else
-                       nskb->pkt_type = PACKET_MULTICAST;
-
-               nskb->dev = ipvlan->dev;
-               if (local)
-                       ret = dev_forward_skb(ipvlan->dev, nskb);
-               else
-                       ret = netif_rx(nskb);
-mcast_acct:
-               ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
-       }
-       rcu_read_unlock();
-
-       /* Locally generated? ...Forward a copy to the main-device as
-        * well. On the RX side we'll ignore it (wont give it to any
-        * of the virtual devices.
-        */
-       if (local) {
-               nskb = skb_clone(skb, GFP_ATOMIC);
-               if (nskb) {
-                       if (ether_addr_equal(eth->h_dest, port->dev->broadcast))
-                               nskb->pkt_type = PACKET_BROADCAST;
+                       pkt_type = PACKET_MULTICAST;
+
+               dlocal = false;
+               rcu_read_lock();
+               list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
+                       if (hlocal && (ipvlan->dev == skb->dev)) {
+                               dlocal = true;
+                               continue;
+                       }
+                       if (!test_bit(mac_hash, ipvlan->mac_filters))
+                               continue;
+
+                       ret = NET_RX_DROP;
+                       len = skb->len + ETH_HLEN;
+                       nskb = skb_clone(skb, GFP_ATOMIC);
+                       if (!nskb)
+                               goto acct;
+
+                       nskb->pkt_type = pkt_type;
+                       nskb->dev = ipvlan->dev;
+                       if (hlocal)
+                               ret = dev_forward_skb(ipvlan->dev, nskb);
                        else
-                               nskb->pkt_type = PACKET_MULTICAST;
-
-                       dev_forward_skb(port->dev, nskb);
+                               ret = netif_rx(nskb);
+acct:
+                       ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
+               }
+               rcu_read_unlock();
+
+               if (dlocal) {
+                       /* If the packet originated here, send it out. */
+                       skb->dev = port->dev;
+                       skb->pkt_type = pkt_type;
+                       dev_queue_xmit(skb);
+               } else {
+                       kfree_skb(skb);
                }
        }
 }
 
-static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
+static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
                            bool local)
 {
        struct ipvl_dev *ipvlan = addr->master;
@@ -257,6 +262,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
        unsigned int len;
        rx_handler_result_t ret = RX_HANDLER_CONSUMED;
        bool success = false;
+       struct sk_buff *skb = *pskb;
 
        len = skb->len + ETH_HLEN;
        if (unlikely(!(dev->flags & IFF_UP))) {
@@ -268,6 +274,7 @@ static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
        if (!skb)
                goto out;
 
+       *pskb = skb;
        skb->dev = dev;
        skb->pkt_type = PACKET_HOST;
 
@@ -339,17 +346,18 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
 {
        const struct iphdr *ip4h = ip_hdr(skb);
        struct net_device *dev = skb->dev;
+       struct net *net = dev_net(dev);
        struct rtable *rt;
        int err, ret = NET_XMIT_DROP;
        struct flowi4 fl4 = {
-               .flowi4_oif = dev_get_iflink(dev),
+               .flowi4_oif = dev->ifindex,
                .flowi4_tos = RT_TOS(ip4h->tos),
                .flowi4_flags = FLOWI_FLAG_ANYSRC,
                .daddr = ip4h->daddr,
                .saddr = ip4h->saddr,
        };
 
-       rt = ip_route_output_flow(dev_net(dev), &fl4, NULL);
+       rt = ip_route_output_flow(net, &fl4, NULL);
        if (IS_ERR(rt))
                goto err;
 
@@ -359,7 +367,7 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
        }
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
-       err = ip_local_out(skb);
+       err = ip_local_out(net, skb->sk, skb);
        if (unlikely(net_xmit_eval(err)))
                dev->stats.tx_errors++;
        else
@@ -376,10 +384,11 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 {
        const struct ipv6hdr *ip6h = ipv6_hdr(skb);
        struct net_device *dev = skb->dev;
+       struct net *net = dev_net(dev);
        struct dst_entry *dst;
        int err, ret = NET_XMIT_DROP;
        struct flowi6 fl6 = {
-               .flowi6_iif = skb->dev->ifindex,
+               .flowi6_iif = dev->ifindex,
                .daddr = ip6h->daddr,
                .saddr = ip6h->saddr,
                .flowi6_flags = FLOWI_FLAG_ANYSRC,
@@ -388,7 +397,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
                .flowi6_proto = ip6h->nexthdr,
        };
 
-       dst = ip6_route_output(dev_net(dev), NULL, &fl6);
+       dst = ip6_route_output(net, NULL, &fl6);
        if (dst->error) {
                ret = dst->error;
                dst_release(dst);
@@ -396,7 +405,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
        }
        skb_dst_drop(skb);
        skb_dst_set(skb, dst);
-       err = ip6_local_out(skb);
+       err = ip6_local_out(net, skb->sk, skb);
        if (unlikely(net_xmit_eval(err)))
                dev->stats.tx_errors++;
        else
@@ -446,6 +455,26 @@ out:
        return ret;
 }
 
+static void ipvlan_multicast_enqueue(struct ipvl_port *port,
+                                    struct sk_buff *skb)
+{
+       if (skb->protocol == htons(ETH_P_PAUSE)) {
+               kfree_skb(skb);
+               return;
+       }
+
+       spin_lock(&port->backlog.lock);
+       if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
+               __skb_queue_tail(&port->backlog, skb);
+               spin_unlock(&port->backlog.lock);
+               schedule_work(&port->wq);
+       } else {
+               spin_unlock(&port->backlog.lock);
+               atomic_long_inc(&skb->dev->rx_dropped);
+               kfree_skb(skb);
+       }
+}
+
 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 {
        const struct ipvl_dev *ipvlan = netdev_priv(dev);
@@ -459,7 +488,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 
        addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
        if (addr)
-               return ipvlan_rcv_frame(addr, skb, true);
+               return ipvlan_rcv_frame(addr, &skb, true);
 
 out:
        skb->dev = ipvlan->phy_dev;
@@ -479,7 +508,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
                if (lyr3h) {
                        addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
                        if (addr)
-                               return ipvlan_rcv_frame(addr, skb, true);
+                               return ipvlan_rcv_frame(addr, &skb, true);
                }
                skb = skb_share_check(skb, GFP_ATOMIC);
                if (!skb)
@@ -493,11 +522,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
                return dev_forward_skb(ipvlan->phy_dev, skb);
 
        } else if (is_multicast_ether_addr(eth->h_dest)) {
-               u8 ip_summed = skb->ip_summed;
-
-               skb->ip_summed = CHECKSUM_UNNECESSARY;
-               ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true);
-               skb->ip_summed = ip_summed;
+               ipvlan_multicast_enqueue(ipvlan->port, skb);
+               return NET_XMIT_SUCCESS;
        }
 
        skb->dev = ipvlan->phy_dev;
@@ -507,7 +533,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct ipvl_dev *ipvlan = netdev_priv(dev);
-       struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev);
+       struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);
 
        if (!port)
                goto out;
@@ -565,7 +591,7 @@ static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
 
        addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
        if (addr)
-               ret = ipvlan_rcv_frame(addr, skb, false);
+               ret = ipvlan_rcv_frame(addr, pskb, false);
 
 out:
        return ret;
@@ -581,8 +607,18 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
        int addr_type;
 
        if (is_multicast_ether_addr(eth->h_dest)) {
-               if (ipvlan_external_frame(skb, port))
-                       ipvlan_multicast_frame(port, skb, NULL, false);
+               if (ipvlan_external_frame(skb, port)) {
+                       struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+                       /* External frames are queued for device local
+                        * distribution, but a copy is given to master
+                        * straight away to avoid sending duplicates later
+                        * when work-queue processes this frame. This is
+                        * achieved by returning RX_HANDLER_PASS.
+                        */
+                       if (nskb)
+                               ipvlan_multicast_enqueue(port, nskb);
+               }
        } else {
                struct ipvl_addr *addr;
 
@@ -592,7 +628,7 @@ static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
 
                addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
                if (addr)
-                       ret = ipvlan_rcv_frame(addr, skb, false);
+                       ret = ipvlan_rcv_frame(addr, pskb, false);
        }
 
        return ret;
@@ -617,5 +653,5 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
        WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
                          port->mode);
        kfree_skb(skb);
-       return NET_RX_DROP;
+       return RX_HANDLER_CONSUMED;
 }